From 89cbf1de5583dd98530f5d3d20786238cd7267d4 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Thu, 1 Jun 2023 11:42:15 -0700 Subject: [PATCH 001/123] peft eval directly from ckpt (#6785) * update to load from ckpt Signed-off-by: arendu * update Signed-off-by: arendu * load ckpt peft model Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update style Signed-off-by: arendu --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../conf/megatron_gpt_peft_eval_config.yaml | 2 ++ .../tuning/megatron_gpt_peft_eval.py | 32 +++++++++++++++---- nemo/collections/nlp/parts/nlp_overrides.py | 12 +++++-- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml index d7ebd69f31be..c430bd7fab5f 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml @@ -70,6 +70,8 @@ model: peft: peft_scheme: "adapter" # can be either adapter,ia3, or ptuning restore_from_path: null + restore_from_ckpt_name: null + restore_from_hparams_path: null # Used for adapter peft training adapter_tuning: diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index b45f5da69e89..a5bf1ee552cb 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -104,9 +104,17 @@ def main(cfg) -> None: trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) if cfg.model.peft.restore_from_path: - peft_model_cfg = MegatronGPTPEFTModel.restore_from( - restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, - ) + if cfg.model.peft.restore_from_path.endswith(".nemo"): + peft_model_cfg = MegatronGPTPEFTModel.restore_from( + restore_path=cfg.model.peft.restore_from_path, trainer=trainer, return_config=True, + ) + elif cfg.model.peft.restore_from_hparams_path: # not a .nemo model we expect a hparams.yaml file + peft_model_cfg = OmegaConf.to_container(OmegaConf.load(cfg.model.peft.restore_from_hparams_path).cfg) + peft_model_cfg = OmegaConf.create(peft_model_cfg) + # extract dict inside cfg key and convert it to DictConfig + # this allows interpolation to work the same way as config from the .restore_from method + else: + raise RuntimeError("This script requires a .nemo peft model or path to hparams.yaml (and a ckpt path).") else: peft_model_cfg = MegatronGPTSFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, return_config=True, @@ -127,9 +135,21 @@ def main(cfg) -> None: cfg.inference.tokens_to_generate = peft_model_cfg.data.test_ds.tokens_to_generate if cfg.model.peft.restore_from_path: - save_restore_connector = PEFTSaveRestoreConnector( - peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, - ) + if cfg.model.peft.restore_from_path.endswith(".nemo"): + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=cfg.model.peft.restore_from_path, peft_model_ckpt_path=None, + ) + else: + # attempting to load a ckpt peft model. 
+ if cfg.model.peft.restore_from_ckpt_name: + ckpt_name = cfg.model.peft.restore_from_ckpt_name + else: + ckpt_name = "model_weights.ckpt" + save_restore_connector = PEFTSaveRestoreConnector( + peft_model_nemo_path=None, + peft_model_ckpt_path=cfg.model.peft.restore_from_path, + peft_model_ckpt_name=ckpt_name, + ) else: save_restore_connector = NLPSaveRestoreConnector() diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index a43e06669489..5a0f028ddbe9 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -404,14 +404,20 @@ class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): Args: peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) - peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFt model. This is required when no .nemo is available (yet) such as during resumed training. + peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet) such as during resumed training. + peft_model_ckpt_name: The filename of the ckpt file inside the peft_model_ckpt_path folder If both are provided the peft_model_ckpt_path takes precedence. If neither are provided, PEFT params are initialized at random (not loaded from any external source). """ - def __init__(self, peft_model_nemo_path: Optional[str] = None, peft_model_ckpt_path: Optional[str] = None) -> None: + def __init__( + self, + peft_model_nemo_path: Optional[str] = None, + peft_model_ckpt_path: Optional[str] = None, + peft_model_ckpt_name: Optional[str] = "model_weights.ckpt", + ) -> None: super().__init__() - self.peft_model_ckpt_name = "model_weights.ckpt" + self.peft_model_ckpt_name = peft_model_ckpt_name if peft_model_ckpt_path: # First we will try to load a adapter ckpt path # this is given priority over loading from nemo path to make resumption of training possible From 23f1c429a9a7a48bf77fc3d689c401fc5ccb34b7 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:55:57 -0400 Subject: [PATCH 002/123] Add Frame-VAD examples and utils (#6463) * add model, dataset, necessary utils and tests Signed-off-by: stevehuang52 * fix tarred data Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * add fvad examples and update utils Signed-off-by: stevehuang52 * add copyright Signed-off-by: stevehuang52 * refactor and add tests Signed-off-by: stevehuang52 * update dataset Signed-off-by: stevehuang52 * update test Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * fix typos Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: Taejin Park --- .../conf/marblenet/marblenet_3x2x64_20ms.yaml | 209 +++++++++ .../conf/vad/frame_vad_infer_postprocess.yaml | 38 ++ .../speech_classification/frame_vad_infer.py | 188 ++++++++ .../speech_to_frame_label.py | 61 +++ nemo/collections/asr/data/audio_to_label.py | 22 +- .../asr/data/audio_to_label_dataset.py | 6 +- .../asr/models/classification_models.py | 10 +- .../asr/parts/preprocessing/features.py | 4 +- .../asr/parts/preprocessing/segment.py | 30 +- nemo/collections/asr/parts/utils/vad_utils.py | 439 +++++++++++++++--- .../asr/test_asr_classification_model.py | 47 +- tests/collections/asr/utils/test_vad_utils.py | 
126 +++++ 12 files changed, 1026 insertions(+), 154 deletions(-) create mode 100644 examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml create mode 100644 examples/asr/conf/vad/frame_vad_infer_postprocess.yaml create mode 100644 examples/asr/speech_classification/frame_vad_infer.py create mode 100644 examples/asr/speech_classification/speech_to_frame_label.py create mode 100644 tests/collections/asr/utils/test_vad_utils.py diff --git a/examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml b/examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml new file mode 100644 index 000000000000..2c98c210eb0e --- /dev/null +++ b/examples/asr/conf/marblenet/marblenet_3x2x64_20ms.yaml @@ -0,0 +1,209 @@ +name: &name "MarbleNet-3x2x64" + +model: + sample_rate: 16000 + repeat: 2 + dropout: 0.0 + kernel_size_factor: 1.0 + + labels: ['0', '1'] + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + labels: ${model.labels} + batch_size: 128 + shuffle: True + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + tarred_shard_strategy: "scatter" + shuffle_n: 2048 + num_workers: 8 + pin_memory: true + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + bucketing_weights: null + augmentor: + white_noise: + prob: 0.9 + min_level: -90 + max_level: -46 + gain: + prob: 0.5 + min_gain_dbfs: -10.0 + max_gain_dbfs: 10.0 + noise: + prob: 0.6 + manifest_path: /manifests/vad_noise/freesound_nonspeech_train_FL200.json + min_snr_db: 0 + max_snr_db: 20 + max_gain_db: 300.0 + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + labels: ${model.labels} + batch_size: 128 + shuffle: False + num_workers: 8 + pin_memory: true + val_loss_idx: 0 + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + labels: ${model.labels} + batch_size: 128 + shuffle: False + num_workers: 8 + pin_memory: true + test_loss_idx: 0 + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + normalize: "None" + window_size: 0.025 + sample_rate: ${model.sample_rate} + window_stride: 0.01 + window: "hann" + features: &n_mels 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: false + pad_to: 2 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConvASREncoder + feat_in: *n_mels + activation: relu + conv_mask: true + + jasper: + - filters: 128 + repeat: 1 + kernel: [11] + stride: [2] + dilation: [1] + dropout: ${model.dropout} + residual: false + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 64 + repeat: ${model.repeat} + kernel: [13] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: true + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 64 + repeat: ${model.repeat} + kernel: [15] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: true + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 64 + repeat: ${model.repeat} + kernel: [17] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: true + separable: true + kernel_size_factor: ${model.kernel_size_factor} + + - filters: 128 + repeat: 1 + kernel: [29] + stride: [1] + dilation: [2] + dropout: ${model.dropout} + residual: false + separable: true + kernel_size_factor: 
${model.kernel_size_factor} + + - filters: &enc_filters 128 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: ${model.dropout} + residual: false + + decoder: + _target_: nemo.collections.common.parts.MultiLayerPerceptron + hidden_size: *enc_filters + num_classes: -1 + num_layers: 1 + activation: 'relu' + log_softmax: false + + optim: + name: sgd + lr: 0.01 + # optimizer arguments + weight_decay: 0.001 + momentum: 0.9 + + # scheduler setup + sched: + name: PolynomialHoldDecayAnnealing + # Scheduler params + power: 2.0 + warmup_ratio: 0.05 + hold_ratio: 0.45 + min_lr: 0.001 + last_epoch: -1 + +trainer: + devices: -1 # number of gpus, -1 to use all gpus + max_epochs: 100 + max_steps: -1 # computed at runtime if not set + num_nodes: 1 + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + enable_checkpointing: False # Provided by exp_manager + logger: False # Provided by exp_manager + log_every_n_steps: 10 # Interval of logging. + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + check_val_every_n_epoch: 1 + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + +exp_manager: + exp_dir: null + name: *name + create_tensorboard_logger: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: "val_acc_macro" + mode: "max" + save_top_k: 3 + always_save_nemo: true # saves the checkpoints as nemo files instead of PTL checkpoints + save_best_model: true + + # you need to set these two to True to continue the training + resume_if_exists: true + resume_ignore_no_checkpoint: true + + create_wandb_logger: False + wandb_logger_kwargs: + name: null + project: null diff --git a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml new file mode 100644 index 000000000000..8c9ef7fffaf5 --- /dev/null +++ b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml @@ -0,0 +1,38 @@ +name: &name "vad_inference_postprocessing" + +dataset: null # Path of json file of evaluation data. Audio files should have unique names +num_workers: 12 +sample_rate: 16000 +evaluate: False # whether to get AUROC and DERs, the manifest must contains groundtruth if enabled + +prepare_manifest: + auto_split: True # whether to automatically split manifest entry by split_duration to avoid potential CUDA out of memory issue. + split_duration: 400 # try smaller number if you still have CUDA memory issue + +vad: + model_path: "vad_multilingual_frame_marblenet" #.nemo local model path or pretrained model name or none + use_rttm: True # set True to output as RTTM format + parameters: # Parameters not tuned on large datasets, please use default parameters with caution + normalize_audio_db: null # set to non null value to normalize RMS DB of audio before preprocessing + window_length_in_sec: 0.0 # window length in sec for VAD context input, must be 0 for frame-VAD + shift_length_in_sec: 0.02 # frame-length in seconds for frame-VAD + smoothing: False # Deprecated for Frame-VAD. false or type of smoothing method (eg: median, mean) + overlap: 0.875 # Deprecated for Frame-VAD. overlap ratio for overlapped mean/median smoothing filter. If smoothing=False, ignore this value. + postprocessing: + onset: 0.3 # onset threshold for detecting the beginning and end of a speech + offset: 0.3 # offset threshold for detecting the end of a speech. 
+ pad_onset: 0.5 # adding durations before each speech segment + pad_offset: 0.5 # adding durations after each speech segment + min_duration_on: 0.0 # threshold for short speech deletion + min_duration_off: 0.6 # threshold for short non-speech segment deletion + filter_speech_first: True + +prepared_manifest_vad_input: null # if not specify, it will automatically generated be "manifest_vad_input.json" +frame_out_dir: "vad_frame_outputs" +smoothing_out_dir: null # if not specify, it will automatically generated be frame_out_dir + "/overlap_smoothing_output" + "_" + smoothing_method + "_" + str(overlap) +rttm_out_dir: null # if not specify, it will automatically be frame_out_dir + "/seg_output_" + key and value in postprocessing params +out_manifest_filepath: null # if not specify it will automatically be "manifest_vad_out.json" + + +# json manifest line example +# {"audio_filepath": "/path/to/audio_file.wav", "offset": 0, "duration": 1.23, "label": "infer", "text": "-"} diff --git a/examples/asr/speech_classification/frame_vad_infer.py b/examples/asr/speech_classification/frame_vad_infer.py new file mode 100644 index 000000000000..9c8e57b0773d --- /dev/null +++ b/examples/asr/speech_classification/frame_vad_infer.py @@ -0,0 +1,188 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script peforms VAD on each 20ms frames of the input audio files. +Postprocessing is also performed to generate speech segments and store them as RTTM files. +Long audio files will be splitted into smaller chunks to avoid OOM issues, but the frames close +to the split points might have worse performance due to truncated context. 
+ +## Usage: +python frame_vad_infer.py \ + --config-path="../conf/vad" --config-name="frame_vad_infer_postprocess" \ + dataset= +""" + +import os +from pathlib import Path + +import torch + +from nemo.collections.asr.parts.utils.manifest_utils import write_manifest +from nemo.collections.asr.parts.utils.vad_utils import ( + frame_vad_eval_detection_error, + frame_vad_infer_load_manifest, + generate_overlap_vad_seq, + generate_vad_frame_pred, + generate_vad_segment_table, + init_frame_vad_model, + prepare_manifest, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +@hydra_runner(config_path="../conf/vad", config_name="frame_vad_infer_postprocess") +def main(cfg): + if not cfg.dataset: + raise ValueError("You must input the path of json file of evaluation data") + + # each line of dataset should be have different audio_filepath and unique name to simplify edge cases or conditions + logging.info(f"Loading manifest file {cfg.dataset}") + manifest_orig, key_labels_map, key_rttm_map = frame_vad_infer_load_manifest(cfg) + + # Prepare manifest for streaming VAD + manifest_vad_input = cfg.dataset + if cfg.prepare_manifest.auto_split: + logging.info("Split long audio file to avoid CUDA memory issue") + logging.debug("Try smaller split_duration if you still have CUDA memory issue") + config = { + 'input': manifest_vad_input, + 'window_length_in_sec': cfg.vad.parameters.window_length_in_sec, + 'split_duration': cfg.prepare_manifest.split_duration, + 'num_workers': cfg.num_workers, + 'prepared_manifest_vad_input': cfg.prepared_manifest_vad_input, + } + manifest_vad_input = prepare_manifest(config) + else: + logging.warning( + "If you encounter CUDA memory issue, try splitting manifest entry by split_duration to avoid it." + ) + + torch.set_grad_enabled(False) + vad_model = init_frame_vad_model(cfg.vad.model_path) + + # setup_test_data + vad_model.setup_test_data( + test_data_config={ + 'batch_size': 1, + 'sample_rate': 16000, + 'manifest_filepath': manifest_vad_input, + 'labels': ['infer'], + 'num_workers': cfg.num_workers, + 'shuffle': False, + 'normalize_audio_db': cfg.vad.parameters.normalize_audio_db, + } + ) + + vad_model = vad_model.to(device) + vad_model.eval() + + if not os.path.exists(cfg.frame_out_dir): + logging.info(f"Frame predictions do not exist at {cfg.frame_out_dir}, generating frame prediction.") + os.mkdir(cfg.frame_out_dir) + extract_frame_preds = True + else: + logging.info(f"Frame predictions already exist at {cfg.frame_out_dir}, skipping frame prediction generation.") + extract_frame_preds = False + + if extract_frame_preds: + logging.info("Generating frame-level prediction ") + pred_dir = generate_vad_frame_pred( + vad_model=vad_model, + window_length_in_sec=cfg.vad.parameters.window_length_in_sec, + shift_length_in_sec=cfg.vad.parameters.shift_length_in_sec, + manifest_vad_input=manifest_vad_input, + out_dir=cfg.frame_out_dir, + ) + logging.info(f"Finish generating VAD frame level prediction. You can find the prediction in {pred_dir}") + else: + pred_dir = cfg.frame_out_dir + + frame_length_in_sec = cfg.vad.parameters.shift_length_in_sec + + # overlap smoothing filter + if cfg.vad.parameters.smoothing: + # Generate predictions with overlapping input segments. Then a smoothing filter is applied to decide the label for a frame spanned by multiple segments. 
+ # smoothing_method would be either in majority vote (median) or average (mean) + logging.info("Generating predictions with overlapping input segments") + smoothing_pred_dir = generate_overlap_vad_seq( + frame_pred_dir=pred_dir, + smoothing_method=cfg.vad.parameters.smoothing, + overlap=cfg.vad.parameters.overlap, + window_length_in_sec=cfg.vad.parameters.window_length_in_sec, + shift_length_in_sec=cfg.vad.parameters.shift_length_in_sec, + num_workers=cfg.num_workers, + out_dir=cfg.smoothing_out_dir, + ) + logging.info( + f"Finish generating predictions with overlapping input segments with smoothing_method={cfg.vad.parameters.smoothing} and overlap={cfg.vad.parameters.overlap}" + ) + pred_dir = smoothing_pred_dir + + # postprocessing and generate speech segments + logging.info("Converting frame level prediction to RTTM files.") + rttm_out_dir = generate_vad_segment_table( + vad_pred_dir=pred_dir, + postprocessing_params=cfg.vad.parameters.postprocessing, + frame_length_in_sec=frame_length_in_sec, + num_workers=cfg.num_workers, + use_rttm=cfg.vad.use_rttm, + out_dir=cfg.rttm_out_dir, + ) + logging.info( + f"Finish generating speech semgents table with postprocessing_params: {cfg.vad.parameters.postprocessing}" + ) + + logging.info("Writing VAD output to manifest") + key_pred_rttm_map = {} + manifest_new = [] + for entry in manifest_orig: + key = Path(entry['audio_filepath']).stem + entry['rttm_filepath'] = Path(os.path.join(rttm_out_dir, key + ".rttm")).absolute().as_posix() + if not Path(entry['rttm_filepath']).is_file(): + logging.warning(f"Not able to find {entry['rttm_filepath']} for {entry['audio_filepath']}") + entry['rttm_filepath'] = "" + manifest_new.append(entry) + key_pred_rttm_map[key] = entry['rttm_filepath'] + + if not cfg.out_manifest_filepath: + out_manifest_filepath = "manifest_vad_output.json" + else: + out_manifest_filepath = cfg.out_manifest_filepath + write_manifest(out_manifest_filepath, manifest_new) + logging.info(f"Finished writing VAD output to manifest: {out_manifest_filepath}") + + if cfg.get("evaluate", False): + logging.info("Evaluating VAD results") + auroc, report = frame_vad_eval_detection_error( + pred_dir=pred_dir, + key_labels_map=key_labels_map, + key_rttm_map=key_rttm_map, + key_pred_rttm_map=key_pred_rttm_map, + frame_length_in_sec=frame_length_in_sec, + ) + DetER = report.iloc[[-1]][('detection error rate', '%')].item() + FA = report.iloc[[-1]][('false alarm', '%')].item() + MISS = report.iloc[[-1]][('miss', '%')].item() + logging.info(f"AUROC: {auroc:.4f}") + logging.info(f"DetER={DetER:0.4f}, False Alarm={FA:0.4f}, Miss={MISS:0.4f}") + logging.info(f"with params: {cfg.vad.parameters.postprocessing}") + logging.info("Done!") + + +if __name__ == "__main__": + main() # pylint: disable=no-value-for-parameter diff --git a/examples/asr/speech_classification/speech_to_frame_label.py b/examples/asr/speech_classification/speech_to_frame_label.py new file mode 100644 index 000000000000..04cc77afda44 --- /dev/null +++ b/examples/asr/speech_classification/speech_to_frame_label.py @@ -0,0 +1,61 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The script trains a model that peforms classification on each frame of the input audio. +The default config (i.e., marblenet_3x2x64_20ms.yaml) outputs 20ms frames. + +## Training +```sh +python speech_to_label.py \ + --config-path= + --config-name= \ + model.train_ds.manifest_filepath="" \ + model.validation_ds.manifest_filepath=["",""] \ + trainer.devices=2 \ + trainer.accelerator="gpu" \ + strategy="ddp" \ + trainer.max_epochs=200 +``` +""" + +import pytorch_lightning as pl +from omegaconf import OmegaConf +from nemo.collections.asr.models.classification_models import EncDecFrameClassificationModel + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="../conf/marblenet", config_name="marblenet_3x2x64_20ms") +def main(cfg): + logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') + + trainer = pl.Trainer(**cfg.trainer) + exp_manager(trainer, cfg.get("exp_manager", None)) + model = EncDecFrameClassificationModel(cfg=cfg.model, trainer=trainer) + + # Initialize the weights of the model from another model, if provided via config + model.maybe_init_from_pretrained_checkpoint(cfg) + + trainer.fit(model) + + if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None: + if model.prepare_test(trainer): + trainer.test(model) + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/nemo/collections/asr/data/audio_to_label.py b/nemo/collections/asr/data/audio_to_label.py index 7585e4d7ea4f..f00f961b4c81 100644 --- a/nemo/collections/asr/data/audio_to_label.py +++ b/nemo/collections/asr/data/audio_to_label.py @@ -505,7 +505,7 @@ class _TarredAudioLabelDataset(IterableDataset): - `replicate`: Optional shard strategy, where each node gets all of the set of shards available in the tarred dataset, which are permanently pre-allocated and never changed at runtime. The benefit of replication is that it allows each node to sample data points from the entire - dataset independently of other nodes, and reduces dependence on value of `shuffle_n`. + dataset independently of other nodes, and reduces dependence on the value of `shuffle_n`. .. warning:: Replicated strategy allows every node to sample the entire set of available tarfiles, @@ -894,9 +894,8 @@ class AudioToMultiLabelDataset(Dataset): Defaults to False. cal_labels_occurrence (bool): Whether to calculate occurrence of labels Defaults to False. - delimiter (Optional[str]): Delimiter to use when spliting the label string, default to None. - normalize_audio_db (bool): Whether to normalize audio signal to a target db, default to False. - normalize_audio_db_target (float): Target db to normalize audio signal, default to -20. + delimiter (Optional[str]): Delimiter to use when splitting the label string, default to None. + normalize_audio_db (Optional[float]): normalize audio signal to a target db, default to None. 
""" @property @@ -942,8 +941,7 @@ def __init__( is_regression_task: bool = False, cal_labels_occurrence: Optional[bool] = False, delimiter: Optional[str] = None, - normalize_audio_db: bool = False, - normalize_audio_db_target: float = -20.0, + normalize_audio_db: Optional[float] = None, ): super().__init__() if isinstance(manifest_filepath, str): @@ -951,7 +949,6 @@ def __init__( self.delimiter = delimiter self.normalize_audio_db = normalize_audio_db - self.normalize_audio_db_target = normalize_audio_db_target self.collection = collections.ASRSpeechLabel( manifests_files=manifest_filepath, @@ -1022,7 +1019,6 @@ def __getitem__(self, index): duration=sample.duration, trim=self.trim, normalize_db=self.normalize_audio_db, - normalize_db_target=self.normalize_audio_db_target, ) f, fl = features, torch.tensor(features.size(0)).long() @@ -1104,9 +1100,8 @@ class TarredAudioToMultiLabelDataset(IterableDataset): or test datasets. global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. world_size (int): Total number of processes, used for partitioning shards. Defaults to 0. - delimiter (Optional[str]): Delimiter to use when spliting the label string, default to None. - normalize_audio_db (bool): Whether to normalize audio signal to a target db, default to False. - normalize_audio_db_target (float): Target db to normalize audio signal, default to -20. + delimiter (Optional[str]): Delimiter to use when splitting the label string, default to None. + normalize_audio_db (Optional[float]): normalize audio signal to a target db, default to None. """ def __init__( @@ -1127,8 +1122,7 @@ def __init__( global_rank: int = 0, world_size: int = 0, delimiter: Optional[str] = None, - normalize_audio_db: bool = False, - normalize_audio_db_target: float = -20.0, + normalize_audio_db: Optional[float] = None, ): super().__init__() if isinstance(manifest_filepath, str): @@ -1138,7 +1132,6 @@ def __init__( self.is_regression_task = is_regression_task self.delimiter = delimiter self.normalize_audio_db = normalize_audio_db - self.normalize_audio_db_target = normalize_audio_db_target self.collection = collections.ASRSpeechLabel( manifests_files=manifest_filepath, @@ -1278,7 +1271,6 @@ def _build_sample(self, tup): duration=manifest_entry.duration, trim=self.trim, normalize_db=self.normalize_audio_db, - normalize_db_target=self.normalize_audio_db_target, ) audio_filestream.close() diff --git a/nemo/collections/asr/data/audio_to_label_dataset.py b/nemo/collections/asr/data/audio_to_label_dataset.py index a242308d4042..dcead6df94b8 100644 --- a/nemo/collections/asr/data/audio_to_label_dataset.py +++ b/nemo/collections/asr/data/audio_to_label_dataset.py @@ -240,8 +240,7 @@ def get_audio_multi_label_dataset(cfg: DictConfig) -> audio_to_label.AudioToMult is_regression_task=cfg.get("is_regression_task", False), cal_labels_occurrence=cfg.get("cal_labels_occurrence", False), delimiter=cfg.get("delimiter", None), - normalize_audio_db=cfg.get("normalize_audio_db", False), - normalize_audio_db_target=cfg.get("normalize_audio_db_target", -20), + normalize_audio_db=cfg.get("normalize_audio_db", None), ) return dataset @@ -294,8 +293,7 @@ def get_tarred_audio_multi_label_dataset( shard_strategy=cfg.get('tarred_shard_strategy', 'scatter'), global_rank=global_rank, world_size=world_size, - normalize_audio_db=cfg.get("normalize_audio_db", False), - normalize_audio_db_target=cfg.get("normalize_audio_db_target", -20), + normalize_audio_db=cfg.get("normalize_audio_db", None), ) if bucketing_weights: diff --git 
a/nemo/collections/asr/models/classification_models.py b/nemo/collections/asr/models/classification_models.py index a7b55e49d754..fb0ee82132a1 100644 --- a/nemo/collections/asr/models/classification_models.py +++ b/nemo/collections/asr/models/classification_models.py @@ -845,6 +845,7 @@ def output_types(self) -> Optional[Dict[str, NeuralType]]: def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.num_classes = len(cfg.labels) self.eval_loop_cnt = 0 + self.ratio_threshold = cfg.get('ratio_threshold', 0.2) super().__init__(cfg=cfg, trainer=trainer) @classmethod @@ -1063,8 +1064,9 @@ def reshape_labels(self, logits, labels, logits_len, labels_len): Reshape labels to match logits shape. For example, each label is expected to cover a 40ms frame, while each frme prediction from the model covers 20ms. If labels are shorter than logits, labels are repeated, otherwise labels are folded and argmax is applied to obtain the label of each frame. When lengths of labels and logits are not factors of each other, labels are truncated or padded with zeros. - The threshold 0.2 is used to determine whether to pad or truncate labels, where the value 0.2 is not important as in real cases the ratio - is very close to either ceil(ratio) or floor(ratio). We use 0.2 here for easier unit-testing. + The ratio_threshold=0.2 is used to determine whether to pad or truncate labels, where the value 0.2 is not important as in real cases the ratio + is very close to either ceil(ratio) or floor(ratio). We use 0.2 here for easier unit-testing. This implementation does not allow frame length + and label length that are not multiples of each other. Args: logits: logits tensor with shape [B, T1, C] labels: labels tensor with shape [B, T2] @@ -1080,7 +1082,7 @@ def reshape_labels(self, logits, labels, logits_len, labels_len): if logits_max_len < labels_max_len: ratio = labels_max_len // logits_max_len res = labels_max_len % logits_max_len - if ceil(ratio) - ratio < 0.2: # e.g., ratio is 1.99 + if ceil(ratio) - ratio < self.ratio_threshold: # e.g., ratio is 1.99 # pad labels with zeros until labels_max_len is a multiple of logits_max_len labels = labels.cpu().tolist() if len(labels) % ceil(ratio) != 0: @@ -1101,7 +1103,7 @@ def reshape_labels(self, logits, labels, logits_len, labels_len): elif logits_max_len > labels_max_len: ratio = logits_max_len / labels_max_len res = logits_max_len % labels_max_len - if ceil(ratio) - ratio < 0.2: # e.g., ratio is 1.99 + if ceil(ratio) - ratio < self.ratio_threshold: # e.g., ratio is 1.99 # repeat labels for ceil(ratio) times, and DROP additional labels based on logits_max_len labels = labels.repeat_interleave(ceil(ratio), dim=1).long() labels = labels[:, :logits_max_len] diff --git a/nemo/collections/asr/parts/preprocessing/features.py b/nemo/collections/asr/parts/preprocessing/features.py index c2e84b04e981..531cd3105c04 100644 --- a/nemo/collections/asr/parts/preprocessing/features.py +++ b/nemo/collections/asr/parts/preprocessing/features.py @@ -181,8 +181,7 @@ def process( trim_hop_length=512, orig_sr=None, channel_selector=None, - normalize_db=False, - normalize_db_target=-20.0, + normalize_db=None, ): audio = AudioSegment.from_file( file_path, @@ -198,7 +197,6 @@ def process( orig_sr=orig_sr, channel_selector=channel_selector, normalize_db=normalize_db, - normalize_db_target=normalize_db_target, ) return self.process_segment(audio) diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py index 
af6034f9af3a..89458ff4c4f6 100644 --- a/nemo/collections/asr/parts/preprocessing/segment.py +++ b/nemo/collections/asr/parts/preprocessing/segment.py @@ -36,6 +36,7 @@ import math import os import random +from typing import Optional import librosa import numpy as np @@ -78,8 +79,8 @@ def __init__( trim_hop_length=512, orig_sr=None, channel_selector=None, - normalize_db=False, - normalize_db_target=-20.0, + normalize_db: Optional[float] = None, + ref_channel: Optional[int] = None, ): """Create audio segment from samples. Samples are convert float32 internally, with int scaled to [-1, 1]. @@ -114,8 +115,11 @@ def __init__( self._samples = samples self._sample_rate = sample_rate self._orig_sr = orig_sr if orig_sr is not None else sample_rate - if normalize_db: - self.normalize_db(normalize_db_target) + self._ref_channel = ref_channel + self._normalize_db = normalize_db + + if normalize_db is not None: + self.normalize_db(normalize_db, ref_channel) def __eq__(self, other): """Return whether two objects are equal.""" @@ -185,8 +189,8 @@ def from_file( trim_hop_length=512, orig_sr=None, channel_selector=None, - normalize_db=False, - normalize_db_target=-20.0, + normalize_db=None, + ref_channel=None, ): """ Load a file supported by librosa and return as an AudioSegment. @@ -207,8 +211,8 @@ def from_file( :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable of integers denoting a subset of channels. Channel selector is using zero-based indexing. If set to `None`, the original signal will be used. - :param normalize_db (bool): if true, normalize the audio signal to a target RMS value - :param normalize_db_target (float): the target RMS value in decibels + :param normalize_db (Optional[float]): if not None, normalize the audio signal to a target RMS value + :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio, set None to use max RMS across channels :return: AudioSegment instance """ samples = None @@ -226,6 +230,8 @@ def from_file( trim_hop_length=trim_hop_length, orig_sr=orig_sr, channel_selector=channel_selector, + normalize_db=normalize_db, + ref_channel=ref_channel, ) if not isinstance(audio_file, str) or os.path.splitext(audio_file)[-1] in sf_supported_formats: @@ -283,7 +289,7 @@ def from_file( orig_sr=orig_sr, channel_selector=channel_selector, normalize_db=normalize_db, - normalize_db_target=normalize_db_target, + ref_channel=ref_channel, ) @classmethod @@ -464,10 +470,14 @@ def orig_sr(self): def gain_db(self, gain): self._samples *= 10.0 ** (gain / 20.0) - def normalize_db(self, target_db=-20): + def normalize_db(self, target_db=-20, ref_channel=None): """Normalize the signal to a target RMS value in decibels. + For multi-channel audio, the RMS value is determined by the reference channel (if not None), + otherwise it will be the maximum RMS across all channels. """ rms_db = self.rms_db + if self.num_channels > 1: + rms_db = max(rms_db) if ref_channel is None else rms_db[ref_channel] gain = target_db - rms_db self.gain_db(gain) diff --git a/nemo/collections/asr/parts/utils/vad_utils.py b/nemo/collections/asr/parts/utils/vad_utils.py index d35d5466a523..addf3cae29b7 100644 --- a/nemo/collections/asr/parts/utils/vad_utils.py +++ b/nemo/collections/asr/parts/utils/vad_utils.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import glob import json import math @@ -18,8 +19,9 @@ import os import shutil from itertools import repeat +from math import ceil, floor from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union import IPython.display as ipd import librosa @@ -27,12 +29,15 @@ import numpy as np import pandas as pd import torch +from omegaconf import DictConfig from pyannote.core import Annotation, Segment from pyannote.metrics import detection +from sklearn.metrics import roc_auc_score from sklearn.model_selection import ParameterGrid from tqdm import tqdm -from nemo.collections.asr.models import EncDecClassificationModel +from nemo.collections.asr.models import EncDecClassificationModel, EncDecFrameClassificationModel +from nemo.collections.common.parts.preprocessing.manifest import get_full_path from nemo.utils import logging try: @@ -78,6 +83,7 @@ def prepare_manifest(config: dict) -> str: 'label': 'infer', 'split_duration': config['split_duration'], 'window_length_in_sec': config['window_length_in_sec'], + 'manifest_dir': Path(config['input']).parent if type(config['input']) == str else '', } if config.get('num_workers') is not None and config['num_workers'] > 1: @@ -138,6 +144,12 @@ def write_vad_infer_manifest(file: dict, args_func: dict) -> list: in_duration = file.get('duration', None) in_offset = file.get('offset', 0) + # if filepath is not found, try to find it in the dir of manifest + if not Path(filepath).is_file(): + new_filepath = Path(args_func['manifest_dir']) / filepath + if new_filepath.is_file(): + filepath = new_filepath.absolute().as_posix() + try: sr = 16000 x, _sr = librosa.load(filepath, sr=sr, offset=in_offset, duration=in_duration) @@ -692,7 +704,12 @@ def generate_vad_segment_table_per_file(pred_filepath: str, per_args: dict) -> s def generate_vad_segment_table( - vad_pred_dir: str, postprocessing_params: dict, frame_length_in_sec: float, num_workers: int, out_dir: str = None, + vad_pred_dir: str, + postprocessing_params: dict, + frame_length_in_sec: float, + num_workers: int, + out_dir: str = None, + use_rttm: bool = False, ) -> str: """ Convert frame level prediction to speech segment in start and end times format. @@ -706,27 +723,26 @@ def generate_vad_segment_table( out_dir (str): output dir of generated table/csv file. num_workers(float): number of process for multiprocessing Returns: - table_out_dir(str): directory of the generated table. + out_dir(str): directory of the generated table. 
""" suffixes = ("frame", "mean", "median") vad_pred_filepath_list = [os.path.join(vad_pred_dir, x) for x in os.listdir(vad_pred_dir) if x.endswith(suffixes)] - if out_dir: - table_out_dir = out_dir - else: - table_out_dir_name = "table_output_tmp_" + if not out_dir: + out_dir_name = "seg_output_" for key in postprocessing_params: - table_out_dir_name = table_out_dir_name + str(key) + str(postprocessing_params[key]) + "_" + out_dir_name = out_dir_name + str(key) + str(postprocessing_params[key]) + "-" - table_out_dir = os.path.join(vad_pred_dir, table_out_dir_name) + out_dir = os.path.join(vad_pred_dir, out_dir_name) - if not os.path.exists(table_out_dir): - os.mkdir(table_out_dir) + if not os.path.exists(out_dir): + os.mkdir(out_dir) per_args = { "frame_length_in_sec": frame_length_in_sec, - "out_dir": table_out_dir, + "out_dir": out_dir, + "use_rttm": use_rttm, } per_args = {**per_args, **postprocessing_params} num_workers = None @@ -741,12 +757,11 @@ def generate_vad_segment_table( leave=True, ) ) - else: for vad_pred_filepath in tqdm(vad_pred_filepath_list, desc='creating speech segments', leave=True): generate_vad_segment_table_per_file(vad_pred_filepath, per_args) - return table_out_dir + return out_dir def generate_vad_segment_table_per_file_star(args): @@ -955,33 +970,50 @@ def pred_rttm_map(vad_pred: str, groundtruth_RTTM: str, vad_pred_method: str = " def plot( path2audio_file: str, - path2_vad_pred: str, - path2ground_truth_label: str = None, + path2_vad_pred: Optional[str] = None, + path2groundtruth_rttm: Optional[str] = None, + groundtruth_labels: Optional[str] = None, + sample_rate: int = 16000, offset: float = 0, duration: float = None, threshold: float = None, per_args: dict = None, + unit_frame_len: float = 0.01, + label_repeat: int = 1, + xticks_step: int = 5, ) -> ipd.Audio: """ - Plot VAD outputs for demonstration in tutorial + Plot Audio and/or VAD output and/or groundtruth labels for visualization Args: path2audio_file (str): path to audio file. path2_vad_pred (str): path to vad prediction file, - path2ground_truth_label(str): path to groundtruth label file. + path2groundtruth_rttm(str): path to groundtruth RTTM file. + ground_truth_labels(str): a list of groundtruth label. + sample_rate (int): sample rate of audio file. + offset (float): offset in seconds. + duration (float): duration in seconds. threshold (float): threshold for prediction score (from 0 to 1). per_args(dict): a dict that stores the thresholds for postprocessing. + unit_frame_len (float): unit frame length in seconds for VAD predictions. + label_repeat (int): repeat the label for this number of times to match different frame lengths in preds and labels. + xticks_step (int): step size for xticks. 
""" plt.figure(figsize=[20, 2]) - UNIT_FRAME_LEN = 0.01 - audio, sample_rate = librosa.load(path=path2audio_file, sr=16000, mono=True, offset=offset, duration=duration) + audio, sample_rate = librosa.load( + path=path2audio_file, sr=sample_rate, mono=True, offset=offset, duration=duration + ) dur = librosa.get_duration(y=audio, sr=sample_rate) - time = np.arange(offset, offset + dur, UNIT_FRAME_LEN) - frame, _ = load_tensor_from_file(path2_vad_pred) - frame_snippet = frame[int(offset / UNIT_FRAME_LEN) : int((offset + dur) / UNIT_FRAME_LEN)] + time = np.arange(offset, offset + dur, unit_frame_len) + len_pred = int(dur / unit_frame_len) + 1 + + frame_snippet = None + if path2_vad_pred: + frame, _ = load_tensor_from_file(path2_vad_pred) + frame_snippet = frame[int(offset / unit_frame_len) : int((offset + dur) / unit_frame_len)] + len_pred = len(frame_snippet) - len_pred = len(frame_snippet) ax1 = plt.subplot() ax1.plot(np.arange(audio.size) / sample_rate, audio, 'gray') ax1.set_xlim([0, int(dur) + 1]) @@ -995,27 +1027,41 @@ def plot( if not threshold and not per_args: raise ValueError("One and only one of threshold and per_args must have been used!") - if threshold: + if threshold and frame_snippet is not None: pred_snippet = np.where(frame_snippet >= threshold, 1, 0) - if per_args: + elif per_args and frame_snippet is not None: _, per_args_float = prepare_gen_segment_table( frame, per_args ) # take whole frame here for calculating onset and offset speech_segments = generate_vad_segment_table_per_tensor(frame, per_args_float) pred = gen_pred_from_speech_segments(speech_segments, frame) - pred_snippet = pred[int(offset / UNIT_FRAME_LEN) : int((offset + dur) / UNIT_FRAME_LEN)] + pred_snippet = pred[int(offset / unit_frame_len) : int((offset + dur) / unit_frame_len)] + else: + pred_snippet = None + + if path2groundtruth_rttm and path2groundtruth_rttm.endswith('.rttm'): + label = extract_labels(path2groundtruth_rttm, time) + elif groundtruth_labels: + label = [float(x) for x in groundtruth_labels] + if label_repeat > 1: + label = np.repeat(label, label_repeat) + label = label[int(offset / unit_frame_len) : int((offset + dur) / unit_frame_len)] + else: + label = None - if path2ground_truth_label: - label = extract_labels(path2ground_truth_label, time) - ax2.plot(np.arange(len_pred) * UNIT_FRAME_LEN, label, 'r', label='label') + if label: + ax2.plot(np.arange(len_pred) * unit_frame_len, label, 'r', label='label') + if pred_snippet: + ax2.plot(np.arange(len_pred) * unit_frame_len, pred_snippet, 'b', label='pred') + if frame_snippet: + ax2.plot(np.arange(len_pred) * unit_frame_len, frame_snippet, 'g--', label='speech prob') - ax2.plot(np.arange(len_pred) * UNIT_FRAME_LEN, pred_snippet, 'b', label='pred') - ax2.plot(np.arange(len_pred) * UNIT_FRAME_LEN, frame_snippet, 'g--', label='speech prob') ax2.tick_params(axis='y', labelcolor='r') ax2.legend(loc='lower right', shadow=True) ax2.set_ylabel('Preds and Probas') ax2.set_ylim([-0.1, 1.1]) - return ipd.Audio(audio, rate=16000) + ax2.set_xticks(np.arange(0, int(dur) + 1, xticks_step)) + return ipd.Audio(audio, rate=sample_rate) def gen_pred_from_speech_segments( @@ -1038,11 +1084,11 @@ def gen_pred_from_speech_segments( def extract_labels(path2ground_truth_label: str, time: list) -> list: """ Extract ground-truth label for given time period. - path2ground_truth_label (str): path of groundtruth label file + path2ground_truth_label (str): path of groundtruth RTTM file time (list) : a list of array representing time period. 
""" - data = pd.read_csv(path2ground_truth_label, sep=" ", delimiter=None, header=None) + data = pd.read_csv(path2ground_truth_label, sep="\s+", delimiter=None, header=None) data = data.rename(columns={3: "start", 4: "dur", 7: "speaker"}) labels = [] for pos in time: @@ -1086,9 +1132,14 @@ def generate_vad_frame_pred( else: log_probs = vad_model(input_signal=test_batch[0], input_signal_length=test_batch[1]) probs = torch.softmax(log_probs, dim=-1) + if len(probs.shape) == 3 and probs.shape[0] == 1: + # squeeze the batch dimension, since batch size is 1 for frame-VAD + probs = probs.squeeze(0) # [1,T,C] -> [T,C] pred = probs[:, 1] - if status[i] == 'start': + if window_length_in_sec == 0: + to_save = pred + elif status[i] == 'start': to_save = pred[:-trunc] elif status[i] == 'next': to_save = pred[trunc:-trunc_l] @@ -1097,6 +1148,7 @@ def generate_vad_frame_pred( else: to_save = pred + to_save = to_save.cpu().tolist() all_len += len(to_save) outpath = os.path.join(out_dir, data[i] + ".frame") with open(outpath, "a", encoding='utf-8') as fout: @@ -1125,6 +1177,21 @@ def init_vad_model(model_path: str): return vad_model +def init_frame_vad_model(model_path: str): + """ + Initiate VAD model with model path + """ + if model_path.endswith('.nemo'): + logging.info(f"Using local VAD model from {model_path}") + vad_model = EncDecFrameClassificationModel.restore_from(restore_path=model_path) + elif model_path.endswith('.ckpt'): + vad_model = EncDecFrameClassificationModel.load_from_checkpoint(checkpoint_path=model_path) + else: + logging.info(f"Using NGC cloud VAD model {model_path}") + vad_model = EncDecFrameClassificationModel.from_pretrained(model_name=model_path) + return vad_model + + def stitch_segmented_asr_output( segmented_output_manifest: str, speech_segments_tensor_dir: str = "speech_segments", @@ -1238,32 +1305,6 @@ def construct_manifest_eval( return aligned_vad_asr_output_manifest -def extract_audio_features(vad_model: EncDecClassificationModel, manifest_vad_input: str, out_dir: str) -> str: - """ - Extract audio features and write to out_dir - """ - - file_list = [] - with open(manifest_vad_input, 'r', encoding='utf-8') as fin: - for line in fin.readlines(): - file_list.append(Path(json.loads(line)['audio_filepath']).stem) - - logging.info(f"Extracting features on {len(file_list)} audio files/json lines!") - - for i, test_batch in enumerate(tqdm(vad_model.test_dataloader(), total=len(vad_model.test_dataloader()))): - test_batch = [x.to(vad_model.device) for x in test_batch] - with autocast(): - processed_signal, processed_signal_length = vad_model.preprocessor( - input_signal=test_batch[0], length=test_batch[1], - ) - processed_signal = processed_signal.squeeze(0)[:, :processed_signal_length] - processed_signal = processed_signal.cpu() - outpath = os.path.join(out_dir, file_list[i] + ".pt") - torch.save(processed_signal, outpath) - del test_batch - return out_dir - - def load_rttm_file(filepath: str) -> pd.DataFrame: """ Load rttm file and extract speech segments @@ -1321,7 +1362,7 @@ def load_speech_overlap_segments_from_rttm(rttm_file: str) -> Tuple[List[List[fl Returns: merged (List[List[float]]): merged speech intervals without overlaps - overlaps (List[List[float]]): intervals without overlap speech + overlaps (List[List[float]]): intervals with overlap speech """ speech_segments = list(load_rttm_file(rttm_file)['segment']) speech_segments = [list(x) for x in speech_segments] @@ -1367,7 +1408,9 @@ def get_nonspeech_segments( return nonspeech_segments -def 
get_frame_labels(segments: List[List[float]], frame_length: float, offset: float, duration: float) -> str: +def get_frame_labels( + segments: List[List[float]], frame_length: float, offset: float, duration: float, as_str: bool = True +) -> str: """ Generate frame-level binary labels for audio, '0' for non-speech and '1' for speech @@ -1379,30 +1422,39 @@ def get_frame_labels(segments: List[List[float]], frame_length: float, offset: f """ labels = [] n_frames = int(np.ceil(duration / frame_length)) - sid = 0 for i in range(n_frames): t = offset + i * frame_length while sid < len(segments) - 1 and segments[sid][1] < t: sid += 1 - if segments[sid][0] <= t <= segments[sid][1]: - labels.append('1') + if segments[sid][1] != 0 and segments[sid][0] <= t <= segments[sid][1]: + labels.append(1) else: - labels.append('0') - return ' '.join(labels) + labels.append(0) + if as_str: + return ' '.join([str(x) for x in labels]) + return [float(x) for x in labels] def plot_sample_from_rttm( - audio_file: str, rttm_file: str, max_duration: Optional[float] = None, save_path: str = "", show: bool = True + audio_file: str, + rttm_file: str, + max_duration: Optional[float] = None, + save_path: str = "", + show: bool = True, + offset: float = 0.0, + unit_frame_len: float = 0.01, ): + """ + Plot audio signal and frame-level labels from RTTM file + """ plt.figure(figsize=[20, 2]) - UNIT_FRAME_LEN = 0.01 - audio, sample_rate = librosa.load(path=audio_file, sr=16000, mono=True, offset=0, duration=max_duration) + audio, sample_rate = librosa.load(path=audio_file, sr=16000, mono=True, offset=offset, duration=max_duration) dur = librosa.get_duration(y=audio, sr=sample_rate) segments = load_speech_segments_from_rttm(rttm_file) - labels = get_frame_labels(segments, UNIT_FRAME_LEN, 0.0, dur) + labels = get_frame_labels(segments, unit_frame_len, offset, dur) labels = [float(x) for x in labels.split()] length = len(labels) @@ -1415,7 +1467,7 @@ def plot_sample_from_rttm( ax1.set_ylim([-1, 1]) ax2 = ax1.twinx() - ax2.plot(np.arange(length) * UNIT_FRAME_LEN, labels, 'r', label='label') + ax2.plot(np.arange(length) * unit_frame_len, labels, 'r', label='label') ax2.tick_params(axis='y', labelcolor='r') ax2.legend(loc='lower right', shadow=True) ax2.set_ylabel('Labels') @@ -1425,3 +1477,240 @@ def plot_sample_from_rttm( if save_path: plt.savefig(save_path) return ipd.Audio(audio, rate=16000) + + +def align_labels_to_frames(probs, labels, threshold=0.2): + """ + Aligns labels to frames when the frame length (e.g., 10ms) is different from the label length (e.g., 20ms). + The threshold 0.2 is not important, since the actual ratio will always be close to an integer unless using frame/label + lengths that are not multiples of each other (e.g., 15ms frame length and 20ms label length), which is not valid. + The value 0.2 here is just for easier unit testing. 
+ Args: + probs (List[float]): list of probabilities + labels (List[int]): list of labels + threshold (float): threshold for rounding ratio to integer + Returns: + labels (List[int]): list of labels aligned to frames + """ + frames_len = len(probs) + labels_len = len(labels) + probs = torch.tensor(probs).float() + labels = torch.tensor(labels).long() + + if frames_len < labels_len: + # pad labels with zeros until labels_len is a multiple of frames_len + ratio = labels_len / frames_len + res = labels_len % frames_len + if ( + ceil(ratio) - ratio < threshold + ): # e.g., ratio = 2.9, ceil(ratio) = 3, then we pad labels to make it a multiple of 3 + # pad labels with zeros until labels_max_len is a multiple of logits_max_len + labels = labels.tolist() + if len(labels) % ceil(ratio) != 0: + labels += [0] * (ceil(ratio) - len(labels) % ceil(ratio)) + labels = torch.tensor(labels).long() + labels = labels.view(-1, ceil(ratio)).amax(1) + return align_labels_to_frames(probs.tolist(), labels.long().tolist()) + # otherwise, truncate additional labels until labels_max_len is a multiple of logits_max_len + if res > 0: + labels = labels[:-res] + labels = labels.view(-1, floor(ratio)).amax(1) + return labels.long().tolist() + elif frames_len > labels_len: + # repeat labels until labels_len is a multiple of frames_len + ratio = frames_len / labels_len + res = frames_len % labels_len + if ceil(ratio) - ratio < threshold: + # e.g., ratio is 1.83, ceil(ratio) = 2, then we repeat labels to make it a multiple of 2, and discard the redundant labels + labels = labels.repeat_interleave(ceil(ratio), dim=0).long().tolist() + labels = labels[:frames_len] + else: + # e.g., ratio is 2.02, floor(ratio) = 2, then we repeat labels to make it a multiple of 2 and add additional labels + labels = labels.repeat_interleave(floor(ratio), dim=0).long().tolist() + if res > 0: + labels += labels[-res:] + return labels + else: + return labels.long().tolist() + + +def read_rttm_as_pyannote_object(rttm_file: str, speaker_override: Optional[str] = None) -> Annotation: + """ + Read rttm file and construct a Pyannote object. + Args: + rttm_file(str) : path of rttm file. + speaker_override(str) : if not None, all speakers will be replaced by this value. + Returns: + annotation(pyannote.Annotation): annotation object + """ + annotation = Annotation() + data = pd.read_csv(rttm_file, sep="\s+", delimiter=None, header=None) + data = data.rename(columns={3: "start", 4: "dur", 7: "speaker"}) + for index, row in data.iterrows(): + if speaker_override is not None: + annotation[Segment(row['start'], row['start'] + row['dur'])] = speaker_override + else: + annotation[Segment(row['start'], row['start'] + row['dur'])] = row['speaker'] + return annotation + + +def convert_labels_to_speech_segments(labels: List[float], frame_length_in_sec: float = 0.01): + """ + Convert a list of labels to a list of speech segments. 
+ Args: + labels (List[float]): list of labels + frame_length_in_sec (float): frame length in seconds + Returns: + segments (List[Tuple[float, float]]): list of speech segments + """ + segments = [] + start = -1 + for i, label in enumerate(labels): + if label == 1: + if start == -1: + start = i * frame_length_in_sec + else: + if start > -1: + segments.append([start, (i - 1) * frame_length_in_sec]) + start = -1 + if start != -1: + segments.append([start, (len(labels) - 1) * frame_length_in_sec]) + return segments + + +def frame_vad_construct_pyannote_object_per_file( + prediction: Union[str, List[float]], groundtruth: Union[str, List[float]], frame_length_in_sec: float = 0.01 +) -> Tuple[Annotation, Annotation]: + """ + Construct a Pyannote object for evaluation. + Args: + prediction (str) : path of VAD predictions stored as RTTM or CSV-like txt. + groundtruth (str): path of groundtruth rttm file. + frame_length_in_sec(float): frame length in seconds + Returns: + reference(pyannote.Annotation): groundtruth + hypothesis(pyannote.Annotation): prediction + """ + + hypothesis = Annotation() + if isinstance(groundtruth, str) and prediction.endswith('.rttm'): + hypothesis = read_rttm_as_pyannote_object(prediction, speaker_override='speech') + elif isinstance(groundtruth, str) and prediction.endswith('.txt'): + pred = pd.read_csv(prediction, sep=" ", header=None) + for index, row in pred.iterrows(): + hypothesis[Segment(float(row[0]), float(row[0]) + float(row[1]))] = 'speech' + elif isinstance(groundtruth, list): + segments = convert_labels_to_speech_segments(prediction, frame_length_in_sec) + for segment in segments: + hypothesis[Segment(segment[0], segment[1])] = 'speech' + else: + raise ValueError('prediction must be a path to rttm file or a list of frame labels.') + + reference = Annotation() + if isinstance(groundtruth, str) and groundtruth.endswith('.rttm'): + reference = read_rttm_as_pyannote_object(groundtruth, speaker_override='speech') + elif isinstance(groundtruth, list): + segments = convert_labels_to_speech_segments(groundtruth, frame_length_in_sec) + for segment in segments: + reference[Segment(segment[0], segment[1])] = 'speech' + else: + raise ValueError('groundtruth must be a path to rttm file or a list of frame labels.') + return reference, hypothesis + + +def frame_vad_infer_load_manifest(cfg: DictConfig): + """ + Load manifest file and prepare label/rttm mapping + Args: + cfg: config file + Returns: + manifest_orig (List[Dict]): original manifest data + key_labels_map (Dict): mapping from unique_audio_name to its labels + key_rttm_map (Dict): mapping from unique_audio_name to its rttm file + """ + unique_audio_names = set() + key_labels_map = {} + key_rttm_map = {} + manifest_orig = [] + manifest_file = Path(cfg.dataset).absolute().as_posix() + with open(manifest_file, 'r') as fin: + for line in fin.readlines(): + entry = json.loads(line.strip()) + audio_filepath = get_full_path(audio_file=entry['audio_filepath'], manifest_file=manifest_file) + entry['audio_filepath'] = str(audio_filepath) + uniq_audio_name = Path(audio_filepath).stem + + if uniq_audio_name in unique_audio_names: + raise ValueError("Please make sure each line is with different audio_filepath! 
") + else: + unique_audio_names.add(uniq_audio_name) + + manifest_orig.append(entry) + + # always prefer RTTM labels if exist + if "label" not in entry or "rttm_filepath" in entry or "rttm_file" in entry: + rttm_key = "rttm_filepath" if "rttm_filepath" in entry else "rttm_file" + segments = load_speech_segments_from_rttm(entry[rttm_key]) + label_str = get_frame_labels( + segments=segments, + frame_length=cfg.vad.parameters.shift_length_in_sec, + duration=entry['duration'], + offset=entry['offset'], + ) + key_rttm_map[uniq_audio_name] = entry[rttm_key] + key_labels_map[uniq_audio_name] = [float(x) for x in label_str.split()] + elif entry.get("label", None) is not None: + key_labels_map[uniq_audio_name] = [float(x) for x in entry["label"].split()] + else: + raise ValueError("Must have either `label` or `rttm_filepath` in manifest") + + return manifest_orig, key_labels_map, key_rttm_map + + +def frame_vad_eval_detection_error( + pred_dir: str, key_labels_map: dict, key_rttm_map: dict, key_pred_rttm_map: dict, frame_length_in_sec: float +): + """ + Perform evaluation on frame-VAD results + Args: + pred_dir: directory of frame-VAD prediction files with in `.frame` format + key_labels_map: dictionary of mapping each to its labels + key_rttm_map: dictionary of mapping each to its GROUNDTRUTH rttm file + key_pred_rttm_map: dictionary of mapping each to its PREDICTED rttm file + frame_length_in_sec: frame length in seconds, e.g. 0.02s + Returns: + auroc: AUROC score in 0~100% + report: Pyannote detection.DetectionErrorRate() report + """ + all_probs = [] + all_labels = [] + metric = detection.DetectionErrorRate() + key_probs_map = {} + predictions_list = list(Path(pred_dir).glob("*.frame")) + for frame_pred in tqdm(predictions_list, desc="Evaluating VAD results", total=len(predictions_list)): + pred_probs = [] + with frame_pred.open("r") as fin: + for line in fin.readlines(): + line = line.strip() + if not line: + continue + pred_probs.append(float(line)) + key = frame_pred.stem + key_probs_map[key] = pred_probs + key_labels_map[key] = align_labels_to_frames(probs=pred_probs, labels=key_labels_map[key]) + all_probs.extend(key_probs_map[key]) + all_labels.extend(key_labels_map[key]) + + if key in key_rttm_map: + groundtruth = key_rttm_map[key] + else: + groundtruth = key_labels_map[key] + + reference, hypothesis = frame_vad_construct_pyannote_object_per_file( + prediction=key_pred_rttm_map[key], groundtruth=groundtruth, frame_length_in_sec=frame_length_in_sec, + ) + metric(reference, hypothesis) + + auroc = roc_auc_score(y_true=all_labels, y_score=all_probs) + report = metric.report(display=False) + return auroc, report diff --git a/tests/collections/asr/test_asr_classification_model.py b/tests/collections/asr/test_asr_classification_model.py index 44125de92b3d..876bb6073a38 100644 --- a/tests/collections/asr/test_asr_classification_model.py +++ b/tests/collections/asr/test_asr_classification_model.py @@ -255,52 +255,13 @@ def test_EncDecClassificationDatasetConfig_for_AudioToSpeechLabelDataset(self): class TestEncDecFrameClassificationModel(TestEncDecClassificationModel): + @pytest.mark.parametrize(["logits_len", "labels_len"], [(20, 10), (21, 10), (19, 10), (20, 9), (20, 11)]) @pytest.mark.unit - def test_reshape_labels(self, frame_classification_model): + def test_reshape_labels(self, frame_classification_model, logits_len, labels_len): model = frame_classification_model.eval() - logits = torch.ones(4, 20, 2) - labels = torch.ones(4, 10) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = 
torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 21, 2) - labels = torch.ones(4, 10) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 19, 2) - labels = torch.ones(4, 10) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 20, 2) - labels = torch.ones(4, 9) - logits_len = torch.tensor([6, 7, 8, 9]) - labels_len = torch.tensor([5, 6, 7, 8]) - labels_new, labels_len_new = model.reshape_labels( - logits=logits, labels=labels, logits_len=logits_len, labels_len=labels_len - ) - assert labels_new.size(1) == logits.size(1) - assert torch.equal(labels_len_new, torch.tensor([6, 7, 8, 9])) - - logits = torch.ones(4, 20, 2) - labels = torch.ones(4, 11) + logits = torch.ones(4, logits_len, 2) + labels = torch.ones(4, labels_len) logits_len = torch.tensor([6, 7, 8, 9]) labels_len = torch.tensor([5, 6, 7, 8]) labels_new, labels_len_new = model.reshape_labels( diff --git a/tests/collections/asr/utils/test_vad_utils.py b/tests/collections/asr/utils/test_vad_utils.py new file mode 100644 index 000000000000..a7672e1aa43d --- /dev/null +++ b/tests/collections/asr/utils/test_vad_utils.py @@ -0,0 +1,126 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import pytest +from pyannote.core import Annotation, Segment + +from nemo.collections.asr.parts.utils.vad_utils import ( + align_labels_to_frames, + convert_labels_to_speech_segments, + frame_vad_construct_pyannote_object_per_file, + get_frame_labels, + get_nonspeech_segments, + load_speech_overlap_segments_from_rttm, + load_speech_segments_from_rttm, + read_rttm_as_pyannote_object, +) + + +def get_simple_rttm_without_overlap(rttm_file="test1.rttm"): + line = "SPEAKER 1 0 2 speech \n" + speech_segments = [[0.0, 2.0]] + with open(rttm_file, "w") as f: + f.write(line) + return rttm_file, speech_segments + + +def get_simple_rttm_with_overlap(rttm_file="test2.rttm"): + speech_segments = [[0.0, 3.0]] + overlap_segments = [[1.0, 2.0]] + with open(rttm_file, "w") as f: + f.write("SPEAKER 1 0 2 speech \n") + f.write("SPEAKER 1 1 2 speech \n") + return rttm_file, speech_segments, overlap_segments + + +def get_simple_rttm_with_silence(rttm_file="test3.rttm"): + line = "SPEAKER 1 1 2 speech \n" + speech_segments = [[1.0, 2.0]] + silence_segments = [[0.0, 1.0]] + with open(rttm_file, "w") as f: + f.write(line) + return rttm_file, speech_segments, silence_segments + + +class TestVADUtils: + @pytest.mark.parametrize(["logits_len", "labels_len"], [(20, 10), (20, 11), (20, 9), (10, 21), (10, 19)]) + @pytest.mark.unit + def test_align_label_logits(self, logits_len, labels_len): + logits = np.arange(logits_len).tolist() + labels = np.arange(labels_len).tolist() + labels_new = align_labels_to_frames(probs=logits, labels=labels) + + assert len(labels_new) == len(logits) + + @pytest.mark.unit + def test_load_speech_segments_from_rttm(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test1.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + assert speech_segments_new == speech_segments + + @pytest.mark.unit + def test_load_speech_overlap_segments_from_rttm(self, test_data_dir): + rttm_file, speech_segments, overlap_segments = get_simple_rttm_with_overlap(test_data_dir + "/test2.rttm") + speech_segments_new, overlap_segments_new = load_speech_overlap_segments_from_rttm(rttm_file) + assert speech_segments_new == speech_segments + assert overlap_segments_new == overlap_segments + + @pytest.mark.unit + def test_get_nonspeech_segments(self, test_data_dir): + rttm_file, speech_segments, silence_segments = get_simple_rttm_with_silence(test_data_dir + "/test3.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + silence_segments_new = get_nonspeech_segments(speech_segments_new) + assert silence_segments_new == silence_segments + + @pytest.mark.unit + def test_get_frame_labels(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test4.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + frame_labels = get_frame_labels(speech_segments_new, 0.02, 0.0, 3.0, as_str=False) + assert frame_labels[0] == 1 + assert len(frame_labels) == 150 + + @pytest.mark.unit + def test_convert_labels_to_speech_segments(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test5.rttm") + speech_segments_new = load_speech_segments_from_rttm(rttm_file) + frame_labels = get_frame_labels(speech_segments_new, 0.02, 0.0, 3.0, as_str=False) + speech_segments_new = convert_labels_to_speech_segments(frame_labels, 0.02) + assert speech_segments_new == speech_segments + + @pytest.mark.unit + def 
test_read_rttm_as_pyannote_object(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test6.rttm") + pyannote_object = read_rttm_as_pyannote_object(rttm_file) + pyannote_object_gt = Annotation() + pyannote_object_gt[Segment(0.0, 2.0)] = 'speech' + assert pyannote_object == pyannote_object_gt + + @pytest.mark.unit + def test_frame_vad_construct_pyannote_object_per_file(self, test_data_dir): + rttm_file, speech_segments = get_simple_rttm_without_overlap(test_data_dir + "/test7.rttm") + # test for rttm input + ref, hyp = frame_vad_construct_pyannote_object_per_file(rttm_file, rttm_file) + pyannote_object_gt = Annotation() + pyannote_object_gt[Segment(0.0, 2.0)] = 'speech' + assert ref == hyp == pyannote_object_gt + + # test for list input + speech_segments = load_speech_segments_from_rttm(rttm_file) + frame_labels = get_frame_labels(speech_segments, 0.02, 0.0, 3.0, as_str=False) + speech_segments_new = convert_labels_to_speech_segments(frame_labels, 0.02) + assert speech_segments_new == speech_segments + ref, hyp = frame_vad_construct_pyannote_object_per_file(frame_labels, frame_labels, 0.02) + assert ref == hyp == pyannote_object_gt From cfbe0924db02a1a557aeeea55cc1a01b83903e71 Mon Sep 17 00:00:00 2001 From: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Date: Thu, 1 Jun 2023 17:39:14 -0700 Subject: [PATCH 003/123] [TTS][zh] refine hardcoded lowercase for ASCII letters. (#6781) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- .../text_to_speech/tokenizer_utils.py | 7 +- .../text_to_speech/tts_tokenizers.py | 30 +++---- .../tts/g2p/models/zh_cn_pinyin.py | 82 +++++++++++++------ 3 files changed, 77 insertions(+), 42 deletions(-) diff --git a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py index 2644e487d585..92a3e0fb49e0 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py @@ -185,10 +185,9 @@ def any_locale_word_tokenize(text: str) -> List[Tuple[List[str], bool]]: return _word_tokenize(words) -# TODO @xueyang: deprecate language-specific text preprocessing and use any_locale_text_preprocessing. -def spanish_text_preprocessing(text): +def spanish_text_preprocessing(text: str) -> str: return text.lower() -def chinese_text_preprocessing(text): - return text.lower() +def chinese_text_preprocessing(text: str) -> str: + return text diff --git a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py index 21f352d64710..abcbdb1661b9 100644 --- a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py +++ b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py @@ -692,7 +692,7 @@ def __init__( sep='|', # To be able to distinguish between 2/3 letters codes. add_blank_at=None, pad_with_space=False, - text_preprocessing_func=lambda text: chinese_text_preprocessing(text), + text_preprocessing_func=chinese_text_preprocessing, ): """Chinese phoneme-based tokenizer. 
Args: @@ -716,12 +716,15 @@ def __init__( if silence is not None: self.silence, tokens = len(tokens), tokens + [silence] # Silence - self.phonemes_list = g2p.phonemes_list - self.tones_list = g2p.tones_list + self.phoneme_list = g2p.phoneme_list + self.tone_list = g2p.tone_list + self.ascii_letter_list = g2p.ascii_letter_list - tokens.extend(self.phonemes_list) - tokens.extend(self.tones_list) - tokens.extend(string.ascii_lowercase) + tokens.extend(self.phoneme_list) + tokens.extend(self.tone_list) + tokens.extend(self.ascii_letter_list) + + self.text_preprocessing_func = text_preprocessing_func if apostrophe: tokens.append("'") # Apostrophe @@ -737,15 +740,12 @@ def __init__( self.punct = punct self.pad_with_space = pad_with_space - - self.text_preprocessing_func = text_preprocessing_func self.g2p = g2p - def encode(self, text): + def encode(self, text: str) -> List[int]: """See base class for more information.""" - text = self.text_preprocessing_func(text) - g2p_text = self.g2p(text) # TODO: handle infer + g2p_text = self.g2p(text) return self.encode_from_g2p(g2p_text, text) def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None): @@ -762,15 +762,15 @@ def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None): # Add space if last one isn't one if p == space and len(ps) > 0 and ps[-1] != space: ps.append(p) - # Add next phoneme or char (if chars=True) - elif (p.isalnum() or p == "'" or p in self.phonemes_list or p in self.tones_list) and p in tokens: + # Add next phoneme or tone or ascii letter or apostrophe. + elif (p.isalnum() or p == "'" or p in self.phoneme_list + self.tone_list + self.ascii_letter_list) and p in tokens: ps.append(p) - # Add punct + # Add punctuation elif (p in self.PUNCT_LIST) and self.punct: ps.append(p) # Warn about unknown char/phoneme elif p != space: - message = f"Text: [{''.join(g2p_text)}] contains unknown char/phoneme: [{p}]." + message = f"Text: [{' '.join(g2p_text)}] contains unknown char/phoneme: [{p}]." if raw_text is not None: message += f"Original text: [{raw_text}]. Symbol will be skipped." logging.warning(message) diff --git a/nemo/collections/tts/g2p/models/zh_cn_pinyin.py b/nemo/collections/tts/g2p/models/zh_cn_pinyin.py index 73bcbec5a414..35a22f6ba118 100644 --- a/nemo/collections/tts/g2p/models/zh_cn_pinyin.py +++ b/nemo/collections/tts/g2p/models/zh_cn_pinyin.py @@ -16,7 +16,9 @@ from collections import defaultdict from typing import Dict, List, Optional, Union +from nemo.collections.common.tokenizers.text_to_speech.ipa_lexicon import get_grapheme_character_set from nemo.collections.tts.g2p.models.base import BaseG2p +from nemo.collections.tts.g2p.utils import set_grapheme_case from nemo.utils import logging @@ -25,25 +27,38 @@ def __init__( self, phoneme_dict: Union[str, pathlib.Path, Dict[str, List[str]]], phoneme_prefix: str = "#", + phoneme_case: str = "upper", tone_prefix: str = "#", + ascii_letter_prefix: str = "", + ascii_letter_case: str = "lower", word_tokenize_func=None, apply_to_oov_word=None, mapping_file: Optional[str] = None, word_segmenter: Optional[str] = None, ): - """Chinese G2P module. This module first converts Chinese characters into pinyin sequences using pypinyin, then pinyin sequences would - be further converted into phoneme sequences using pinyin_dict_nv_22.10.txt dict file. For Chinese and English bilingual sentences, the English words - would be converted into letters. + """ + Chinese G2P module. 
This module first converts Chinese characters into pinyin sequences using pypinyin, then + pinyin sequences would be further converted into phoneme sequences by looking them up in the `phoneme_dict`. + This G2P module also works with Chinese/English bilingual sentences where English words would be converted + into letters. It is advised to attach prefix symbols for Chinese phonemes and tones to discriminate them + from English letters to avoid any potential symbol set overlaps. Args: phoneme_dict (str, Path, Dict): Path to pinyin_dict_nv_22.10.txt dict file or a dict object. phoneme_prefix (str): Prepend a special symbol to any phonemes in order to distinguish phonemes from graphemes because there may be overlaps between the two sets. Phoneme dictionary typically applies uppercase initials and finals. It is suggested to choose a prefix that is not used or preserved somewhere else. Default to "#". + phoneme_case (str): Specify the case chosen from `"lower"`, `"upper"`, or `"mixed"`, and process the + cases of Chinese phonemes. Default to `"upper"`. tone_prefix (str): Prepend a special symbol to any tone digits. Default to "#". + ascii_letter_prefix (str): Prepend a special symbol to any ASCII letters. Default to "". + ascii_letter_case (str): Specify the case chosen from `"lower"`, `"upper"`, or `"mixed"`, and process the + cases of non-Chinese words. Default to `"lower"`. word_tokenize_func: Function for tokenizing text to words. - It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word representation and flag whether to leave unchanged or not. - It is expected that unchangeable word representation will be represented as List[str], other cases are represented as str. + It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word representation + and flag whether to leave unchanged or not. + It is expected that unchangeable word representation will be represented as List[str], other cases are + represented as str. It is useful to mark word as unchangeable which is already in phoneme representation. apply_to_oov_word: Function that will be applied to out of phoneme_dict word. word_segmenter: method that will be applied to segment utterances into words for better polyphone disambiguation. 
@@ -58,13 +73,27 @@ def __init__( phoneme_prefix = "" if tone_prefix is None: tone_prefix = "" + if ascii_letter_prefix is None: + ascii_letter_prefix = "" + # phonemes phoneme_dict = ( - self._parse_as_pinyin_dict(phoneme_dict, phoneme_prefix) + self._parse_as_pinyin_dict(phoneme_dict, phoneme_prefix, phoneme_case) if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path) else phoneme_dict ) - self.phonemes_list = list({pron for prons in phoneme_dict.values() for pron in prons}) + self.phoneme_list = list({pron for prons in phoneme_dict.values() for pron in prons}) + + # tones + self.tone_dict = {str(x): tone_prefix + str(x) for x in range(1, 6)} + self.tone_list = list(self.tone_dict.values()) + + # ascii letters + self.ascii_letter_dict = { + x: ascii_letter_prefix + x for x in get_grapheme_character_set(locale="en-US", case=ascii_letter_case) + } + self.ascii_letter_list = sorted(self.ascii_letter_dict) + self.ascii_letter_case = ascii_letter_case if apply_to_oov_word is None: logging.warning( @@ -81,9 +110,6 @@ def __init__( mapping_file=mapping_file, ) - self.tones = {str(x): tone_prefix + str(x) for x in range(1, 6)} - self.tones_list = list(self.tones.values()) - if word_segmenter == "jieba": try: import jieba @@ -109,7 +135,7 @@ def __init__( @staticmethod def _parse_as_pinyin_dict( - phoneme_dict_path: Union[str, pathlib.Path], phoneme_prefix: str + phoneme_dict_path: Union[str, pathlib.Path], phoneme_prefix: str, phoneme_case: str ) -> Dict[str, List[str]]: """Loads pinyin dict file, and generates a set of all valid symbols.""" g2p_dict = defaultdict(list) @@ -120,11 +146,13 @@ def _parse_as_pinyin_dict( continue parts = line.split('\t') - # lowercase the Chinese syllables because pypinyin requires lowercase inputs. + # Convert the cases of Chinese syllables loaded from the dictionary to lowercase to match the lowercase + # Chinese syllable outputs generated by the function `pypinyin.lazy_pinyin`. Note that the function + # `pypinyin.lazy_pinyin` preserves the cases of ASCII letters. syllable = parts[0].lower() - pronunciation = parts[1].split() + pronunciation = set_grapheme_case(parts[1], case=phoneme_case).split() - # add phoneme prefix to distinguish from other symbols. + # add a prefix to distinguish phoneme symbols from non-phoneme symbols. pronunciation_with_prefix = [phoneme_prefix + pron for pron in pronunciation] g2p_dict[syllable] = pronunciation_with_prefix @@ -132,14 +160,19 @@ def _parse_as_pinyin_dict( def __call__(self, text: str) -> List[str]: """ - errors func handle below is to process the bilingual situation, - where English words would be split into letters. - e.g. 我今天去了Apple Store, 买了一个iPhone。 - would return a list - ['wo3', 'jin1', 'tian1', 'qu4', 'le5', 'A', 'p', 'p', 'l', 'e', - ' ', 'S', 't', 'o', 'r', 'e', ',', ' ', 'mai3', 'le5', 'yi2', - 'ge4', 'i', 'P', 'h', 'o', 'n', 'e', '。'] + This forward pass function translates Chinese characters into pinyin sequences and then converts the pinyin + into phonemes. It is primarily designed to process texts containing with Chinese characters, but we have + extended its support to handle texts that include both Chinese and English. This extension was mainly + necessitated by the limited availability of bilingual datasets. The `errors` argument used in the + `pypinyin.lazy_pinyin` function below is used to process non-Chinese words, where each English word is split + into letters. 
+ + For example, The text "我今天去了Apple Store, 买了一个iPhone。" would be converted as a list, + `['wo3', 'jin1', 'tian1', 'qu4', 'le5', 'A', 'p', 'p', 'l', 'e', ' ', 'S', 't', 'o', 'r', 'e', ',', ' ', 'mai3', + 'le5', 'yi2', 'ge4', 'i', 'P', 'h', 'o', 'n', 'e', '。']` """ + text = set_grapheme_case(text, case=self.ascii_letter_case) + pinyin_seq = [] words_list = self.word_segmenter(text) @@ -154,15 +187,18 @@ def __call__(self, text: str) -> List[str]: ) phoneme_seq = [] for pinyin in pinyin_seq: + # only pinyin has tones while non-pinyin doesn't. tone_hyp = pinyin[-1] - if tone_hyp in self.tones: + if tone_hyp in self.tone_dict: syllable = pinyin[:-1] assert syllable in self.phoneme_dict, f"Syllable <{syllable}> does not exist in the dictionary." phoneme_seq += self.phoneme_dict[syllable] - phoneme_seq.append(self.tones[tone_hyp]) + phoneme_seq.append(self.tone_dict[tone_hyp]) # All pinyin would end up with a number in 1-5, which represents tones of the pinyin. # For symbols which are not pinyin, such as English letters and Chinese punctuations, we directly # use them as inputs. + elif tone_hyp in self.ascii_letter_dict: + phoneme_seq.append(self.ascii_letter_dict[tone_hyp]) else: phoneme_seq.append(pinyin) return phoneme_seq From 5428a97e3c9578b79fa7b30b6c53f2ae9759f418 Mon Sep 17 00:00:00 2001 From: bene-ges Date: Fri, 2 Jun 2023 18:44:18 +0300 Subject: [PATCH 004/123] Spellchecking ASR customization model (#6179) * bug fixes Signed-off-by: Alexandra Antonova * fix bugs, add preparation and evaluation scripts, add readme Signed-off-by: Alexandra Antonova * small fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add real coverage calculation, small fixes, more debug information Signed-off-by: Alexandra Antonova * add option to pass a filelist and output folder - to handle inference from multiple input files Signed-off-by: Alexandra Antonova * added preprocessing for yago wikipedia articles - finding yago entities and their subphrases Signed-off-by: Alexandra Antonova * yago wiki preprocessing, sampling, pseudonormalization Signed-off-by: Alexandra Antonova * more scripts for preparation of training examples Signed-off-by: Alexandra Antonova * bug fixes Signed-off-by: Alexandra Antonova * add some alphabet checks Signed-off-by: Alexandra Antonova * add bert on subwords, concatenate it to bert on characters Signed-off-by: Alexandra Antonova * add calculation of character_pos_to_subword_pos Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * tensor join bug fix Signed-off-by: Alexandra Antonova * double hidden_size in classifier Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * default index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * pad index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * remove pdb Signed-off-by: Alexandra Antonova * fix bugs, add creation of tarred dataset Signed-off-by: Alexandra Antonova * add possibility to change sequence len at inference Signed-off-by: Alexandra Antonova * change sampling of dummy candidates at inference, add candidate info file Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * update transcription now uses info Signed-off-by: Alexandra Antonova * write path 
Signed-off-by: Alexandra Antonova * 1. add tarred dataset support(untested). 2. fix bug with ban_ngrams in indexing Signed-off-by: Alexandra Antonova * skip short_sent if no real candidates Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * add braceexpand Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug in np.ones Signed-off-by: Alexandra Antonova * fix bug in collate Signed-off-by: Alexandra Antonova * change tensor type to long because of error in torch.gather Signed-off-by: Alexandra Antonova * fix for empty spans tensor Signed-off-by: Alexandra Antonova * same fixes in _collate_fn for tarred dataset Signed-off-by: Alexandra Antonova * fix bug from previous commit Signed-off-by: Alexandra Antonova * change int types to be shorter to minimize tar size Signed-off-by: Alexandra Antonova * refactoring of datasets and inference Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * tar by 100k examples, small fixes Signed-off-by: Alexandra Antonova * small fixes, add analytics script Signed-off-by: Alexandra Antonova * Add functions for dynamic programming comparison to get best path by ngrams Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * small fix Signed-off-by: Alexandra Antonova * fixes to support testing on SPGISpeech Signed-off-by: Alexandra Antonova * add preprocessing for userlibri Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * small refactoring before pr. Add bash-scripts reproducing evaluation Signed-off-by: Alexandra Antonova * style fix Signed-off-by: Alexandra Antonova * small fixes in inference Signed-off-by: Alexandra Antonova * bug fix - didn't move window on last symbol Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug - shuffle was before truncation of sorted candidates Signed-off-by: Alexandra Antonova * refactoring, fix some bugs Signed-off-by: Alexandra Antonova * variour fixes. Add word_indices at inference Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add candidate positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Move data preparation and evaluation to other repo Signed-off-by: Alexandra Antonova * add infer_reproduce_paper. 
Refactoring Signed-off-by: Alexandra Antonova * refactor inference using fragment indices Signed-off-by: Alexandra Antonova * add some helper functions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug with parameters order Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bugs Signed-off-by: Alexandra Antonova * refactoring, fix bug Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add multiple variants of adjusting start/end positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add unit tests, other fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Signed-off-by: Alexandra Antonova * fix CodeQl warnings Signed-off-by: Alexandra Antonova * bug fixes Signed-off-by: Alexandra Antonova * fix bugs, add preparation and evaluation scripts, add readme Signed-off-by: Alexandra Antonova * small fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add real coverage calculation, small fixes, more debug information Signed-off-by: Alexandra Antonova * add option to pass a filelist and output folder - to handle inference from multiple input files Signed-off-by: Alexandra Antonova * added preprocessing for yago wikipedia articles - finding yago entities and their subphrases Signed-off-by: Alexandra Antonova * yago wiki preprocessing, sampling, pseudonormalization Signed-off-by: Alexandra Antonova * more scripts for preparation of training examples Signed-off-by: Alexandra Antonova * bug fixes Signed-off-by: Alexandra Antonova * add some alphabet checks Signed-off-by: Alexandra Antonova * add bert on subwords, concatenate it to bert on characters Signed-off-by: Alexandra Antonova * add calculation of character_pos_to_subword_pos Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * tensor join bug fix Signed-off-by: Alexandra Antonova * double hidden_size in classifier Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * default index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * pad index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * remove pdb Signed-off-by: Alexandra Antonova * fix bugs, add creation of tarred dataset Signed-off-by: Alexandra Antonova * add possibility to change sequence len at inference Signed-off-by: Alexandra Antonova * change sampling of dummy candidates at inference, add candidate info file Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * update transcription now uses info Signed-off-by: Alexandra Antonova * write path Signed-off-by: Alexandra Antonova * 1. add tarred dataset support(untested). 2. 
fix bug with ban_ngrams in indexing Signed-off-by: Alexandra Antonova * skip short_sent if no real candidates Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * add braceexpand Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug in np.ones Signed-off-by: Alexandra Antonova * fix bug in collate Signed-off-by: Alexandra Antonova * change tensor type to long because of error in torch.gather Signed-off-by: Alexandra Antonova * fix for empty spans tensor Signed-off-by: Alexandra Antonova * same fixes in _collate_fn for tarred dataset Signed-off-by: Alexandra Antonova * fix bug from previous commit Signed-off-by: Alexandra Antonova * change int types to be shorter to minimize tar size Signed-off-by: Alexandra Antonova * refactoring of datasets and inference Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * tar by 100k examples, small fixes Signed-off-by: Alexandra Antonova * small fixes, add analytics script Signed-off-by: Alexandra Antonova * Add functions for dynamic programming comparison to get best path by ngrams Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * small fix Signed-off-by: Alexandra Antonova * fixes to support testing on SPGISpeech Signed-off-by: Alexandra Antonova * add preprocessing for userlibri Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * small refactoring before pr. Add bash-scripts reproducing evaluation Signed-off-by: Alexandra Antonova * style fix Signed-off-by: Alexandra Antonova * small fixes in inference Signed-off-by: Alexandra Antonova * bug fix - didn't move window on last symbol Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug - shuffle was before truncation of sorted candidates Signed-off-by: Alexandra Antonova * refactoring, fix some bugs Signed-off-by: Alexandra Antonova * variour fixes. Add word_indices at inference Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add candidate positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Move data preparation and evaluation to other repo Signed-off-by: Alexandra Antonova * add infer_reproduce_paper. 
Refactoring Signed-off-by: Alexandra Antonova * refactor inference using fragment indices Signed-off-by: Alexandra Antonova * add some helper functions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug with parameters order Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bugs Signed-off-by: Alexandra Antonova * refactoring, fix bug Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add multiple variants of adjusting start/end positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add unit tests, other fixes Signed-off-by: Alexandra Antonova * fix Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix CodeQl warnings Signed-off-by: Alexandra Antonova * add script for full inference pipeline, refactoring Signed-off-by: Alexandra Antonova * add tutorial Signed-off-by: Alexandra Antonova * take example data from HuggingFace Signed-off-by: Alexandra Antonova * add docs Signed-off-by: Alexandra Antonova * fix comment Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * small fixes for PR Signed-off-by: Alexandra Antonova * add some more tests Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * try to fix tests adding with_downloads Signed-off-by: Alexandra Antonova * skip tests with tokenizer download Signed-off-by: Alexandra Antonova --------- Signed-off-by: Alexandra Antonova Signed-off-by: Alexandra Antonova Co-authored-by: Alexandra Antonova Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/nlp/models.rst | 1 + .../nlp/spellchecking_asr_customization.rst | 128 ++ docs/source/starthere/tutorials.rst | 3 + .../spellchecking_asr_customization/README.md | 32 + .../checkpoint_to_nemo.py | 38 + ...pellchecking_asr_customization_config.yaml | 97 ++ .../convert_data_to_tarred.sh | 50 + .../create_custom_vocab_index.py | 72 + .../create_tarred_dataset.py | 99 ++ .../helpers.py | 86 + .../postprocess_and_update_manifest.py | 79 + .../prepare_input_from_manifest.py | 129 ++ .../run_infer.sh | 99 ++ .../run_training.sh | 56 + .../run_training_tarred.sh | 63 + .../spellchecking_asr_customization_infer.py | 123 ++ .../spellchecking_asr_customization_train.py | 66 + .../extract_giza_alignments.py | 215 +-- .../__init__.py | 20 + .../bert_example.py | 593 +++++++ .../dataset.py | 521 ++++++ .../spellchecking_asr_customization/utils.py | 845 ++++++++++ .../text_normalization_as_tagging/utils.py | 196 +++ nemo/collections/nlp/models/__init__.py | 1 + .../__init__.py | 18 + .../spellchecking_model.py | 526 ++++++ .../spoken_wikipedia/run.sh | 2 +- .../test_spellchecking_asr_customization.py | 1102 +++++++++++++ .../ctc_segmentation/scripts/prepare_data.py | 2 +- ...pellMapper_English_ASR_Customization.ipynb | 1403 +++++++++++++++++ .../spellmapper_customization_vocabulary.png | Bin 0 -> 39243 bytes .../images/spellmapper_data_preparation.png | 
Bin 0 -> 75265 bytes .../images/spellmapper_inference_pipeline.png | Bin 0 -> 146148 bytes 33 files changed, 6459 insertions(+), 206 deletions(-) create mode 100644 docs/source/nlp/spellchecking_asr_customization.rst create mode 100644 examples/nlp/spellchecking_asr_customization/README.md create mode 100644 examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py create mode 100644 examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml create mode 100644 examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh create mode 100644 examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py create mode 100644 examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py create mode 100644 examples/nlp/spellchecking_asr_customization/helpers.py create mode 100644 examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py create mode 100644 examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py create mode 100644 examples/nlp/spellchecking_asr_customization/run_infer.sh create mode 100644 examples/nlp/spellchecking_asr_customization/run_training.sh create mode 100644 examples/nlp/spellchecking_asr_customization/run_training_tarred.sh create mode 100644 examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py create mode 100644 examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py create mode 100644 nemo/collections/nlp/data/spellchecking_asr_customization/utils.py create mode 100644 nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py create mode 100644 nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py create mode 100644 tests/collections/nlp/test_spellchecking_asr_customization.py create mode 100644 tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb create mode 100644 tutorials/nlp/images/spellmapper_customization_vocabulary.png create mode 100644 tutorials/nlp/images/spellmapper_data_preparation.png create mode 100644 tutorials/nlp/images/spellmapper_inference_pipeline.png diff --git a/docs/source/nlp/models.rst b/docs/source/nlp/models.rst index 932be201bfb2..ad50d976db9f 100755 --- a/docs/source/nlp/models.rst +++ b/docs/source/nlp/models.rst @@ -9,6 +9,7 @@ NeMo's NLP collection supports provides the following task-specific models: :maxdepth: 1 punctuation_and_capitalization_models + spellchecking_asr_customization token_classification joint_intent_slot text_classification diff --git a/docs/source/nlp/spellchecking_asr_customization.rst b/docs/source/nlp/spellchecking_asr_customization.rst new file mode 100644 index 000000000000..f9009b520361 --- /dev/null +++ b/docs/source/nlp/spellchecking_asr_customization.rst @@ -0,0 +1,128 @@ +.. _spellchecking_asr_customization: + +SpellMapper (Spellchecking ASR Customization) Model +===================================================== + +SpellMapper is a non-autoregressive model for postprocessing of ASR output. It gets as input a single ASR hypothesis (text) and a custom vocabulary and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. 
Unlike traditional spellchecking approaches, which aim to correct known words using language models, SpellMapper's goal is to correct highly specific user terms, out-of-vocabulary (OOV) words or spelling variations (e.g., "John Koehn", "Jon Cohen"). + +This model is an alternative to word boosting/shallow fusion approaches: + +- does not require retraining ASR model; +- does not require beam-search/language model (LM); +- can be applied on top of any English ASR model output; + +Model Architecture +------------------ +Though SpellMapper is based on `BERT `__ :cite:`nlp-ner-devlin2018bert` architecture, it uses some non-standard tricks that make it different from other BERT-based models: + +- ten separators (``[SEP]`` tokens) are used to combine the ASR hypothesis and ten candidate phrases into a single input; +- the model works on character level; +- subword embeddings are concatenated to the embeddings of each character that belongs to this subword; + + .. code:: + + Example input: [CLS] a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o [SEP] d i d i e r _ s a u m o n [SEP] a s t r o n o m i e [SEP] t r i s t a n _ g u i l l o t [SEP] ... + Input segments: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 + Example output: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 0 ... + +The model calculates logits for each character x 11 labels: + +- ``0`` - character doesn't belong to any candidate, +- ``1..10`` - character belongs to candidate with this id. + +At inference average pooling is applied to calculate replacement probability for the whole fragments. + +Quick Start Guide +----------------- + +We recommend you try this model in a Jupyter notebook (need GPU): +`NeMo/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb `__. + +A pretrained English checkpoint can be found at `HuggingFace `__. + +An example inference pipeline can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/run_infer.sh `__. + +An example script on how to train the model can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/run_training.sh `__. + +An example script on how to train on large datasets can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh `__. + +The default configuration file for the model can be found here: `NeMo/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml `__. + +.. _dataset_spellchecking_asr_customization: + +Input/Output Format at Inference stage +-------------------------------------- +Here we describe input/output format of the SpellMapper model. + +.. note:: + + If you use `inference pipeline `__ this format will be hidden inside and you only need to provide an input manifest and user vocabulary and you will get a corrected manifest. + +An input line should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of non-dummy candidates, separated by space + 4. approximate start/end coordinates of non-dummy candidates (correspond to ids in third column) + +Example input (in one line): + +.. 
code:: + + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 1 2 6 7 8 9 10 + CUSTOM 6 23;CUSTOM 4 10;CUSTOM 4 15;CUSTOM 56 62;CUSTOM 5 19;CUSTOM 28 31;CUSTOM 39 48 + +Each line in SpellMapper output is tab-separated and consists of 4 columns: + 1. ASR-hypothesis (same as in input) + 2. 10 candidates separated by semicolon (same as in input) + 3. fragment predictions, separated by semicolon, each prediction is a tuple (start, end, candidate_id, probability) + 4. letter predictions - candidate_id predicted for each letter (this is only for debug purposes) + +Example output (in one line): + +.. code:: + + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 56 62 7 0.99998;4 20 8 0.95181;12 20 8 0.44829;4 17 8 0.99464;12 17 8 0.97645 + 8 8 8 0 8 8 8 8 8 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7 + +Training Data Format +-------------------- + +For training, the data should consist of 5 files: + +- ``config.json`` - BERT config +- ``label_map.txt`` - labels from 0 to 10, do not change +- ``semiotic_classes.txt`` - currently there are only two classes: ``PLAIN`` and ``CUSTOM``, do not change +- ``train.tsv`` - training examples +- ``test.tsv`` - validation examples + +Note that since all these examples are synthetic, we do not reserve a set for final testing. Instead, we run `inference pipeline `__ and compare resulting word error rate (WER) to the WER of baseline ASR output. + +One (non-tarred) training example should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of correct candidates, separated by space, or 0 if none + 4. start/end coordinates of correct candidates (correspond to ids in third column) + +Example (in one line): + +.. code:: + + a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o + d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y + 1 3 + CUSTOM 12 23;CUSTOM 28 41 + +For data preparation see `this script `__ + + +References +---------- + +.. 
bibliography:: nlp_all.bib + :style: plain + :labelprefix: NLP-NER + :keyprefix: nlp-ner- diff --git a/docs/source/starthere/tutorials.rst b/docs/source/starthere/tutorials.rst index cb81aecc1109..9c960053398b 100644 --- a/docs/source/starthere/tutorials.rst +++ b/docs/source/starthere/tutorials.rst @@ -130,6 +130,9 @@ To run a tutorial: * - NLP - Punctuation and Capitalization - `Punctuation and Capitalization `_ + * - NLP + - Spellchecking ASR Customization - SpellMapper + - `Spellchecking ASR Customization - SpellMapper `_ * - NLP - Entity Linking - `Entity Linking `_ diff --git a/examples/nlp/spellchecking_asr_customization/README.md b/examples/nlp/spellchecking_asr_customization/README.md new file mode 100644 index 000000000000..2d83fd8d11ad --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/README.md @@ -0,0 +1,32 @@ +# SpellMapper - spellchecking model for ASR Customization + +This model is inspired by Microsoft's paper https://arxiv.org/pdf/2203.00888.pdf, but does not repeat its implementation. +The goal is to build a model that gets as input a single ASR hypothesis (text) and a vocabulary of custom words/phrases and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. +Our model is non-autoregressive (NAR) based on transformer architecture (BERT with multiple separators). + +As initial data we use about 5 mln entities from [YAGO corpus](https://www.mpi-inf.mpg.de/departments/databases-and-information-systems/research/yago-naga/yago/downloads/). These entities are short phrases from Wikipedia headings. +In order to get misspelled predictions we feed these data to TTS model and then to ASR model. +Having a "parallel" corpus of "correct + misspelled" phrases, we use statistical machine translation techniques to create a dictionary of possible ngram mappings with their respective frequencies. +We create an auxiliary algorithm that takes as input a sentence (ASR hypothesis) and a large custom dictionary (e.g. 5000 phrases) and selects top 10 candidate phrases that are probably contained in this sentence in a misspelled way. +The task of our final neural model is to predict which fragments in the ASR hypothesis should be replaced by which of top-10 candidate phrases if any. + +The pipeline consists of multiple steps: + +1. Download or generate training data. + See `https://github.com/bene-ges/nemo_compatible/tree/main/scripts/nlp/en_spellmapper/dataset_preparation` + +2. [Optional] Convert training dataset to tarred files. + `convert_dataset_to_tarred.sh` + +3. Train spellchecking model. + `run_training.sh` + or + `run_training_tarred.sh` + +4. Run evaluation. + - [test_on_kensho.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh) + - [test_on_userlibri.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh) + - [test_on_spoken_wikipedia.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh) + +5. Run inference. + `python run_infer.sh` diff --git a/examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py b/examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py new file mode 100644 index 000000000000..c2f514f3e67e --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/checkpoint_to_nemo.py @@ -0,0 +1,38 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script converts checkpoint .ckpt to .nemo file. + +This script uses the `examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml` +config file by default. The other option is to set another config file via command +line arguments by `--config-name=CONFIG_FILE_PATH'. +""" + +from omegaconf import DictConfig, OmegaConf + +from nemo.collections.nlp.models import SpellcheckingAsrCustomizationModel +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.debug(f'Config Params: {OmegaConf.to_yaml(cfg)}') + SpellcheckingAsrCustomizationModel.load_from_checkpoint(cfg.checkpoint_path).save_to(cfg.target_nemo_path) + + +if __name__ == "__main__": + main() diff --git a/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml b/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml new file mode 100644 index 000000000000..c98915cdfc6f --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml @@ -0,0 +1,97 @@ +name: &name spellchecking +lang: ??? # e.g. 'ru', 'en' + +# Pretrained Nemo Models +pretrained_model: null + +trainer: + devices: 1 # the number of gpus, 0 for CPU + num_nodes: 1 + max_epochs: 3 # the number of training epochs + enable_checkpointing: false # provided by exp_manager + logger: false # provided by exp_manager + accumulate_grad_batches: 1 # accumulates grads every k batches + gradient_clip_val: 0.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + accelerator: gpu + strategy: ddp + log_every_n_steps: 1 # Interval of logging. + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + +model: + do_training: true + label_map: ??? # path/.../label_map.txt + semiotic_classes: ??? 
# path/.../semiotic_classes.txt + max_sequence_len: 128 + lang: ${lang} + hidden_size: 768 + + optim: + name: adamw + lr: 3e-5 + weight_decay: 0.1 + + sched: + name: WarmupAnnealing + + # pytorch lightning args + monitor: val_loss + reduce_on_plateau: false + + # scheduler config override + warmup_ratio: 0.1 + last_epoch: -1 + + language_model: + pretrained_model_name: bert-base-uncased # For ru, try DeepPavlov/rubert-base-cased | For de or multilingual, try bert-base-multilingual-cased + lm_checkpoint: null + config_file: null # json file, precedence over config + config: null + + tokenizer: + tokenizer_name: ${model.language_model.pretrained_model_name} # or sentencepiece + vocab_file: null # path to vocab file + tokenizer_model: null # only used if tokenizer is sentencepiece + special_tokens: null + +exp_manager: + exp_dir: nemo_experiments # where to store logs and checkpoints + name: training # name of experiment + create_tensorboard_logger: True + create_checkpoint_callback: True + checkpoint_callback_params: + save_top_k: 3 + monitor: "val_loss" + mode: "min" + +tokenizer: + tokenizer_name: ${model.transformer} # or sentencepiece + vocab_file: null # path to vocab file + tokenizer_model: null # only used if tokenizer is sentencepiece + special_tokens: null + +# Data +data: + train_ds: + data_path: ??? # provide the full path to the file + batch_size: 8 + shuffle: true + num_workers: 3 + pin_memory: false + drop_last: false + + validation_ds: + data_path: ??? # provide the full path to the file. + batch_size: 8 + shuffle: false + num_workers: 3 + pin_memory: false + drop_last: false + + +# Inference +inference: + from_file: null # Path to the raw text, no labels required. Each sentence on a separate line + out_file: null # Path to the output file + batch_size: 16 # batch size for inference.from_file diff --git a/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh b/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh new file mode 100644 index 000000000000..d4265eb4beb6 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh @@ -0,0 +1,50 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Path to NeMo repository +NEMO_PATH=NeMo + +DATA_PATH="data_folder" + +## data_folder_example +## ├── tarred_data +## | └── (output) +## ├── config.json +##   ├── label_map.txt +##   ├── semiotic_classes.txt +## ├── test.tsv +## ├── 1.tsv +## ├── ... +## └── 200.tsv + +## Each of {1-200}.tsv input files are 110'000 examples subsets of all.tsv (except for validation part), +## generated by https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/dataset_preparation/build_training_data.sh +## Note that in this example we use 110'000 as input and only pack 100'000 of them to tar file. +## This is because some input examples, e.g. 
too long, can be skipped during preprocessing, and we want all tar files to contain fixed equal number of examples. + +for part in {1..200} +do + python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py \ + lang="en" \ + data.train_ds.data_path=${DATA_PATH}/${part}.tsv \ + data.validation_ds.data_path=${DATA_PATH}/test.tsv \ + model.max_sequence_len=256 \ + model.language_model.pretrained_model_name=huawei-noah/TinyBERT_General_6L_768D \ + model.language_model.config_file=${DATA_PATH}/config.json \ + model.label_map=${DATA_PATH}/label_map.txt \ + model.semiotic_classes=${DATA_PATH}/semiotic_classes.txt \ + +output_tar_file=${DATA_PATH}/tarred_data/part${part}.tar \ + +take_first_n_lines=100000 +done diff --git a/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py b/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py new file mode 100644 index 000000000000..07d64ec5b723 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script is used to create an index of custom vocabulary and save it to file. +See "examples/nlp/spellchecking_asr_customization/run_infer.sh" for the whole inference pipeline. +""" + +from argparse import ArgumentParser + +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import get_index, load_ngram_mappings + +parser = ArgumentParser(description="Create an index of custom vocabulary and save it to file") + +parser.add_argument( + "--input_name", required=True, type=str, help="Path to input file with custom vocabulary (plain text)" +) +parser.add_argument( + "--ngram_mappings", required=True, type=str, help="Path to input file with n-gram mapping vocabulary" +) +parser.add_argument("--output_name", required=True, type=str, help="Path to output file with custom vocabulary index") +parser.add_argument("--min_log_prob", default=-4.0, type=float, help="Threshold on log probability") +parser.add_argument( + "--max_phrases_per_ngram", + default=500, + type=int, + help="Threshold on number of phrases that can be stored for one n-gram key in index. 
Keys with more phrases are discarded.",
+)
+parser.add_argument(
+    "--max_misspelled_freq", default=125000, type=int, help="Threshold on maximum frequency of misspelled n-gram"
+)
+
+args = parser.parse_args()
+
+# Load custom vocabulary
+custom_phrases = set()
+with open(args.input_name, "r", encoding="utf-8") as f:
+    for line in f:
+        phrase = line.strip()
+        custom_phrases.add(" ".join(list(phrase.replace(" ", "_"))))
+print("Size of customization vocabulary:", len(custom_phrases))
+
+# Load n-gram mappings vocabulary (the threshold comes from the --max_misspelled_freq argument)
+ngram_mapping_vocab, ban_ngram = load_ngram_mappings(args.ngram_mappings, max_misspelled_freq=args.max_misspelled_freq)
+
+# Generate index of custom phrases
+phrases, ngram2phrases = get_index(
+    custom_phrases,
+    ngram_mapping_vocab,
+    ban_ngram,
+    min_log_prob=args.min_log_prob,
+    max_phrases_per_ngram=args.max_phrases_per_ngram,
+)
+
+# Save index to file
+with open(args.output_name, "w", encoding="utf-8") as out:
+    for ngram in ngram2phrases:
+        for phrase_id, begin, size, logprob in ngram2phrases[ngram]:
+            phrase = phrases[phrase_id]
+            out.write(ngram + "\t" + phrase + "\t" + str(begin) + "\t" + str(size) + "\t" + str(logprob) + "\n")
diff --git a/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py b/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py
new file mode 100644
index 000000000000..d0bdc2c9bd30
--- /dev/null
+++ b/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This script is used to create a tarred dataset for SpellcheckingAsrCustomizationModel.
+
+This script uses the `/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml`
+config file by default. The other option is to set another config file via command
+line arguments by `--config-name=CONFIG_FILE_PATH'. Probably it is worth looking
+at the example config file to see the list of parameters used for training.
+
+USAGE Example:
+1. Obtain a processed dataset
+2. 
Run: + python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/create_tarred_dataset.py \ + lang=${LANG} \ + data.train_ds.data_path=${DATA_PATH}/train.tsv \ + model.language_model.pretrained_model_name=${LANGUAGE_MODEL} \ + model.label_map=${DATA_PATH}/label_map.txt \ + +output_tar_file=tarred/part1.tar \ + +take_first_n_lines=100000 + +""" +import pickle +import tarfile +from io import BytesIO + +from helpers import MODEL, instantiate_model_and_trainer +from omegaconf import DictConfig, OmegaConf + +from nemo.core.config import hydra_runner +from nemo.utils import logging + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.info(f'Config Params: {OmegaConf.to_yaml(cfg)}') + logging.info("Start creating tar file from " + cfg.data.train_ds.data_path + " ...") + _, model = instantiate_model_and_trainer( + cfg, MODEL, True + ) # instantiate model like for training because we may not have pretrained model + dataset = model._train_dl.dataset + archive = tarfile.open(cfg.output_tar_file, mode="w") + max_lines = int(cfg.take_first_n_lines) + for i in range(len(dataset)): + if i >= max_lines: + logging.info("Reached " + str(max_lines) + " examples") + break + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) = dataset[i] + + # do not store masks as they are just arrays of 1 + content = { + "input_ids": input_ids, + "input_mask": input_mask, + "segment_ids": segment_ids, + "input_ids_for_subwords": input_ids_for_subwords, + "input_mask_for_subwords": input_mask_for_subwords, + "segment_ids_for_subwords": segment_ids_for_subwords, + "character_pos_to_subword_pos": character_pos_to_subword_pos, + "labels_mask": labels_mask, + "labels": labels, + "spans": spans, + } + b = BytesIO() + pickle.dump(content, b) + b.seek(0) + tarinfo = tarfile.TarInfo(name="example_" + str(i) + ".pkl") + tarinfo.size = b.getbuffer().nbytes + archive.addfile(tarinfo=tarinfo, fileobj=b) + + archive.close() + logging.info("Tar file " + cfg.output_tar_file + " created!") + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/spellchecking_asr_customization/helpers.py b/examples/nlp/spellchecking_asr_customization/helpers.py new file mode 100644 index 000000000000..2db11b0e7d96 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/helpers.py @@ -0,0 +1,86 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
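To sanity-check an archive produced by create_tarred_dataset.py above, one can unpickle a single member directly. A minimal sketch (the path tarred/part1.tar is just the hypothetical output name from the usage example; training-time loading is handled by TarredSpellcheckingAsrCustomizationDataset, not by this snippet):

import pickle
import tarfile

# Minimal sketch: inspect one example written by create_tarred_dataset.py.
with tarfile.open("tarred/part1.tar", mode="r") as archive:
    member = archive.getmembers()[0]                     # e.g. "example_0.pkl"
    content = pickle.load(archive.extractfile(member))   # dict: input_ids, labels, spans, ...
    print(member.name, sorted(content.keys()))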
+
+
+import os
+from typing import Tuple
+
+import pytorch_lightning as pl
+from omegaconf import DictConfig
+
+from nemo.collections.nlp.models import SpellcheckingAsrCustomizationModel
+from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector
+from nemo.utils import logging
+
+__all__ = ["MODEL", "MODEL_NAMES", "instantiate_model_and_trainer"]
+
+MODEL = "spellchecking"
+MODEL_NAMES = [MODEL]
+
+
+def instantiate_model_and_trainer(
+    cfg: DictConfig, model_name: str, do_training: bool
+) -> Tuple[pl.Trainer, SpellcheckingAsrCustomizationModel]:
+    """ Function for instantiating a model and a trainer
+    Args:
+        cfg: The config used to instantiate the model and the trainer.
+        model_name: A str with the model name; currently only 'spellchecking' is supported.
+        do_training: A boolean flag indicating whether the model will be trained or evaluated.
+
+    Returns:
+        trainer: A PyTorch Lightning trainer
+        model: A SpellcheckingAsrCustomizationModel
+    """
+
+    if model_name not in MODEL_NAMES:
+        raise ValueError(f"{model_name} is an unknown model type")
+
+    # Get configs for the corresponding models
+    trainer_cfg = cfg.get("trainer")
+    model_cfg = cfg.get("model")
+    pretrained_cfg = cfg.get("pretrained_model", None)
+    trainer = pl.Trainer(**trainer_cfg)
+    if not pretrained_cfg:
+        logging.info(f"Initializing {model_name} model")
+        if model_name == MODEL:
+            model = SpellcheckingAsrCustomizationModel(model_cfg, trainer=trainer)
+        else:
+            raise ValueError(f"{model_name} is an unknown model type")
+    elif os.path.exists(pretrained_cfg):
+        logging.info(f"Restoring pretrained {model_name} model from {pretrained_cfg}")
+        save_restore_connector = NLPSaveRestoreConnector()
+        model = SpellcheckingAsrCustomizationModel.restore_from(
+            pretrained_cfg, save_restore_connector=save_restore_connector
+        )
+    else:
+        logging.info(f"Loading pretrained model {pretrained_cfg}")
+        if model_name == MODEL:
+            if pretrained_cfg not in SpellcheckingAsrCustomizationModel.get_available_model_names():
+                raise (
+                    ValueError(
+                        f"{pretrained_cfg} not in the list of available SpellcheckingAsrCustomization models. "
+                        f"Select from {SpellcheckingAsrCustomizationModel.list_available_models()}"
+                    )
+                )
+            model = SpellcheckingAsrCustomizationModel.from_pretrained(pretrained_cfg)
+        else:
+            raise ValueError(f"{model_name} is an unknown model type")
+
+    # Setup train and validation data
+    if do_training:
+        model.setup_training_data(train_data_config=cfg.data.train_ds)
+        model.setup_validation_data(val_data_config=cfg.data.validation_ds)
+
+    logging.info(f"Model {model_name} -- Device {model.device}")
+    return trainer, model
diff --git a/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py b/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py
new file mode 100644
index 000000000000..871d5e5c0c0c
--- /dev/null
+++ b/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This script is used to postprocess SpellMapper results and generate an updated nemo ASR manifest. +See "examples/nlp/spellchecking_asr_customization/run_infer.sh" for the whole inference pipeline. +""" + +from argparse import ArgumentParser + +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import ( + update_manifest_with_spellmapper_corrections, +) + +parser = ArgumentParser(description="Postprocess SpellMapper results and generate an updated nemo ASR manifest") + +parser.add_argument("--input_manifest", required=True, type=str, help="Path to input nemo ASR manifest") +parser.add_argument( + "--field_name", default="pred_text", type=str, help="Name of json field with original ASR hypothesis text" +) +parser.add_argument( + "--short2full_name", + required=True, + type=str, + help="Path to input file with correspondence between sentence fragments and full sentences", +) +parser.add_argument( + "--spellmapper_results", required=True, type=str, help="Path to input file with SpellMapper inference results" +) +parser.add_argument("--output_manifest", required=True, type=str, help="Path to output nemo ASR manifest") +parser.add_argument("--min_prob", default=0.5, type=float, help="Threshold on replacement probability") +parser.add_argument( + "--use_dp", + action="store_true", + help="Whether to use additional replacement filtering by using dynamic programming", +) +parser.add_argument( + "--replace_hyphen_to_space", + action="store_true", + help="Whether to use space instead of hyphen in replaced fragments", +) +parser.add_argument( + "--ngram_mappings", type=str, required=True, help="File with ngram mappings, only needed if use_dp=true" +) +parser.add_argument( + "--min_dp_score_per_symbol", + default=-1.5, + type=float, + help="Minimum dynamic programming sum score averaged by hypothesis length", +) + +args = parser.parse_args() + +update_manifest_with_spellmapper_corrections( + input_manifest_name=args.input_manifest, + short2full_name=args.short2full_name, + output_manifest_name=args.output_manifest, + spellmapper_results_name=args.spellmapper_results, + min_prob=args.min_prob, + replace_hyphen_to_space=args.replace_hyphen_to_space, + field_name=args.field_name, + use_dp=args.use_dp, + ngram_mappings=args.ngram_mappings, + min_dp_score_per_symbol=args.min_dp_score_per_symbol, +) + +print("Resulting manifest saved to: ", args.output_manifest) diff --git a/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py b/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py new file mode 100644 index 000000000000..6fd5e524390a --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py @@ -0,0 +1,129 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
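The --min_prob threshold above keeps only sufficiently confident SpellMapper replacements. A rough sketch of that filtering, assuming the tab-separated output format documented in spellchecking_asr_customization_infer.py, where each fragment prediction is "start end candidate_id probability" (the actual logic lives in update_manifest_with_spellmapper_corrections):

# Illustration only: keep fragment predictions whose probability passes --min_prob.
def keep_confident_replacements(prediction_field: str, min_prob: float = 0.5):
    kept = []
    for fragment in prediction_field.split(";"):
        start, end, candidate_id, prob = fragment.split(" ")
        if float(prob) >= min_prob:
            kept.append((int(start), int(end), int(candidate_id), float(prob)))
    return kept

# Example with values from the inference script's docstring:
print(keep_confident_replacements("56 62 7 0.99998;12 20 8 0.44829"))
# -> [(56, 62, 7, 0.99998)]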
+ + +""" +This script contains an example on how to prepare input for SpellMapper inference from a nemo ASR manifest. +It splits sentences to shorter fragments, runs candidate retrieval and generates input in the required format. +It produces two output files: + 1. File with correspondence between sentence fragments and full sentences. + 2. File that will serve as input for SpellMapper inference. + +See "examples/nlp/spellchecking_asr_customization/run_infer.sh" for the whole inference pipeline. +""" + +from argparse import ArgumentParser + +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import ( + extract_and_split_text_from_manifest, + get_candidates, + load_index, +) + +parser = ArgumentParser(description="Prepare input for SpellMapper inference from a nemo ASR manifest") +parser.add_argument("--manifest", required=True, type=str, help="Path to input manifest file") +parser.add_argument( + "--custom_vocab_index", required=True, type=str, help="Path to input file with custom vocabulary index" +) +parser.add_argument( + "--big_sample", + required=True, + type=str, + help="Path to input file with big sample of phrases to sample dummy candidates if there less than 10 are found by retrieval", +) +parser.add_argument( + "--short2full_name", + required=True, + type=str, + help="Path to output file with correspondence between sentence fragments and full sentences", +) +parser.add_argument( + "--output_name", + required=True, + type=str, + help="Path to output file that will serve as input for SpellMapper inference", +) +parser.add_argument("--field_name", default="pred_text", type=str, help="Name of json field with ASR hypothesis text") +parser.add_argument("--len_in_words", default=16, type=int, help="Maximum fragment length in words") +parser.add_argument( + "--step_in_words", + default=8, + type=int, + help="Step in words for moving to next fragment. If less than len_in_words, fragments will intersect", +) + +args = parser.parse_args() + +# Split ASR hypotheses to shorter fragments, because SpellMapper can't handle arbitrarily long sequences. +# The correspondence between short and original fragments is saved to a file and will be used at post-processing. 
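+# For illustration (hypothetical numbers): with len_in_words=16 and step_in_words=8, a 40-word hypothesis
+# is split into overlapping fragments covering roughly words [0:16], [8:24], [16:32], [24:40], so most words
+# are seen in two fragments; the exact edge handling is inside extract_and_split_text_from_manifest.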
+extract_and_split_text_from_manifest( + input_name=args.manifest, + output_name=args.short2full_name, + field_name=args.field_name, + len_in_words=args.len_in_words, + step_in_words=args.step_in_words, +) + +# Load index of custom vocabulary from file +phrases, ngram2phrases = load_index(args.custom_vocab_index) + +# Load big sample of phrases to sample dummy candidates if there less than 10 are found by retrieval +big_sample_of_phrases = set() +with open(args.big_sample, "r", encoding="utf-8") as f: + for line in f: + phrase, freq = line.strip().split("\t") + if int(freq) > 50: # do not want to use frequent phrases as dummy candidates + continue + if len(phrase) < 6 or len(phrase) > 15: # do not want to use too short or too long phrases as dummy candidates + continue + big_sample_of_phrases.add(phrase) + +big_sample_of_phrases = list(big_sample_of_phrases) + +# Generate input for SpellMapper inference +out = open(args.output_name, "w", encoding="utf-8") +with open(args.short2full_name, "r", encoding="utf-8") as f: + for line in f: + short_sent, _ = line.strip().split("\t") + sent = "_".join(short_sent.split()) + letters = list(sent) + candidates = get_candidates(ngram2phrases, phrases, letters, big_sample_of_phrases) + if len(candidates) == 0: + continue + if len(candidates) != 10: + raise ValueError("expect 10 candidates, got: ", len(candidates)) + + # We add two columns with targets and span_info. + # They have same format as during training, but start and end positions are APPROXIMATE, they will be adjusted when constructing BertExample. + targets = [] + span_info = [] + for idx, c in enumerate(candidates): + if c[1] == -1: + continue + targets.append(str(idx + 1)) # targets are 1-based + start = c[1] + # ensure that end is not outside sentence length (it can happen because c[2] is candidate length used as approximation) + end = min(c[1] + c[2], len(letters)) + span_info.append("CUSTOM " + str(start) + " " + str(end)) + out.write( + " ".join(letters) + + "\t" + + ";".join([x[0] for x in candidates]) + + "\t" + + " ".join(targets) + + "\t" + + ";".join(span_info) + + "\n" + ) +out.close() diff --git a/examples/nlp/spellchecking_asr_customization/run_infer.sh b/examples/nlp/spellchecking_asr_customization/run_infer.sh new file mode 100644 index 000000000000..09da98171c16 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/run_infer.sh @@ -0,0 +1,99 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+## RUN INFERENCE ON NEMO MANIFEST AND CUSTOM VOCABULARY
+
+## Path to NeMo repository
+NEMO_PATH=NeMo
+
+## Download model repo from Hugging Face (if clone doesn't work, run "git lfs install" and try again)
+git clone https://huggingface.co/bene-ges/spellmapper_asr_customization_en
+## Download repo with test data
+git clone https://huggingface.co/datasets/bene-ges/spellmapper_en_evaluation
+
+## Files in model repo
+PRETRAINED_MODEL=spellmapper_asr_customization_en/training_10m_5ep.nemo
+NGRAM_MAPPINGS=spellmapper_asr_customization_en/replacement_vocab_filt.txt
+BIG_SAMPLE=spellmapper_asr_customization_en/big_sample.txt
+
+## Override these two files if you want to test on your own data
+## File with input nemo ASR manifest
+INPUT_MANIFEST=spellmapper_en_evaluation/medical_manifest_ctc.json
+## File containing custom words and phrases (plain text)
+CUSTOM_VOCAB=spellmapper_en_evaluation/medical_custom_vocab.json
+
+## Other files will be created
+## File with index of custom vocabulary
+INDEX="index.txt"
+## File with short fragments and corresponding original sentences
+SHORT2FULL="short2full.txt"
+## File with input for SpellMapper inference
+SPELLMAPPER_INPUT="spellmapper_input.txt"
+## File with output of SpellMapper inference
+SPELLMAPPER_OUTPUT="spellmapper_output.txt"
+## File with output nemo ASR manifest
+OUTPUT_MANIFEST="out_manifest.json"
+
+
+# Create index of custom vocabulary
+python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py \
+    --input_name ${CUSTOM_VOCAB} \
+    --ngram_mappings ${NGRAM_MAPPINGS} \
+    --output_name ${INDEX} \
+    --min_log_prob -4.0 \
+    --max_phrases_per_ngram 600
+
+# Prepare input for SpellMapper inference
+python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py \
+    --manifest ${INPUT_MANIFEST} \
+    --custom_vocab_index ${INDEX} \
+    --big_sample ${BIG_SAMPLE} \
+    --short2full_name ${SHORT2FULL} \
+    --output_name ${SPELLMAPPER_INPUT} \
+    --field_name "pred_text" \
+    --len_in_words 16 \
+    --step_in_words 8
+
+# Run SpellMapper inference
+python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \
+    pretrained_model=${PRETRAINED_MODEL} \
+    model.max_sequence_len=512 \
+    inference.from_file=${SPELLMAPPER_INPUT} \
+    inference.out_file=${SPELLMAPPER_OUTPUT} \
+    inference.batch_size=16 \
+    lang=en
+
+# Postprocess and create output corrected manifest
+python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py \
+    --input_manifest ${INPUT_MANIFEST} \
+    --short2full_name ${SHORT2FULL} \
+    --output_manifest ${OUTPUT_MANIFEST} \
+    --spellmapper_results ${SPELLMAPPER_OUTPUT} \
+    --replace_hyphen_to_space \
+    --field_name "pred_text" \
+    --use_dp \
+    --ngram_mappings ${NGRAM_MAPPINGS} \
+    --min_dp_score_per_symbol -1.5
+
+# Check WER of initial manifest
+python ${NEMO_PATH}/examples/asr/speech_to_text_eval.py \
+    dataset_manifest=${INPUT_MANIFEST} \
+    use_cer=False \
+    only_score_manifest=True
+
+# Check WER of corrected manifest
+python ${NEMO_PATH}/examples/asr/speech_to_text_eval.py \
+    dataset_manifest=${OUTPUT_MANIFEST} \
+    use_cer=False \
+    only_score_manifest=True
diff --git a/examples/nlp/spellchecking_asr_customization/run_training.sh b/examples/nlp/spellchecking_asr_customization/run_training.sh
new file mode 100644
index 000000000000..85dddbb2a038
--- /dev/null
+++ b/examples/nlp/spellchecking_asr_customization/run_training.sh
@@ -0,0 +1,56 @@
+# Copyright (c) 2023, NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## TRAIN WITH NON-TARRED DATA + +# Path to NeMo repository +NEMO_PATH=NeMo + +## Download repo with training data (very small example) +## If clone doesn't work, run "git lfs install" and try again +git clone https://huggingface.co/datasets/bene-ges/spellmapper_en_train_micro + +DATA_PATH=spellmapper_en_train_micro + +## Example of all files needed to run training with non-tarred data: +## spellmapper_en_train_micro +## ├── config.json +##   ├── label_map.txt +##   ├── semiotic_classes.txt +## ├── test.tsv +## └── train.tsv + +## To generate files config.json, label_map.txt, semiotic_classes.txt - run generate_configs.sh +## Files "train.tsv" and "test.tsv" contain training examples. +## For data preparation see https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/dataset_preparation/build_training_data.sh + +## Note that training with non-tarred data only works on single gpu. It makes sense if you use 1-2 million examples or less. + +python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py \ + lang="en" \ + data.validation_ds.data_path=${DATA_PATH}/test.tsv \ + data.train_ds.data_path=${DATA_PATH}/train.tsv \ + data.train_ds.batch_size=32 \ + data.train_ds.num_workers=8 \ + model.max_sequence_len=512 \ + model.language_model.pretrained_model_name=huawei-noah/TinyBERT_General_6L_768D \ + model.language_model.config_file=${DATA_PATH}/config.json \ + model.label_map=${DATA_PATH}/label_map.txt \ + model.semiotic_classes=${DATA_PATH}/semiotic_classes.txt \ + model.optim.lr=3e-5 \ + trainer.devices=[1] \ + trainer.num_nodes=1 \ + trainer.accelerator=gpu \ + trainer.strategy=ddp \ + trainer.max_epochs=5 diff --git a/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh b/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh new file mode 100644 index 000000000000..655c3e23e610 --- /dev/null +++ b/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh @@ -0,0 +1,63 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## TRAIN WITH TARRED DATA + +# Path to NeMo repository +NEMO_PATH=NeMo + +DATA_PATH=data_folder + +## data_folder_example +## ├── train_tarred +## | ├── part1.tar +## | ├── ... 
+## |   └── part200.tar
+## ├── config.json
+##   ├── label_map.txt
+##   ├── semiotic_classes.txt
+## └── test.tsv
+## To generate files config.json, label_map.txt, semiotic_classes.txt, run generate_configs.sh
+## To prepare data, see https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/dataset_preparation/build_training_data.sh
+## To convert data to tarred format, split all.tsv to pieces of 110'000 examples (except for validation part) and use ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/convert_data_to_tarred.sh
+## To run training with tarred data, use ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/run_training_tarred.sh
+
+## ATTENTION: How to calculate model.optim.sched.max_steps:
+## Suppose you have 2'000'000 training examples and want to train for 5 epochs on 4 gpus with batch size 32.
+## 1 step consumes 32 (bs) * 4 (gpus) = 128 examples
+## 1 epoch makes 2000000/128=15625 steps (updates)
+## 5 epochs make 5*15625=78125 steps
+
+python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py \
+    lang="en" \
+    data.validation_ds.data_path=${DATA_PATH}/test.tsv \
+    data.train_ds.data_path=${DATA_PATH}/train_tarred/part_OP_1..100_CL_.tar \
+    data.train_ds.batch_size=32 \
+    data.train_ds.num_workers=16 \
+    +data.train_ds.use_tarred_dataset=true \
+    data.train_ds.shuffle=false \
+    data.validation_ds.batch_size=16 \
+    model.max_sequence_len=512 \
+    model.language_model.pretrained_model_name=huawei-noah/TinyBERT_General_6L_768D \
+    model.language_model.config_file=${DATA_PATH}/config.json \
+    model.label_map=${DATA_PATH}/label_map.txt \
+    model.semiotic_classes=${DATA_PATH}/semiotic_classes.txt \
+    model.optim.sched.name=CosineAnnealing \
+    +model.optim.sched.max_steps=195313 \
+    trainer.devices=8 \
+    trainer.num_nodes=1 \
+    trainer.accelerator=gpu \
+    trainer.strategy=ddp \
+    trainer.max_epochs=5
diff --git a/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py
new file mode 100644
index 000000000000..593264f14a5d
--- /dev/null
+++ b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This script contains an example on how to run inference with the SpellcheckingAsrCustomizationModel.
+
+An input line should consist of 4 tab-separated columns:
+    1. text of ASR-hypothesis
+    2. texts of 10 candidates separated by semicolon
+    3. 1-based ids of non-dummy candidates
+    4. 
approximate start/end coordinates of non-dummy candidates (correspond to ids in third column) + +Example input (in one line): + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 1 2 6 7 8 9 10 + CUSTOM 6 23;CUSTOM 4 10;CUSTOM 4 15;CUSTOM 56 62;CUSTOM 5 19;CUSTOM 28 31;CUSTOM 39 48 + +Each line in SpellMapper output is tab-separated and consists of 4 columns: + 1. ASR-hypothesis (same as in input) + 2. 10 candidates separated with semicolon (same as in input) + 3. fragment predictions, separated with semicolon, each prediction is a tuple (start, end, candidate_id, probability) + 4. letter predictions - candidate_id predicted for each letter (this is only for debug purposes) + +Example output (in one line): + t h e _ t a r a s i c _ o o r d a _ i s _ a _ p a r t _ o f _ t h e _ a o r t a _ l o c a t e d _ i n _ t h e _ t h o r a x + h e p a t i c _ c i r r h o s i s;u r a c i l;c a r d i a c _ a r r e s t;w e a n;a p g a r;p s y c h o m o t o r;t h o r a x;t h o r a c i c _ a o r t a;a v f;b l o c k a d e d + 56 62 7 0.99998;4 20 8 0.95181;12 20 8 0.44829;4 17 8 0.99464;12 17 8 0.97645 + 8 8 8 0 8 8 8 8 8 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 7 7 7 7 7 + + +USAGE Example: +1. Train a model, or use a pretrained checkpoint. +2. Run on a single file: + python nemo/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \ + pretrained_model=${PRETRAINED_NEMO_CHECKPOINT} \ + model.max_sequence_len=512 \ + inference.from_file=input.txt \ + inference.out_file=output.txt \ + inference.batch_size=16 \ + lang=en +or on multiple files: + python ${NEMO_PATH}/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \ + pretrained_model=${PRETRAINED_NEMO_CHECKPOINT} \ + model.max_sequence_len=512 \ + +inference.from_filelist=filelist.txt \ + +inference.output_folder=output_folder \ + inference.batch_size=16 \ + lang=en + +This script uses the `/examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml` +config file by default. The other option is to set another config file via command +line arguments by `--config-name=CONFIG_FILE_PATH'. 
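+
+For illustration, a sketch of how the fragment predictions column (third column of the output) can be parsed;
+each prediction is a space-separated tuple "start end candidate_id probability" and, matching the input format,
+candidate ids are 1-based:
+
+    for line in open("output.txt", encoding="utf-8"):
+        hyp, candidates, fragment_predictions, letter_predictions = line.rstrip("\n").split("\t")
+        for pred in fragment_predictions.split(";"):
+            if not pred:
+                continue
+            start, end, candidate_id, prob = pred.split(" ")
+            print(int(start), int(end), candidates.split(";")[int(candidate_id) - 1], float(prob))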
+"""
+
+
+import os
+
+from helpers import MODEL, instantiate_model_and_trainer
+from omegaconf import DictConfig, OmegaConf
+
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+
+
+@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config")
+def main(cfg: DictConfig) -> None:
+    logging.debug(f'Config Params: {OmegaConf.to_yaml(cfg)}')
+
+    if cfg.pretrained_model is None:
+        raise ValueError("A pre-trained model should be provided.")
+    _, model = instantiate_model_and_trainer(cfg, MODEL, False)
+
+    if cfg.model.max_sequence_len != model.max_sequence_len:
+        model.max_sequence_len = cfg.model.max_sequence_len
+        model.builder._max_seq_length = cfg.model.max_sequence_len
+    input_filenames = []
+    output_filenames = []
+
+    if "from_filelist" in cfg.inference and "output_folder" in cfg.inference:
+        filelist_file = cfg.inference.from_filelist
+        output_folder = cfg.inference.output_folder
+        with open(filelist_file, "r", encoding="utf-8") as f:
+            for line in f:
+                path = line.strip()
+                input_filenames.append(path)
+                folder, name = os.path.split(path)
+                output_filenames.append(os.path.join(output_folder, name))
+    else:
+        text_file = cfg.inference.from_file
+        logging.info(f"Running inference on {text_file}...")
+        if not os.path.exists(text_file):
+            raise ValueError(f"{text_file} not found.")
+        input_filenames.append(text_file)
+        output_filenames.append(cfg.inference.out_file)
+
+    dataloader_cfg = {
+        "batch_size": cfg.inference.get("batch_size", 8),
+        "num_workers": cfg.inference.get("num_workers", 4),
+        "pin_memory": cfg.inference.get("pin_memory", False),
+    }
+    for input_filename, output_filename in zip(input_filenames, output_filenames):
+        if not os.path.exists(input_filename):
+            logging.info(f"Skip non-existing {input_filename}.")
+            continue
+        model.infer(dataloader_cfg, input_filename, output_filename)
+        logging.info(f"Predictions saved to {output_filename}.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py
new file mode 100644
index 000000000000..7ea9314d196d
--- /dev/null
+++ b/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_train.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This script contains an example on how to train SpellMapper (SpellcheckingAsrCustomizationModel).
+It uses the `examples/nlp/spellchecking_asr_customization/conf/spellchecking_asr_customization_config.yaml`
+config file by default. The other option is to set another config file via command
+line arguments by `--config-name=CONFIG_FILE_PATH'. Probably it is worth looking
+at the example config file to see the list of parameters used for training.
+
+USAGE Example:
+    See `examples/nlp/spellchecking_asr_customization/run_training.sh` for training on non-tarred data. 
+ and + `examples/nlp/spellchecking_asr_customization/run_training_tarred.sh` for training on tarred data. + +One (non-tarred) training example should consist of 4 tab-separated columns: + 1. text of ASR-hypothesis + 2. texts of 10 candidates separated by semicolon + 3. 1-based ids of correct candidates, or 0 if none + 4. start/end coordinates of correct candidates (correspond to ids in third column) +Example (in one line): + a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o + d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y + 1 3 + CUSTOM 12 23;CUSTOM 28 41 +""" + +from helpers import MODEL, instantiate_model_and_trainer +from omegaconf import DictConfig, OmegaConf + +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="conf", config_name="spellchecking_asr_customization_config") +def main(cfg: DictConfig) -> None: + logging.info(f'Config Params: {OmegaConf.to_yaml(cfg)}') + + # Train the model + if cfg.model.do_training: + logging.info( + "================================================================================================" + ) + logging.info('Start training...') + trainer, model = instantiate_model_and_trainer(cfg, MODEL, True) + spellchecking_exp_manager = cfg.get('exp_manager', None) + exp_manager(trainer, spellchecking_exp_manager) + trainer.fit(model) + logging.info('Training finished!') + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py b/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py index e2ae48a37a0b..f5a53b1f331d 100644 --- a/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py +++ b/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py @@ -19,9 +19,14 @@ import re from argparse import ArgumentParser -from typing import List, Tuple -import numpy as np +from nemo.collections.nlp.data.text_normalization_as_tagging.utils import ( + check_monotonicity, + fill_alignment_matrix, + get_targets, + get_targets_from_back, +) + parser = ArgumentParser(description='Extract final alignments from GIZA++ alignments') parser.add_argument('--mode', type=str, required=True, help='tn or itn') @@ -34,211 +39,13 @@ args = parser.parse_args() -def fill_alignment_matrix( - fline2: str, fline3: str, gline2: str, gline3: str -) -> Tuple[np.ndarray, List[str], List[str]]: - """Parse Giza++ direct and reverse alignment results and represent them as an alignment matrix - - Args: - fline2: e.g. "_2 0 1 4_" - fline3: e.g. "NULL ({ }) twenty ({ 1 }) fourteen ({ 2 3 4 })" - gline2: e.g. "twenty fourteen" - gline3: e.g. "NULL ({ }) _2 ({ 1 }) 0 ({ }) 1 ({ }) 4_ ({ 2 })" - - Returns: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - srctokens: e.g. ["twenty", "fourteen"] - dsttokens: e.g. 
["_2", "0", "1", "4_"] - - For example, the alignment matrix for the above example may look like: - [[3, 0, 0, 0] - [0, 2, 2, 3]] - """ - if fline2 is None or gline2 is None or fline3 is None or gline3 is None: - raise ValueError(f"empty params") - srctokens = gline2.split() - dsttokens = fline2.split() - pattern = r"([^ ]+) \(\{ ([^\(\{\}\)]*) \}\)" - src2dst = re.findall(pattern, fline3.replace("({ })", "({ })")) - dst2src = re.findall(pattern, gline3.replace("({ })", "({ })")) - if len(src2dst) != len(srctokens) + 1: - raise ValueError( - "length mismatch: len(src2dst)=" - + str(len(src2dst)) - + "; len(srctokens)" - + str(len(srctokens)) - + "\n" - + gline2 - + "\n" - + fline3 - ) - if len(dst2src) != len(dsttokens) + 1: - raise ValueError( - "length mismatch: len(dst2src)=" - + str(len(dst2src)) - + "; len(dsttokens)" - + str(len(dsttokens)) - + "\n" - + fline2 - + "\n" - + gline3 - ) - matrix = np.zeros((len(srctokens), len(dsttokens))) - for i in range(1, len(src2dst)): - token, to_str = src2dst[i] - if to_str == "": - continue - to = list(map(int, to_str.split())) - for t in to: - matrix[i - 1][t - 1] = 2 - - for i in range(1, len(dst2src)): - token, to_str = dst2src[i] - if to_str == "": - continue - to = list(map(int, to_str.split())) - for t in to: - matrix[t - 1][i - 1] += 1 - - return matrix, srctokens, dsttokens - - -def check_monotonicity(matrix: np.ndarray) -> bool: - """Check if alignment is monotonous - i.e. the relative order is preserved (no swaps). - - Args: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - """ - is_sorted = lambda k: np.all(k[:-1] <= k[1:]) - - a = np.argwhere(matrix == 3) - b = np.argwhere(matrix == 2) - c = np.vstack((a, b)) - d = c[c[:, 1].argsort()] # sort by second column (less important) - d = d[d[:, 0].argsort(kind="mergesort")] - return is_sorted(d[:, 1]) - - -def get_targets(matrix: np.ndarray, dsttokens: List[str]) -> List[str]: - """Join some of the destination tokens, so that their number becomes the same as the number of input words. - Unaligned tokens tend to join to the left aligned token. - - Args: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - dsttokens: e.g. ["_2", "0", "1", "4_"] - Returns: - targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] - - Example: - If we get - matrix=[[3, 0, 0, 0] - [0, 2, 2, 3]] - dsttokens=["_2", "0", "1", "4_"] - it gives - targets = ["_201", "4_"] - Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. 
- """ - targets = [] - last_covered_dst_id = -1 - for i in range(len(matrix)): - dstlist = [] - for j in range(last_covered_dst_id + 1, len(dsttokens)): - # matrix[i][j] == 3: safe alignment point - if matrix[i][j] == 3 or ( - j == last_covered_dst_id + 1 - and np.all(matrix[i, :] == 0) # if the whole line does not have safe points - and np.all(matrix[:, j] == 0) # and the whole column does not have safe points, match them - ): - if len(targets) == 0: # if this is first safe point, attach left unaligned columns to it, if any - for k in range(0, j): - if np.all(matrix[:, k] == 0): # if column k does not have safe points - dstlist.append(dsttokens[k]) - else: - break - dstlist.append(dsttokens[j]) - last_covered_dst_id = j - for k in range(j + 1, len(dsttokens)): - if np.all(matrix[:, k] == 0): # if column k does not have safe points - dstlist.append(dsttokens[k]) - last_covered_dst_id = k - else: - break - - if len(dstlist) > 0: - if args.mode == "tn": - targets.append("_".join(dstlist)) - else: - targets.append("".join(dstlist)) - else: - targets.append("") - return targets - - -def get_targets_from_back(matrix: np.ndarray, dsttokens: List[str]) -> List[str]: - """Join some of the destination tokens, so that their number becomes the same as the number of input words. - Unaligned tokens tend to join to the right aligned token. - - Args: - matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment - the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the - words were aligned only in one direction, 0 - no alignment. - dsttokens: e.g. ["_2", "0", "1", "4_"] - Returns: - targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] - - Example: - If we get - matrix=[[3, 0, 0, 0] - [0, 2, 2, 3]] - dsttokens=["_2", "0", "1", "4_"] - it gives - targets = ["_2", "014_"] - Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. - """ - - targets = [] - last_covered_dst_id = len(dsttokens) - for i in range(len(matrix) - 1, -1, -1): - dstlist = [] - for j in range(last_covered_dst_id - 1, -1, -1): - if matrix[i][j] == 3 or ( - j == last_covered_dst_id - 1 and np.all(matrix[i, :] == 0) and np.all(matrix[:, j] == 0) - ): - if len(targets) == 0: - for k in range(len(dsttokens) - 1, j, -1): - if np.all(matrix[:, k] == 0): - dstlist.append(dsttokens[k]) - else: - break - dstlist.append(dsttokens[j]) - last_covered_dst_id = j - for k in range(j - 1, -1, -1): - if np.all(matrix[:, k] == 0): - dstlist.append(dsttokens[k]) - last_covered_dst_id = k - else: - break - if len(dstlist) > 0: - if args.mode == "tn": - targets.append("_".join(list(reversed(dstlist)))) - else: - targets.append("".join(list(reversed(dstlist)))) - else: - targets.append("") - return list(reversed(targets)) - - def main() -> None: g = open(args.giza_dir + "/GIZA++." + args.giza_suffix, "r", encoding="utf-8") f = open(args.giza_dir + "/GIZA++reverse." 
+ args.giza_suffix, "r", encoding="utf-8") + target_inner_delimiter = "" if args.mode == "tn": g, f = f, g + target_inner_delimiter = "_" out = open(args.giza_dir + "/" + args.out_filename, "w", encoding="utf-8") cache = {} good_count, not_mono_count, not_covered_count, exception_count = 0, 0, 0, 0 @@ -277,8 +84,8 @@ def main() -> None: else: matrix[matrix <= 2] = 0 # leave only 1-to-1 alignment points if check_monotonicity(matrix): - targets = get_targets(matrix, dsttokens) - targets_from_back = get_targets_from_back(matrix, dsttokens) + targets = get_targets(matrix, dsttokens, delimiter=target_inner_delimiter) + targets_from_back = get_targets_from_back(matrix, dsttokens, delimiter=target_inner_delimiter) if len(targets) != len(srctokens): raise ValueError( "targets length doesn't match srctokens length: len(targets)=" diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py b/nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py new file mode 100644 index 000000000000..4e786276108c --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from nemo.collections.nlp.data.spellchecking_asr_customization.dataset import ( + SpellcheckingAsrCustomizationDataset, + SpellcheckingAsrCustomizationTestDataset, + TarredSpellcheckingAsrCustomizationDataset, +) diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py b/nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py new file mode 100644 index 000000000000..803d0eaf8aed --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/bert_example.py @@ -0,0 +1,593 @@ +# Copyright 2019 The Google Research Authors. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from collections import OrderedDict +from os import path +from typing import Dict, List, Optional, Tuple, Union + +from transformers import PreTrainedTokenizerBase + +"""Build BERT Examples from asr hypothesis, customization candidates, target labels, span info. +""" + + +class BertExample(object): + """Class for training and inference examples for BERT. + + Attributes: + features: Feature dictionary. 
+ """ + + def __init__( + self, + input_ids: List[int], + input_mask: List[int], + segment_ids: List[int], + input_ids_for_subwords: List[int], + input_mask_for_subwords: List[int], + segment_ids_for_subwords: List[int], + character_pos_to_subword_pos: List[int], + fragment_indices: List[Tuple[int, int, int]], + labels_mask: List[int], + labels: List[int], + spans: List[Tuple[int, int, int]], + default_label: int, + ) -> None: + """Inputs to the example wrapper + + Args: + input_ids: indices of single characters (treated as subwords) + input_mask: list of bools with 0s in place of input_ids to be masked + segment_ids: list of ints from 0 to 10 to denote the text segment type ( + 0 - for tokens of ASR hypothesis, + 1 - for tokens of the first candidate + ... + 10 - for tokens of the tenth candidate + ) + input_ids_for_subwords: indices of real subwords (as tokenized by bert tokenizer) + input_mask_for_subwords: list of bools with 0s in place of input_ids_for_subwords to be masked + segment_ids_for_subwords: same as segment_ids but for input_ids_for_subwords + character_pos_to_subword_pos: list of size=len(input_ids), value=(position of corresponding subword in input_ids_for_subwords) + fragment_indices: list of tuples (start_position, end_position, candidate_id), end is exclusive, candidate_id can be -1 if not set + labels_mask: bool tensor with 0s in place of label tokens to be masked + labels: indices of semiotic classes which should be predicted from each of the + corresponding input tokens + spans: list of tuples (class_id, start_position, end_position), end is exclusive, class is always 1(CUSTOM) + default_label: The default label + """ + input_len = len(input_ids) + if not ( + input_len == len(input_mask) + and input_len == len(segment_ids) + and input_len == len(labels_mask) + and input_len == len(labels) + and input_len == len(character_pos_to_subword_pos) + ): + raise ValueError("All feature lists should have the same length ({})".format(input_len)) + + input_len_for_subwords = len(input_ids_for_subwords) + if not ( + input_len_for_subwords == len(input_mask_for_subwords) + and input_len_for_subwords == len(segment_ids_for_subwords) + ): + raise ValueError( + "All feature lists for subwords should have the same length ({})".format(input_len_for_subwords) + ) + + self.features = OrderedDict( + [ + ("input_ids", input_ids), + ("input_mask", input_mask), + ("segment_ids", segment_ids), + ("input_ids_for_subwords", input_ids_for_subwords), + ("input_mask_for_subwords", input_mask_for_subwords), + ("segment_ids_for_subwords", segment_ids_for_subwords), + ("character_pos_to_subword_pos", character_pos_to_subword_pos), + ("fragment_indices", fragment_indices), + ("labels_mask", labels_mask), + ("labels", labels), + ("spans", spans), + ] + ) + self._default_label = default_label + + +class BertExampleBuilder(object): + """Builder class for BertExample objects.""" + + def __init__( + self, + label_map: Dict[str, int], + semiotic_classes: Dict[str, int], + tokenizer: PreTrainedTokenizerBase, + max_seq_length: int, + ) -> None: + """Initializes an instance of BertExampleBuilder. + + Args: + label_map: Mapping from tags to tag IDs. + semiotic_classes: Mapping from semiotic classes to their ids. + tokenizer: Tokenizer object. + max_seq_length: Maximum sequence length. 
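+
+        Example (an illustrative sketch only; the toy label_map / semiotic_classes values and the tokenizer
+        name are assumptions, in the real configs they come from label_map.txt / semiotic_classes.txt and
+        model.language_model.pretrained_model_name):
+
+            from transformers import AutoTokenizer
+
+            tokenizer = AutoTokenizer.from_pretrained("huawei-noah/TinyBERT_General_6L_768D")
+            builder = BertExampleBuilder(
+                label_map={str(i): i for i in range(11)},    # hypothetical: tag ids 0..10 (0 = no candidate)
+                semiotic_classes={"PLAIN": 0, "CUSTOM": 1},  # "CUSTOM" is the only class used for spans
+                tokenizer=tokenizer,
+                max_seq_length=256,
+            )
+            # See build_bert_example below for how hyp/ref/target/span_info strings are passed in.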
+ """ + self._label_map = label_map + self._semiotic_classes = semiotic_classes + self._tokenizer = tokenizer + self._max_seq_length = max_seq_length + # one span usually covers one or more words and it only exists for custom phrases, so there are much less spans than characters. + self._max_spans_length = max(4, int(max_seq_length / 20)) + self._pad_id = self._tokenizer.pad_token_id + self._default_label = 0 + + def build_bert_example( + self, hyp: str, ref: str, target: Optional[str] = None, span_info: Optional[str] = None, infer: bool = False + ) -> Optional[BertExample]: + """Constructs a BERT Example. + + Args: + hyp: Hypothesis text. + ref: Candidate customization variants divided by ';' + target: + if infer==False, string of labels (each label is 1-based index of correct candidate) or 0. + if infer==True, it can be None or string of labels (each label is 1-based index of some candidate). In inference this can be used to get corresponding fragments to fragment_indices. + span_info: + string of format "CUSTOM 6 20;CUSTOM 40 51", number of parts corresponds to number of targets. Can be empty if target is 0. + If infer==False, numbers are correct start and end(exclusive) positions of the corresponding target candidate in the text. + If infer==True, numbers are EXPECTED positions in the text. In inference this can be used to get corresponding fragments to fragment_indices. + infer: inference mode + Returns: + BertExample, or None if the conversion from text to tags was infeasible + + Example (infer=False): + hyp: "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + ref: "d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y" + target: "1 3" + span_info: "CUSTOM 12 23;CUSTOM 28 41" + """ + if not ref.count(";") == 9: + raise ValueError("Expect 10 candidates: " + ref) + + span_info_parts = [] + targets = [] + + if len(target) > 0 and target != "0": + span_info_parts = span_info.split(";") + targets = list(map(int, target.split(" "))) + if len(span_info_parts) != len(targets): + raise ValueError( + "len(span_info_parts)=" + + str(len(span_info_parts)) + + " is different from len(target_parts)=" + + str(len(targets)) + ) + + tags = [0 for _ in hyp.split()] + if not infer: + for p, t in zip(span_info_parts, targets): + c, start, end = p.split(" ") + start = int(start) + end = int(end) + tags[start:end] = [t for i in range(end - start)] + + # get input features for characters + (input_ids, input_mask, segment_ids, labels_mask, labels, _, _,) = self._get_input_features( + hyp=hyp, ref=ref, tags=tags + ) + + # get input features for words + hyp_with_words = hyp.replace(" ", "").replace("_", " ") + ref_with_words = ref.replace(" ", "").replace("_", " ") + ( + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + _, + _, + _, + _, + ) = self._get_input_features(hyp=hyp_with_words, ref=ref_with_words, tags=None) + + # used in forward to concatenate subword embeddings to character embeddings + character_pos_to_subword_pos = self._map_characters_to_subwords(input_ids, input_ids_for_subwords) + + fragment_indices = [] + if infer: + # used in inference to take argmax over whole fragments instead of separate characters to get more consistent predictions + fragment_indices = self._get_fragment_indices(hyp, targets, span_info_parts) + + spans = [] + if not infer: + # during training spans are used in validation 
step to calculate accuracy on whole custom phrases instead of separate characters + spans = self._get_spans(span_info_parts) + + if len(input_ids) > self._max_seq_length or len(spans) > self._max_spans_length: + print( + "Max len exceeded: len(input_ids)=", + len(input_ids), + "; _max_seq_length=", + self._max_seq_length, + "; len(spans)=", + len(spans), + "; _max_spans_length=", + self._max_spans_length, + ) + return None + + example = BertExample( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + input_ids_for_subwords=input_ids_for_subwords, + input_mask_for_subwords=input_mask_for_subwords, + segment_ids_for_subwords=segment_ids_for_subwords, + character_pos_to_subword_pos=character_pos_to_subword_pos, + fragment_indices=fragment_indices, + labels_mask=labels_mask, + labels=labels, + spans=spans, + default_label=self._default_label, + ) + return example + + def _get_spans(self, span_info_parts: List[str]) -> List[Tuple[int, int, int]]: + """ Converts span_info string into a list of (class_id, start, end) where start, end are coordinates of starting and ending(exclusive) tokens in input_ids of BertExample + + Example: + span_info_parts: ["CUSTOM 37 41", "CUSTOM 47 52", "CUSTOM 42 46", "CUSTOM 0 7"] + result: [(1, 38, 42), (1, 48, 53), (1, 43, 47), (1, 1, 8)] + """ + result_spans = [] + + for p in span_info_parts: + if p == "": + break + c, start, end = p.split(" ") + if c not in self._semiotic_classes: + raise KeyError("class=" + c + " not found in self._semiotic_classes") + cid = self._semiotic_classes[c] + # +1 because this should be indexing on input_ids which has [CLS] token at beginning + start = int(start) + 1 + end = int(end) + 1 + result_spans.append((cid, start, end)) + return result_spans + + def _get_fragment_indices( + self, hyp: str, targets: List[int], span_info_parts: List[str] + ) -> Tuple[List[Tuple[int, int, int]]]: + """ Build fragment indices for real candidates. + This is used only at inference. + After external candidate retrieval we know approximately, where the candidate is located in the text (from the positions of matched n-grams). + In this function we + 1) adjust start/end positions to match word borders (possibly in multiple ways). + 2) generate content for fragment_indices tensor (it will be used during inference to average all predictions inside each fragment). + + Args: + hyp: ASR-hypothesis where space separates single characters (real space is replaced to underscore). + targets: list of candidate ids (only for real candidates, not dummy) + span_info_parts: list of strings of format like "CUSTOM 12 25", corresponding to each of targets, with start/end coordinates in text. + Returns: + List of tuples (start, end, target) where start and end are positions in ASR-hypothesis, target is candidate_id. + Note that returned fragments can be unsorted and can overlap, it's ok. + Example: + hyp: "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + targets: [1 2 3 4 6 7 9] + span_info_parts: ["CUSTOM 12 25", "CUSTOM 0 10", "CUSTOM 27 42", ...], where numbers are EXPECTED start/end positions of corresponding target candidates in the text. These positions will be adjusted in this functuion. 
+ fragment_indices: [(1, 12, 2), (13, 24, 1), (13, 28, 1), ..., (29, 42, 3)] + """ + + fragment_indices = [] + + letters = hyp.split() + + for target, p in zip(targets, span_info_parts): + _, start, end = p.split(" ") + start = int(start) + end = min(int(end), len(hyp)) # guarantee that end is not outside length + + # Adjusting strategy 1: expand both sides to the nearest space. + # Adjust start by finding the nearest left space or beginning of text. If start is already some word beginning, it won't change. + k = start + while k > 0 and letters[k] != '_': + k -= 1 + adjusted_start = k if k == 0 else k + 1 + + # Adjust end by finding the nearest right space. If end is already space or sentence end, it won't change. + k = end + while k < len(letters) and letters[k] != '_': + k += 1 + adjusted_end = k + + # +1 because this should be indexing on input_ids which has [CLS] token at beginning + fragment_indices.append((adjusted_start + 1, adjusted_end + 1, target)) + + # Adjusting strategy 2: try to shrink to the closest space (from left or right or both sides). + # For example, here the candidate "shippers" has a matching n-gram covering part of previous word + # a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w + # 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + expanded_fragment = "".join(letters[adjusted_start:adjusted_end]) + left_space_position = expanded_fragment.find("_") + right_space_position = expanded_fragment.rfind("_") + is_left_shrink = False + is_right_shrink = False + if left_space_position > -1 and left_space_position < len(expanded_fragment) / 2: + # +1 because of CLS token, another +1 to put start position after found space + fragment_indices.append((adjusted_start + 1 + left_space_position + 1, adjusted_end + 1, target)) + is_left_shrink = True + if right_space_position > -1 and right_space_position > len(expanded_fragment) / 2: + fragment_indices.append((adjusted_start + 1, adjusted_start + 1 + right_space_position, target)) + is_right_shrink = True + if is_left_shrink and is_right_shrink: + fragment_indices.append( + (adjusted_start + 1 + left_space_position + 1, adjusted_start + 1 + right_space_position, target) + ) + + return fragment_indices + + def _map_characters_to_subwords(self, input_ids: List[int], input_ids_for_subwords: List[int]) -> List[int]: + """ Maps each single character to the position of its corresponding subword. + + Args: + input_ids: List of character token ids. + input_ids_for_subwords: List of subword token ids. + Returns: + List of subword positions in input_ids_for_subwords. Its length is equal to len(input_ids) + + Example: + input_ids: [101, 1037, 1055, 1056, 1054, 1051, 1050, ..., 1051, 102, 1040, ..., 1050, 102, 1037, ..., 1041, 102, ..., 102] + input_ids_for_subwords: [101, 26357, 2106, 2666, 2061, 8202, 1998, 13012, 16643, 2319, 1043, 7174, 102, 2106, 3771, 7842, 2819, 2239, 102, ..., 102] + result: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, ... 
, 45, 46, 46, 46, 46, 46, 47] + """ + character_pos_to_subword_pos = [0 for _ in input_ids] + + ## '[CLS]', 'a', 's', 't', 'r', 'o', 'n', 'o', 'm', 'e', 'r', 's', '_', 'd', 'i', ..., 'l', 'o', '[SEP]', 'd', 'i', 'd', 'i', 'e', 'r', '_', 's', 'a', 'u', 'm', 'o', 'n', ..., '[SEP]' + tokens = self._tokenizer.convert_ids_to_tokens(input_ids) + ## '[CLS]', 'astronomers', 'did', '##ie', 'so', '##mon', 'and', 'tri', '##sti', '##an', 'g', '##llo', '[SEP]', 'did', '##ier', 'sa', '##um', '##on', '[SEP]', 'astro', '##no', '##mie', '[SEP]', 'tristan', 'gui', '##llo', '##t', '[SEP]', ..., '[SEP]', 'mercy', '[SEP]'] + tokens_for_subwords = self._tokenizer.convert_ids_to_tokens(input_ids_for_subwords) + j = 0 # index for tokens_for_subwords + j_offset = 0 # current letter index within subword + for i in range(len(tokens)): + character = tokens[i] + subword = tokens_for_subwords[j] + if character == "[CLS]" and subword == "[CLS]": + character_pos_to_subword_pos[i] = j + j += 1 + continue + if character == "[SEP]" and subword == "[SEP]": + character_pos_to_subword_pos[i] = j + j += 1 + continue + if character == "[CLS]" or character == "[SEP]" or subword == "[CLS]" or subword == "[SEP]": + raise IndexError( + "character[" + + str(i) + + "]=" + + character + + "; subword[" + + str(j) + + ";=" + + subword + + "subwords=" + + str(tokens_for_subwords) + ) + # At this point we expect that + # subword either 1) is a normal first token of a word or 2) starts with "##" (not first word token) + # character either 1) is a normal character or 2) is a space character "_" + if character == "_": + character_pos_to_subword_pos[i] = j - 1 # space is assigned to previous subtoken + continue + if j_offset < len(subword): + if character == subword[j_offset]: + character_pos_to_subword_pos[i] = j + j_offset += 1 + else: + raise IndexError( + "character mismatch:" + + "i=" + + str(i) + + "j=" + + str(j) + + "j_offset=" + + str(j_offset) + + "; len(tokens)=" + + str(len(tokens)) + + "; len(subwords)=" + + str(len(tokens_for_subwords)) + ) + # if subword is finished, increase j + if j_offset >= len(subword): + j += 1 + j_offset = 0 + if j >= len(tokens_for_subwords): + break + if tokens_for_subwords[j].startswith("##"): + j_offset = 2 + # check that all subword tokens are processed + if j < len(tokens_for_subwords): + raise IndexError( + "j=" + + str(j) + + "; len(tokens)=" + + str(len(tokens)) + + "; len(subwords)=" + + str(len(tokens_for_subwords)) + ) + return character_pos_to_subword_pos + + def _get_input_features( + self, hyp: str, ref: str, tags: List[int] + ) -> Tuple[List[int], List[int], List[int], List[int], List[int], List[str], List[int]]: + """Converts given ASR-hypothesis(hyp) and candidate string(ref) to features(token ids, mask, segment ids, etc). + + Args: + hyp: Hypothesis text. + ref: Candidate customization variants divided by ';' + tags: List of labels corresponding to each token of ASR-hypothesis or None when building an example during inference. + Returns: + Features (input_ids, input_mask, segment_ids, labels_mask, labels, hyp_tokens, token_start_indices) + + Note that this method is called both for character-based example and for word-based example (to split to subwords). 
+ + Character-based example: + hyp: "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + ref: "d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y" + tags: "0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3" + + resulting token sequence: + '[CLS]', 'a', 's', 't', 'r', 'o', 'n', 'o', 'm', 'e', 'r', 's', '_', 'd', 'i', ..., 'l', 'o', '[SEP]', 'd', 'i', 'd', 'i', 'e', 'r', '_', 's', 'a', 'u', 'm', 'o', 'n', ..., '[SEP]' + + Word-based example: + hyp: "astronomers didie somon and tristian gllo" + ref: "didier saumon;astronomie;tristan guillot;tristesse;monade;christian;astronomer;solomon;dididididi;mercy" + tags: None (not used for word-based case) + + resulting token sequence: + '[CLS]', 'astronomers', 'did', '##ie', 'so', '##mon', 'and', 'tri', '##sti', '##an', 'g', '##llo', '[SEP]', 'did', '##ier', 'sa', '##um', '##on', '[SEP]', 'astro', '##no', '##mie', '[SEP]', 'tristan', 'gui', '##llo', '##t', '[SEP]', ..., '[SEP]', 'mercy', '[SEP]'] + """ + + labels_mask = [] + labels = [] + if tags is None: + hyp_tokens, token_start_indices = self._split_to_wordpieces(hyp.split()) + else: + hyp_tokens, labels, token_start_indices = self._split_to_wordpieces_with_labels(hyp.split(), tags) + references = ref.split(";") + all_ref_tokens = [] + all_ref_segment_ids = [] + for i in range(len(references)): + ref_tokens, _ = self._split_to_wordpieces(references[i].split()) + all_ref_tokens.extend(ref_tokens + ["[SEP]"]) + all_ref_segment_ids.extend([i + 1] * (len(ref_tokens) + 1)) + + input_tokens = ["[CLS]"] + hyp_tokens + ["[SEP]"] + all_ref_tokens # ends with [SEP] + input_ids = self._tokenizer.convert_tokens_to_ids(input_tokens) + input_mask = [1] * len(input_ids) + segment_ids = [0] + [0] * len(hyp_tokens) + [0] + all_ref_segment_ids + if len(input_ids) != len(segment_ids): + raise ValueError( + "len(input_ids)=" + + str(len(input_ids)) + + " is different from len(segment_ids)=" + + str(len(segment_ids)) + ) + + if tags: + labels_mask = [0] + [1] * len(labels) + [0] + [0] * len(all_ref_tokens) + labels = [0] + labels + [0] + [0] * len(all_ref_tokens) + return (input_ids, input_mask, segment_ids, labels_mask, labels, hyp_tokens, token_start_indices) + + def _split_to_wordpieces_with_labels( + self, tokens: List[str], labels: List[int] + ) -> Tuple[List[str], List[int], List[int]]: + """Splits tokens (and the labels accordingly) to WordPieces. + + Args: + tokens: Tokens to be split. + labels: Labels (one per token) to be split. + + Returns: + 3-tuple with the split tokens, split labels, and the indices of starting tokens of words + """ + bert_tokens = [] # Original tokens split into wordpieces. + bert_labels = [] # Label for each wordpiece. + # Index of each wordpiece that starts a new token. + token_start_indices = [] + for i, token in enumerate(tokens): + # '+ 1' is because bert_tokens will be prepended by [CLS] token later. + token_start_indices.append(len(bert_tokens) + 1) + pieces = self._tokenizer.tokenize(token) + bert_tokens.extend(pieces) + bert_labels.extend([labels[i]] * len(pieces)) + return bert_tokens, bert_labels, token_start_indices + + def _split_to_wordpieces(self, tokens: List[str]) -> Tuple[List[str], List[int]]: + """Splits tokens to WordPieces. + + Args: + tokens: Tokens to be split. + + Returns: + tuple with the split tokens, and the indices of the WordPieces that start a token. 
+        """
+        bert_tokens = []  # Original tokens split into wordpieces.
+        # Index of each wordpiece that starts a new token.
+        token_start_indices = []
+        for i, token in enumerate(tokens):
+            # '+ 1' is because bert_tokens will be prepended by [CLS] token later.
+            token_start_indices.append(len(bert_tokens) + 1)
+            pieces = self._tokenizer.tokenize(token)
+            bert_tokens.extend(pieces)
+        return bert_tokens, token_start_indices
+
+    def read_input_file(
+        self, input_filename: str, infer: bool = False
+    ) -> Union[List['BertExample'], Tuple[List['BertExample'], List[Tuple[str, str]]]]:
+        """Reads in a tab-separated-value file and converts it to training/inference-ready examples.
+
+        Args:
+            input_filename: Path to the TSV input file.
+            infer: If true, input examples do not contain target info.
+
+        Returns:
+            examples: List of converted examples (BertExample).
+            or
+            (examples, hyps_refs): If infer==true, returns the converted examples together with the list of (ASR-hypothesis, candidate string) pairs they were built from.
+        """
+
+        if not path.exists(input_filename):
+            raise ValueError("Cannot find file: " + input_filename)
+        examples = []  # output list of BertExample
+        hyps_refs = []  # output list of tuples (ASR-hypothesis, candidate_str)
+        with open(input_filename, 'r') as f:
+            for line in f:
+                if len(examples) % 1000 == 0:
+                    logging.info("{} examples processed.".format(len(examples)))
+                if infer:
+                    parts = line.rstrip('\n').split('\t')
+                    hyp, ref, target, span_info = parts[0], parts[1], None, None
+                    if len(parts) == 4:
+                        target, span_info = parts[2], parts[3]
+                    try:
+                        example = self.build_bert_example(hyp, ref, target=target, span_info=span_info, infer=infer)
+                    except Exception as e:
+                        logging.warning(str(e))
+                        logging.warning(line)
+                        continue
+                    if example is None:
+                        logging.info("cannot create example: ")
+                        logging.info(line)
+                        continue
+                    hyps_refs.append((hyp, ref))
+                    examples.append(example)
+                else:
+                    hyp, ref, target, semiotic_info = line.rstrip('\n').split('\t')
+                    try:
+                        example = self.build_bert_example(
+                            hyp, ref, target=target, span_info=semiotic_info, infer=infer
+                        )
+                    except Exception as e:
+                        logging.warning(str(e))
+                        logging.warning(line)
+                        continue
+                    if example is None:
+                        logging.info("cannot create example: ")
+                        logging.info(line)
+                        continue
+                    examples.append(example)
+        logging.info(f"Done. {len(examples)} examples converted.")
+        if infer:
+            return examples, hyps_refs
+        return examples
diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py b/nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py
new file mode 100644
index 000000000000..69705ec21b9d
--- /dev/null
+++ b/nemo/collections/nlp/data/spellchecking_asr_customization/dataset.py
@@ -0,0 +1,521 @@
+# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
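For orientation, here is a minimal usage sketch of the example builder defined above, which produces the BertExample objects consumed by the dataset classes that follow. The tokenizer checkpoint, label map, semiotic-class inventory and file-free inputs below are assumptions for illustration only; the constructor keywords are inferred from the attributes set in __init__ and may differ from the final API.

# Hypothetical usage sketch; names marked "assumed" are not part of this patch.
from transformers import AutoTokenizer  # assumed: any BERT-style WordPiece tokenizer with the same interface

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # hypothetical checkpoint
builder = BertExampleBuilder(
    label_map={str(i): i for i in range(11)},     # assumed: 0 = "no replacement", 1..10 = candidate ids
    semiotic_classes={"PLAIN": 0, "CUSTOM": 1},   # assumed inventory; "CUSTOM" is the class used in span_info
    tokenizer=tokenizer,
    max_seq_length=256,
)

# One training example: characters are space-separated, real spaces are underscores,
# and ref must contain exactly 10 ';'-separated candidates.
example = builder.build_bert_example(
    hyp="a s t r o n o m e r s _ d i d i e _ s o m o n",
    ref=";".join(["d i d i e r _ s a u m o n"] + ["m e r c y"] * 9),
    target="1",                # candidate 1 is the correct replacement
    span_info="CUSTOM 12 23",  # the replacement covers characters 12..22 of hyp
    infer=False,
)
if example is not None:
    print(sorted(example.features.keys()))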
+ + +import pickle +from io import BytesIO +from typing import Dict, List, Optional, Tuple + +import braceexpand +import numpy as np +import torch +import webdataset as wd + +from nemo.collections.nlp.data.spellchecking_asr_customization.bert_example import BertExampleBuilder +from nemo.core.classes.dataset import Dataset, IterableDataset +from nemo.core.neural_types import ChannelType, IntType, LabelsType, MaskType, NeuralType +from nemo.utils import logging + +__all__ = [ + "SpellcheckingAsrCustomizationDataset", + "SpellcheckingAsrCustomizationTestDataset", + "TarredSpellcheckingAsrCustomizationDataset", +] + + +def collate_train_dataset( + batch: List[ + Tuple[ + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + ] + ], + pad_token_id: int, +) -> Tuple[ + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, +]: + """collate batch of training items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans). + pad_token_id: integer id of padding token (to use in padded_input_ids, padded_input_ids_for_subwords) + """ + max_length = 0 + max_length_for_subwords = 0 + max_length_for_spans = 1 # to avoid empty tensor + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) in batch: + if len(input_ids) > max_length: + max_length = len(input_ids) + if len(input_ids_for_subwords) > max_length_for_subwords: + max_length_for_subwords = len(input_ids_for_subwords) + if len(spans) > max_length_for_spans: + max_length_for_spans = len(spans) + + padded_input_ids = [] + padded_input_mask = [] + padded_segment_ids = [] + padded_input_ids_for_subwords = [] + padded_input_mask_for_subwords = [] + padded_segment_ids_for_subwords = [] + padded_character_pos_to_subword_pos = [] + padded_labels_mask = [] + padded_labels = [] + padded_spans = [] + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) in batch: + if len(input_ids) < max_length: + pad_length = max_length - len(input_ids) + padded_input_ids.append(np.pad(input_ids, pad_width=[0, pad_length], constant_values=pad_token_id)) + padded_input_mask.append(np.pad(input_mask, pad_width=[0, pad_length], constant_values=0)) + padded_segment_ids.append(np.pad(segment_ids, pad_width=[0, pad_length], constant_values=0)) + padded_labels_mask.append(np.pad(labels_mask, pad_width=[0, pad_length], constant_values=0)) + padded_labels.append(np.pad(labels, pad_width=[0, pad_length], constant_values=0)) + padded_character_pos_to_subword_pos.append( + np.pad(character_pos_to_subword_pos, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids.append(input_ids) + padded_input_mask.append(input_mask) + padded_segment_ids.append(segment_ids) + padded_labels_mask.append(labels_mask) + padded_labels.append(labels) + padded_character_pos_to_subword_pos.append(character_pos_to_subword_pos) + + if len(input_ids_for_subwords) < max_length_for_subwords: + pad_length = max_length_for_subwords - 
len(input_ids_for_subwords) + padded_input_ids_for_subwords.append( + np.pad(input_ids_for_subwords, pad_width=[0, pad_length], constant_values=pad_token_id) + ) + padded_input_mask_for_subwords.append( + np.pad(input_mask_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + padded_segment_ids_for_subwords.append( + np.pad(segment_ids_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids_for_subwords.append(input_ids_for_subwords) + padded_input_mask_for_subwords.append(input_mask_for_subwords) + padded_segment_ids_for_subwords.append(segment_ids_for_subwords) + + if len(spans) < max_length_for_spans: + padded_spans.append(np.ones((max_length_for_spans, 3), dtype=int) * -1) # pad value is [-1, -1, -1] + if len(spans) > 0: + padded_spans[-1][: spans.shape[0], : spans.shape[1]] = spans # copy actual spans to the beginning + else: + padded_spans.append(spans) + + return ( + torch.LongTensor(np.array(padded_input_ids)), + torch.LongTensor(np.array(padded_input_mask)), + torch.LongTensor(np.array(padded_segment_ids)), + torch.LongTensor(np.array(padded_input_ids_for_subwords)), + torch.LongTensor(np.array(padded_input_mask_for_subwords)), + torch.LongTensor(np.array(padded_segment_ids_for_subwords)), + torch.LongTensor(np.array(padded_character_pos_to_subword_pos)), + torch.LongTensor(np.array(padded_labels_mask)), + torch.LongTensor(np.array(padded_labels)), + torch.LongTensor(np.array(padded_spans)), + ) + + +def collate_test_dataset( + batch: List[Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]], + pad_token_id: int, +) -> Tuple[ + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, + torch.LongTensor, +]: + """collate batch of test items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, fragment_indices). 
+ pad_token_id: integer id of padding token (to use in padded_input_ids, padded_input_ids_for_subwords) + """ + max_length = 0 + max_length_for_subwords = 0 + max_length_for_fragment_indices = 1 # to avoid empty tensor + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) in batch: + if len(input_ids) > max_length: + max_length = len(input_ids) + if len(input_ids_for_subwords) > max_length_for_subwords: + max_length_for_subwords = len(input_ids_for_subwords) + if len(fragment_indices) > max_length_for_fragment_indices: + max_length_for_fragment_indices = len(fragment_indices) + + padded_input_ids = [] + padded_input_mask = [] + padded_segment_ids = [] + padded_input_ids_for_subwords = [] + padded_input_mask_for_subwords = [] + padded_segment_ids_for_subwords = [] + padded_character_pos_to_subword_pos = [] + padded_fragment_indices = [] + for ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) in batch: + if len(input_ids) < max_length: + pad_length = max_length - len(input_ids) + padded_input_ids.append(np.pad(input_ids, pad_width=[0, pad_length], constant_values=pad_token_id)) + padded_input_mask.append(np.pad(input_mask, pad_width=[0, pad_length], constant_values=0)) + padded_segment_ids.append(np.pad(segment_ids, pad_width=[0, pad_length], constant_values=0)) + padded_character_pos_to_subword_pos.append( + np.pad(character_pos_to_subword_pos, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids.append(input_ids) + padded_input_mask.append(input_mask) + padded_segment_ids.append(segment_ids) + padded_character_pos_to_subword_pos.append(character_pos_to_subword_pos) + + if len(input_ids_for_subwords) < max_length_for_subwords: + pad_length = max_length_for_subwords - len(input_ids_for_subwords) + padded_input_ids_for_subwords.append( + np.pad(input_ids_for_subwords, pad_width=[0, pad_length], constant_values=pad_token_id) + ) + padded_input_mask_for_subwords.append( + np.pad(input_mask_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + padded_segment_ids_for_subwords.append( + np.pad(segment_ids_for_subwords, pad_width=[0, pad_length], constant_values=0) + ) + else: + padded_input_ids_for_subwords.append(input_ids_for_subwords) + padded_input_mask_for_subwords.append(input_mask_for_subwords) + padded_segment_ids_for_subwords.append(segment_ids_for_subwords) + + if len(fragment_indices) < max_length_for_fragment_indices: + # we use [0, 1, 0] as padding value for fragment_indices, it corresponds to [CLS] token, which is ignored and won't affect anything + p = np.zeros((max_length_for_fragment_indices, 3), dtype=int) + p[:, 1] = 1 + p[:, 2] = 0 + padded_fragment_indices.append(p) + if len(fragment_indices) > 0: + padded_fragment_indices[-1][ + : fragment_indices.shape[0], : fragment_indices.shape[1] + ] = fragment_indices # copy actual fragment_indices to the beginning + else: + padded_fragment_indices.append(fragment_indices) + + return ( + torch.LongTensor(np.array(padded_input_ids)), + torch.LongTensor(np.array(padded_input_mask)), + torch.LongTensor(np.array(padded_segment_ids)), + torch.LongTensor(np.array(padded_input_ids_for_subwords)), + torch.LongTensor(np.array(padded_input_mask_for_subwords)), + torch.LongTensor(np.array(padded_segment_ids_for_subwords)), + 
torch.LongTensor(np.array(padded_character_pos_to_subword_pos)), + torch.LongTensor(np.array(padded_fragment_indices)), + ) + + +class SpellcheckingAsrCustomizationDataset(Dataset): + """ + Dataset as used by the SpellcheckingAsrCustomizationModel for training and validation pipelines. + + Args: + input_file (str): path to tsv-file with data + example_builder: instance of BertExampleBuilder + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), MaskType()), + "segment_ids": NeuralType(('B', 'T'), ChannelType()), + "input_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "input_mask_for_subwords": NeuralType(('B', 'T'), MaskType()), + "segment_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "character_pos_to_subword_pos": NeuralType(('B', 'T'), ChannelType()), + "labels_mask": NeuralType(('B', 'T'), MaskType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "spans": NeuralType(('B', 'T', 'C'), IntType()), + } + + def __init__(self, input_file: str, example_builder: BertExampleBuilder) -> None: + self.example_builder = example_builder + self.examples = self.example_builder.read_input_file(input_file, infer=False) + self.pad_token_id = self.example_builder._pad_id + + def __len__(self): + return len(self.examples) + + def __getitem__(self, idx: int): + example = self.examples[idx] + input_ids = np.array(example.features["input_ids"], dtype=np.int16) + input_mask = np.array(example.features["input_mask"], dtype=np.int8) + segment_ids = np.array(example.features["segment_ids"], dtype=np.int8) + input_ids_for_subwords = np.array(example.features["input_ids_for_subwords"], dtype=np.int16) + input_mask_for_subwords = np.array(example.features["input_mask_for_subwords"], dtype=np.int8) + segment_ids_for_subwords = np.array(example.features["segment_ids_for_subwords"], dtype=np.int8) + character_pos_to_subword_pos = np.array(example.features["character_pos_to_subword_pos"], dtype=np.int16) + labels_mask = np.array(example.features["labels_mask"], dtype=np.int8) + labels = np.array(example.features["labels"], dtype=np.int8) + spans = np.array(example.features["spans"], dtype=np.int16) + return ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) + + def _collate_fn(self, batch): + """collate batch of items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans). + """ + return collate_train_dataset(batch, pad_token_id=self.pad_token_id) + + +class TarredSpellcheckingAsrCustomizationDataset(IterableDataset): + """ + This Dataset loads training examples from tarred tokenized pickle files. + If using multiple processes the number of shards should be divisible by the number of workers to ensure an + even split among workers. If it is not divisible, logging will give a warning but training will proceed. + Additionally, please note that the len() of this DataLayer is assumed to be the number of tokens + of the text data. Shard strategy is scatter - each node gets a unique set of shards, which are permanently + pre-allocated and never changed at runtime. 
+ Args: + text_tar_filepaths: a string (can be brace-expandable). + shuffle_n (int): How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + Defaults to 0. + global_rank (int): Worker rank, used for partitioning shards. Defaults to 0. + world_size (int): Total number of processes, used for partitioning shards. Defaults to 1. + pad_token_id: id of pad token (used in collate_fn) + """ + + def __init__( + self, + text_tar_filepaths: str, + shuffle_n: int = 1, + global_rank: int = 0, + world_size: int = 1, + pad_token_id: int = -1, # use real value or get error + ): + super(TarredSpellcheckingAsrCustomizationDataset, self).__init__() + if pad_token_id < 0: + raise ValueError("use non-negative pad_token_id: " + str(pad_token_id)) + + self.pad_token_id = pad_token_id + + # Replace '(', '[', '<' and '_OP_' with '{' + brace_keys_open = ['(', '[', '<', '_OP_'] + for bkey in brace_keys_open: + if bkey in text_tar_filepaths: + text_tar_filepaths = text_tar_filepaths.replace(bkey, "{") + + # Replace ')', ']', '>' and '_CL_' with '}' + brace_keys_close = [')', ']', '>', '_CL_'] + for bkey in brace_keys_close: + if bkey in text_tar_filepaths: + text_tar_filepaths = text_tar_filepaths.replace(bkey, "}") + + # Brace expand + text_tar_filepaths = list(braceexpand.braceexpand(text_tar_filepaths)) + + logging.info("Tarred dataset shards will be scattered evenly across all nodes.") + if len(text_tar_filepaths) % world_size != 0: + logging.warning( + f"Number of shards in tarred dataset ({len(text_tar_filepaths)}) is not divisible " + f"by number of distributed workers ({world_size}). " + f"Some shards will not be used ({len(text_tar_filepaths) % world_size})." + ) + begin_idx = (len(text_tar_filepaths) // world_size) * global_rank + end_idx = begin_idx + (len(text_tar_filepaths) // world_size) + logging.info('Begin Index : %d' % (begin_idx)) + logging.info('End Index : %d' % (end_idx)) + text_tar_filepaths = text_tar_filepaths[begin_idx:end_idx] + logging.info( + "Partitioning tarred dataset: process (%d) taking shards [%d, %d)", global_rank, begin_idx, end_idx + ) + + self.tarpath = text_tar_filepaths + + # Put together WebDataset + self._dataset = wd.WebDataset(urls=text_tar_filepaths, nodesplitter=None) + + if shuffle_n > 0: + self._dataset = self._dataset.shuffle(shuffle_n, initial=shuffle_n) + else: + logging.info("WebDataset will not shuffle files within the tar files.") + + self._dataset = self._dataset.rename(pkl='pkl', key='__key__').to_tuple('pkl', 'key').map(f=self._build_sample) + + def _build_sample(self, fname): + # Load file + pkl_file, _ = fname + pkl_file = BytesIO(pkl_file) + data = pickle.load(pkl_file) + pkl_file.close() + input_ids = data["input_ids"] + input_mask = data["input_mask"] + segment_ids = data["segment_ids"] + input_ids_for_subwords = data["input_ids_for_subwords"] + input_mask_for_subwords = data["input_mask_for_subwords"] + segment_ids_for_subwords = data["segment_ids_for_subwords"] + character_pos_to_subword_pos = data["character_pos_to_subword_pos"] + labels_mask = data["labels_mask"] + labels = data["labels"] + spans = data["spans"] + + return ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) + + def __iter__(self): + return self._dataset.__iter__() + + def _collate_fn(self, batch): + """collate batch of items + Args: + batch: A list of tuples of (input_ids, input_mask, 
segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans). + """ + return collate_train_dataset(batch, pad_token_id=self.pad_token_id) + + +class SpellcheckingAsrCustomizationTestDataset(Dataset): + """ + Dataset for inference pipeline. + + Args: + sents: list of strings + example_builder: instance of BertExampleBuilder + """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), MaskType()), + "segment_ids": NeuralType(('B', 'T'), ChannelType()), + "input_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "input_mask_for_subwords": NeuralType(('B', 'T'), MaskType()), + "segment_ids_for_subwords": NeuralType(('B', 'T'), ChannelType()), + "character_pos_to_subword_pos": NeuralType(('B', 'T'), ChannelType()), + "fragment_indices": NeuralType(('B', 'T', 'C'), IntType()), + } + + def __init__(self, input_file: str, example_builder: BertExampleBuilder) -> None: + self.example_builder = example_builder + self.examples, self.hyps_refs = self.example_builder.read_input_file(input_file, infer=True) + self.pad_token_id = self.example_builder._pad_id + + def __len__(self): + return len(self.examples) + + def __getitem__(self, idx: int): + example = self.examples[idx] + input_ids = np.array(example.features["input_ids"]) + input_mask = np.array(example.features["input_mask"]) + segment_ids = np.array(example.features["segment_ids"]) + input_ids_for_subwords = np.array(example.features["input_ids_for_subwords"]) + input_mask_for_subwords = np.array(example.features["input_mask_for_subwords"]) + segment_ids_for_subwords = np.array(example.features["segment_ids_for_subwords"]) + character_pos_to_subword_pos = np.array(example.features["character_pos_to_subword_pos"], dtype=np.int64) + fragment_indices = np.array(example.features["fragment_indices"], dtype=np.int16) + return ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) + + def _collate_fn(self, batch): + """collate batch of items + Args: + batch: A list of tuples of (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords, segment_ids_for_subwords, character_pos_to_subword_pos). + """ + return collate_test_dataset(batch, pad_token_id=self.pad_token_id) diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py b/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py new file mode 100644 index 000000000000..cda551189d78 --- /dev/null +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py @@ -0,0 +1,845 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
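To make the data flow above concrete, a minimal sketch of feeding the training dataset to a PyTorch DataLoader with its own collate function. The file name "train.tsv" is hypothetical, "builder" refers to an example builder constructed as in the earlier sketch, and the batch size is arbitrary; the dataset and collate signatures are taken from the code above.

# Hypothetical usage sketch; "train.tsv" and "builder" are placeholders.
import torch

dataset = SpellcheckingAsrCustomizationDataset(input_file="train.tsv", example_builder=builder)
loader = torch.utils.data.DataLoader(dataset, batch_size=8, collate_fn=dataset._collate_fn)

# Each batch is a 10-tuple of LongTensors padded to the longest example in the batch:
# (input_ids, input_mask, segment_ids, input_ids_for_subwords, input_mask_for_subwords,
#  segment_ids_for_subwords, character_pos_to_subword_pos, labels_mask, labels, spans)
batch = next(iter(loader))
input_ids, labels, spans = batch[0], batch[8], batch[9]
print(input_ids.shape, labels.shape, spans.shape)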
+
+
+import json
+import math
+import random
+import re
+from collections import defaultdict, namedtuple
+from typing import Dict, List, Set, Tuple, Union
+
+import numpy as np
+from numba import jit
+
+"""Utility functions for Spellchecking ASR Customization."""
+
+
+def replace_diacritics(text):
+    text = re.sub(r"[éèëēêęěė]", "e", text)  # latin
+    text = re.sub(r"[ё]", "е", text)  # cyrillic
+    text = re.sub(r"[ãâāáäăàąåạảǎ]", "a", text)
+    text = re.sub(r"[úūüùưûů]", "u", text)
+    text = re.sub(r"[ôōóöõòőø]", "o", text)
+    text = re.sub(r"[ćçč]", "c", text)
+    text = re.sub(r"[ïīíîıì]", "i", text)
+    text = re.sub(r"[ñńňņ]", "n", text)
+    text = re.sub(r"[țťţ]", "t", text)
+    text = re.sub(r"[łľļ]", "l", text)
+    text = re.sub(r"[żžź]", "z", text)
+    text = re.sub(r"[ğ]", "g", text)
+    text = re.sub(r"[ďđ]", "d", text)
+    text = re.sub(r"[ķ]", "k", text)
+    text = re.sub(r"[ř]", "r", text)
+    text = re.sub(r"[ý]", "y", text)
+    text = re.sub(r"[æ]", "ae", text)
+    text = re.sub(r"[œ]", "oe", text)
+    text = re.sub(r"[șşšś]", "s", text)
+    return text
+
+
+def load_ngram_mappings(input_name: str, max_misspelled_freq: int = 1000000000) -> Tuple[defaultdict, Set]:
+    """Loads n-gram mapping vocabularies in form required by dynamic programming
+    Args:
+        input_name: file with n-gram mappings
+        max_misspelled_freq: threshold on misspelled n-gram frequency
+    Returns:
+        vocab: dict {key=original_ngram, value=dict{key=misspelled_ngram, value=frequency}}
+        ban_ngram: set of banned misspelled n-grams
+
+    Input format:
+        u t o    u+i t o    49    8145    114
+        u t o    <DELETE> t e    63    8145    16970
+        u t o    o+_ t o    42    8145    1807
+    """
+    vocab = defaultdict(dict)
+    ban_ngram = set()
+
+    with open(input_name, "r", encoding="utf-8") as f:
+        for line in f:
+            orig, misspelled, joint_freq, orig_freq, misspelled_freq = line.strip().split("\t")
+            if orig == "" or misspelled == "":
+                raise ValueError("Empty n-gram: orig=" + orig + "; misspelled=" + misspelled)
+            misspelled = misspelled.replace("<DELETE>", "=")
+            if misspelled.replace("=", "").strip() == "":  # skip if resulting ngram doesn't contain any real character
+                continue
+            if int(misspelled_freq) > max_misspelled_freq:
+                ban_ngram.add(misspelled + " ")  # space at the end is required within get_index function
+            vocab[orig][misspelled] = int(joint_freq) / int(orig_freq)
+    return vocab, ban_ngram
+
+
+def load_ngram_mappings_for_dp(input_name: str) -> Tuple[defaultdict, defaultdict, defaultdict, int]:
+    """Loads n-gram mapping vocabularies in form required by dynamic programming
+    Args:
+        input_name: file with n-gram mappings
+    Returns:
+        joint_vocab: dict where key=(original_ngram, misspelled_ngram), value=frequency
+        orig_vocab: dict where key=original_ngram, value=frequency
+        misspelled_vocab: dict where key=misspelled_ngram, value=frequency
+        max_len: maximum n-gram length seen in vocabulary
+
+    Input format: original \t misspelled \t joint_freq \t original_freq \t misspelled_freq
+        u t o    u+i t o    49    8145    114
+        u t o    <DELETE> t e    63    8145    16970
+        u t o    o+_ t o    42    8145    1807
+    """
+    joint_vocab = defaultdict(int)
+    orig_vocab = defaultdict(int)
+    misspelled_vocab = defaultdict(int)
+    max_len = 0
+    with open(input_name, "r", encoding="utf-8") as f:
+        for line in f:
+            orig, misspelled, joint_freq, _, _ = line.strip().split("\t")
+            if orig == "" or misspelled == "":
+                raise ValueError("Empty n-gram: orig=" + orig + "; misspelled=" + misspelled)
+            misspelled = misspelled.replace("<DELETE>", " ").replace("+", " ")
+            misspelled = " ".join(misspelled.split())
+            if misspelled == "":  # skip if resulting ngram doesn't contain any real
character + continue + max_len = max(max_len, orig.count(" ") + 1, misspelled.count(" ") + 1) + joint_vocab[(orig, misspelled)] += int(joint_freq) + orig_vocab[orig] += int(joint_freq) + misspelled_vocab[misspelled] += int(joint_freq) + return joint_vocab, orig_vocab, misspelled_vocab, max_len + + +def get_alignment_by_dp( + ref_phrase: str, hyp_phrase: str, dp_data: Tuple[defaultdict, defaultdict, defaultdict, int] +) -> List[Tuple[str, str, float, float, int, int, int]]: + """Get best alignment path between a reference and (possibly) misspelled phrase using n-gram mappings vocabulary. + Args: + ref_phrase: candidate reference phrase (letters separated by space, real space replaced by underscore) + hyp_phrase: (possibly) misspelled phrase (letters separated by space, real space replaced by underscore) + dp_data: n-gram mapping vocabularies used by dynamic programming + Returns: + list of tuples (hyp_ngram, ref_ngram, logprob, sum_logprob, joint_freq, orig_freq, misspelled_freq) + This is best alignment path. + + Example: + ref_phrase: "a n h y d r i d e" + hyp_phrase: "a n d _ h y d r o d" + + Result: + [("*", "*", 0.0, 0.0, 0, 0, 0) + ("a n d _ h", "a n h", -2.34, -2.34, 226, 2338, 2203) + ("y d r o", "y d r i", -2.95, -5.29, 11, 211, 1584) + ("d", "d e", -1.99, -7.28, 60610, 444714, 2450334) + ] + Final path score is in path[-1][3]: -7.28 + Note that the order of ref_phrase and hyp_phrase matters, because n-gram mappings vocabulary is not symmetrical. + """ + joint_vocab, orig_vocab, misspelled_vocab, max_len = dp_data + hyp_letters = ["*"] + hyp_phrase.split() + ref_letters = ["*"] + ref_phrase.split() + DpInfo = namedtuple( + "DpInfo", ["hyp_pos", "ref_pos", "best_hyp_ngram_len", "best_ref_ngram_len", "score", "sum_score"] + ) + history = defaultdict(DpInfo) + history[(0, 0)] = DpInfo( + hyp_pos=0, ref_pos=0, best_hyp_ngram_len=1, best_ref_ngram_len=1, score=0.0, sum_score=0.0 + ) + for hyp_pos in range(len(hyp_letters)): + for ref_pos in range(len(ref_letters)): + if hyp_pos == 0 and ref_pos == 0: # cell (0, 0) is already defined + continue + # consider cell (hyp_pos, ref_pos) and find best path to get there + best_hyp_ngram_len = 0 + best_ref_ngram_len = 0 + best_ngram_score = float("-inf") + best_sum_score = float("-inf") + # loop over paths ending on non-empty ngram mapping + for hyp_ngram_len in range(1, 1 + min(max_len, hyp_pos + 1)): + hyp_ngram = " ".join(hyp_letters[(hyp_pos - hyp_ngram_len + 1) : (hyp_pos + 1)]) + for ref_ngram_len in range(1, 1 + min(max_len, ref_pos + 1)): + ref_ngram = " ".join(ref_letters[(ref_pos - ref_ngram_len + 1) : (ref_pos + 1)]) + if (ref_ngram, hyp_ngram) not in joint_vocab: + continue + joint_freq = joint_vocab[(ref_ngram, hyp_ngram)] + orig_freq = orig_vocab.get(ref_ngram, 1) + ngram_score = math.log(joint_freq / orig_freq) + previous_cell = (hyp_pos - hyp_ngram_len, ref_pos - ref_ngram_len) + if previous_cell not in history: + print("cell ", previous_cell, "does not exist") + continue + previous_score = history[previous_cell].sum_score + sum_score = ngram_score + previous_score + if sum_score > best_sum_score: + best_sum_score = sum_score + best_ngram_score = ngram_score + best_hyp_ngram_len = hyp_ngram_len + best_ref_ngram_len = ref_ngram_len + # loop over two variants with deletion of one character + deletion_score = -6.0 + insertion_score = -6.0 + if hyp_pos > 0: + previous_cell = (hyp_pos - 1, ref_pos) + previous_score = history[previous_cell].sum_score + sum_score = deletion_score + previous_score + if sum_score > best_sum_score: + 
best_sum_score = sum_score + best_ngram_score = deletion_score + best_hyp_ngram_len = 1 + best_ref_ngram_len = 0 + + if ref_pos > 0: + previous_cell = (hyp_pos, ref_pos - 1) + previous_score = history[previous_cell].sum_score + sum_score = insertion_score + previous_score + if sum_score > best_sum_score: + best_sum_score = sum_score + best_ngram_score = insertion_score + best_hyp_ngram_len = 0 + best_ref_ngram_len = 1 + + if best_hyp_ngram_len == 0 and best_ref_ngram_len == 0: + raise ValueError("best_hyp_ngram_len = 0 and best_ref_ngram_len = 0") + + # save cell to history + history[(hyp_pos, ref_pos)] = DpInfo( + hyp_pos=hyp_pos, + ref_pos=ref_pos, + best_hyp_ngram_len=best_hyp_ngram_len, + best_ref_ngram_len=best_ref_ngram_len, + score=best_ngram_score, + sum_score=best_sum_score, + ) + # now trace back on best path starting from last positions + path = [] + hyp_pos = len(hyp_letters) - 1 + ref_pos = len(ref_letters) - 1 + cell_info = history[(hyp_pos, ref_pos)] + path.append(cell_info) + while hyp_pos > 0 or ref_pos > 0: + hyp_pos -= cell_info.best_hyp_ngram_len + ref_pos -= cell_info.best_ref_ngram_len + cell_info = history[(hyp_pos, ref_pos)] + path.append(cell_info) + + result = [] + for info in reversed(path): + hyp_ngram = " ".join(hyp_letters[(info.hyp_pos - info.best_hyp_ngram_len + 1) : (info.hyp_pos + 1)]) + ref_ngram = " ".join(ref_letters[(info.ref_pos - info.best_ref_ngram_len + 1) : (info.ref_pos + 1)]) + joint_freq = joint_vocab.get((ref_ngram, hyp_ngram), 0) + orig_freq = orig_vocab.get(ref_ngram, 0) + misspelled_freq = misspelled_vocab.get(hyp_ngram, 0) + result.append((hyp_ngram, ref_ngram, info.score, info.sum_score, joint_freq, orig_freq, misspelled_freq)) + return result + + +def get_index( + custom_phrases: List[str], + vocab: defaultdict, + ban_ngram_global: Set[str], + min_log_prob: float = -4.0, + max_phrases_per_ngram: int = 100, +) -> Tuple[List[str], Dict[str, List[Tuple[int, int, int, float]]]]: + """Given a restricted vocabulary of replacements, + loops through custom phrases, + generates all possible conversions and creates index. + + Args: + custom_phrases: list of all custom phrases, characters should be split by space, real space replaced to underscore. + vocab: n-gram mappings vocabulary - dict {key=original_ngram, value=dict{key=misspelled_ngram, value=frequency}} + ban_ngram_global: set of banned misspelled n-grams + min_log_prob: minimum log probability, after which we stop growing this n-gram. + max_phrases_per_ngram: maximum phrases that we allow to store per one n-gram. N-grams exceeding that quantity get banned. + + Returns: + phrases - list of phrases. Position in this list is used as phrase_id. + ngram2phrases - resulting index, i.e. 
dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + """ + + ban_ngram_local = set() # these ngrams are banned only for given custom_phrases + ngram_to_phrase_and_position = defaultdict(list) + + for custom_phrase in custom_phrases: + inputs = custom_phrase.split(" ") + begin = 0 + index_keys = [{} for _ in inputs] # key - letter ngram, index - beginning positions in phrase + + for begin in range(len(inputs)): + for end in range(begin + 1, min(len(inputs) + 1, begin + 5)): + inp = " ".join(inputs[begin:end]) + if inp not in vocab: + continue + for rep in vocab[inp]: + lp = math.log(vocab[inp][rep]) + + for b in range(max(0, end - 5), end): # try to grow previous ngrams with new replacement + new_ngrams = {} + for ngram in index_keys[b]: + lp_prev = index_keys[b][ngram] + if len(ngram) + len(rep) <= 10 and b + ngram.count(" ") == begin: + if lp_prev + lp > min_log_prob: + new_ngrams[ngram + rep + " "] = lp_prev + lp + index_keys[b].update(new_ngrams) # join two dictionaries + # add current replacement as ngram + if lp > min_log_prob: + index_keys[begin][rep + " "] = lp + + for b in range(len(index_keys)): + for ngram, lp in sorted(index_keys[b].items(), key=lambda item: item[1], reverse=True): + if ngram in ban_ngram_global: # here ngram ends with a space + continue + real_length = ngram.count(" ") + ngram = ngram.replace("+", " ").replace("=", " ") + ngram = " ".join(ngram.split()) # here ngram doesn't end with a space anymore + if ngram + " " in ban_ngram_global: # this can happen after deletion of + and = + continue + if ngram in ban_ngram_local: + continue + ngram_to_phrase_and_position[ngram].append((custom_phrase, b, real_length, lp)) + if len(ngram_to_phrase_and_position[ngram]) > max_phrases_per_ngram: + ban_ngram_local.add(ngram) + del ngram_to_phrase_and_position[ngram] + continue + + phrases = [] # id to phrase + phrase2id = {} # phrase to id + ngram2phrases = defaultdict(list) # ngram to list of tuples (phrase_id, begin, length, logprob) + + for ngram in ngram_to_phrase_and_position: + for phrase, b, length, lp in ngram_to_phrase_and_position[ngram]: + if phrase not in phrase2id: + phrases.append(phrase) + phrase2id[phrase] = len(phrases) - 1 + ngram2phrases[ngram].append((phrase2id[phrase], b, length, lp)) + + return phrases, ngram2phrases + + +def load_index(input_name: str) -> Tuple[List[str], Dict[str, List[Tuple[int, int, int, float]]]]: + """ Load index from file + Args: + input_name: file with index + Returns: + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. 
+ ngram2phrases: dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + """ + phrases = [] # id to phrase + phrase2id = {} # phrase to id + ngram2phrases = defaultdict(list) # ngram to list of tuples (phrase_id, begin_pos, size, logprob) + with open(input_name, "r", encoding="utf-8") as f: + for line in f: + ngram, phrase, b, size, lp = line.split("\t") + b = int(b) + size = int(size) + lp = float(lp) + if phrase not in phrase2id: + phrases.append(phrase) + phrase2id[phrase] = len(phrases) - 1 + ngram2phrases[ngram].append((phrase2id[phrase], b, size, lp)) + return phrases, ngram2phrases + + +def search_in_index( + ngram2phrases: Dict[str, List[Tuple[int, int, int, float]]], phrases: List[str], letters: Union[str, List[str]] +) -> Tuple[np.ndarray, List[Set[str]]]: + """ Function used to search in index + + Args: + ngram2phrases: dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + letters: list of letters of ASR-hypothesis. Should not contain spaces - real spaces should be replaced with underscores. + + Returns: + phrases2positions: a matrix of size (len(phrases), len(letters)). + It is filled with 1.0 (hits) on intersection of letter n-grams and phrases that are indexed by these n-grams, 0.0 - elsewhere. + It is used later to find phrases with many hits within a contiguous window - potential matching candidates. + position2ngrams: positions in ASR-hypothesis mapped to sets of ngrams starting from that position. + It is used later to check how well each found candidate is covered by n-grams (to avoid cases where some repeating n-gram gives many hits to a phrase, but the phrase itself is not well covered). + """ + + if " " in letters: + raise ValueError("letters should not contain space: " + str(letters)) + + phrases2positions = np.zeros((len(phrases), len(letters)), dtype=float) + # positions mapped to sets of ngrams starting from that position + position2ngrams = [set() for _ in range(len(letters))] + + begin = 0 + for begin in range(len(letters)): + for end in range(begin + 1, min(len(letters) + 1, begin + 7)): + ngram = " ".join(letters[begin:end]) + if ngram not in ngram2phrases: + continue + for phrase_id, b, size, lp in ngram2phrases[ngram]: + phrases2positions[phrase_id, begin:end] = 1.0 + position2ngrams[begin].add(ngram) + return phrases2positions, position2ngrams + + +@jit(nopython=True) # Set "nopython" mode for best performance, equivalent to @njit +def get_all_candidates_coverage(phrases, phrases2positions): + """Get maximum hit coverage for each phrase - within a moving window of length of the phrase. + Args: + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + phrases2positions: a matrix of size (len(phrases), len(ASR-hypothesis)). + It is filled with 1.0 (hits) on intersection of letter n-grams and phrases that are indexed by these n-grams, 0.0 - elsewhere. + Returns: + candidate2coverage: list of size len(phrases) containing coverage (0.0 to 1.0) in best window. + candidate2position: list of size len(phrases) containing starting position of best window. 
+ """ + candidate2coverage = [0.0] * len(phrases) + candidate2position = [-1] * len(phrases) + + for i in range(len(phrases)): + phrase_length = phrases[i].count(" ") + 1 + all_coverage = np.sum(phrases2positions[i]) / phrase_length + # if total coverage on whole ASR-hypothesis is too small, there is no sense in using moving window + if all_coverage < 0.4: + continue + moving_sum = np.sum(phrases2positions[i, 0:phrase_length]) + max_sum = moving_sum + best_pos = 0 + for pos in range(1, phrases2positions.shape[1] - phrase_length + 1): + moving_sum -= phrases2positions[i, pos - 1] + moving_sum += phrases2positions[i, pos + phrase_length - 1] + if moving_sum > max_sum: + max_sum = moving_sum + best_pos = pos + + coverage = max_sum / (phrase_length + 2) # smoothing + candidate2coverage[i] = coverage + candidate2position[i] = best_pos + return candidate2coverage, candidate2position + + +def get_candidates( + ngram2phrases: Dict[str, List[Tuple[int, int, int, float]]], + phrases: List[str], + letters: Union[str, List[str]], + pool_for_random_candidates: List[str], + min_phrase_coverage: float = 0.8, +) -> List[Tuple[str, int, int, float, float]]: + """Given an index of custom vocabulary and an ASR-hypothesis retrieve 10 candidates. + Args: + ngram2phrases: dict where key=ngram, value=list of tuples (phrase_id, begin_pos, size, logprob) + phrases: List of all phrases in custom vocabulary. Position corresponds to phrase_id. + letters: list of letters of ASR-hypothesis. Should not contain spaces - real spaces should be replaced with underscores. + pool_for_random_candidates: large list of strings, from which to sample random candidates in case when there are less than 10 real candidates + min_phrase_coverage: We discard candidates which are not covered by n-grams to at least to this extent + (to avoid cases where some repeating n-gram gives many hits to a phrase, but the phrase itself is not well covered). + Returns: + candidates: list of tuples (candidate_text, approximate_begin_position, length, coverage of window in ASR-hypothesis, coverage of phrase itself). 
+ """ + phrases2positions, position2ngrams = search_in_index(ngram2phrases, phrases, letters) + candidate2coverage, candidate2position = get_all_candidates_coverage(phrases, phrases2positions) + + # mask for each custom phrase, how many which symbols are covered by input ngrams + phrases2coveredsymbols = [[0 for x in phrases[i].split(" ")] for i in range(len(phrases))] + candidates = [] + k = 0 + for idx, coverage in sorted(enumerate(candidate2coverage), key=lambda item: item[1], reverse=True): + begin = candidate2position[idx] # this is most likely beginning of this candidate + phrase_length = phrases[idx].count(" ") + 1 + for pos in range(begin, begin + phrase_length): + # we do not know exact end of custom phrase in text, it can be different from phrase length + if pos >= len(position2ngrams): + break + for ngram in position2ngrams[pos]: + for phrase_id, b, size, lp in ngram2phrases[ngram]: + if phrase_id != idx: + continue + for ppos in range(b, b + size): + if ppos >= phrase_length: + break + phrases2coveredsymbols[phrase_id][ppos] = 1 + k += 1 + if k > 100: + break + real_coverage = sum(phrases2coveredsymbols[idx]) / len(phrases2coveredsymbols[idx]) + if real_coverage < min_phrase_coverage: + continue + candidates.append((phrases[idx], begin, phrase_length, coverage, real_coverage)) + + # no need to process this sentence further if it does not contain any real candidates + if len(candidates) == 0: + print("WARNING: no real candidates", candidates) + return [] + + while len(candidates) < 10: + dummy = random.choice(pool_for_random_candidates) + dummy = " ".join(list(dummy.replace(" ", "_"))) + candidates.append((dummy, -1, dummy.count(" ") + 1, 0.0, 0.0)) + + candidates = candidates[:10] + random.shuffle(candidates) + if len(candidates) != 10: + print("WARNING: cannot get 10 candidates", candidates) + return [] + + return candidates + + +def read_spellmapper_predictions(filename: str) -> List[Tuple[str, List[Tuple[int, int, str, float]], List[int]]]: + """Read results of SpellMapper inference from file. 
+ Args: + filename: file with SpellMapper results + Returns: + list of tuples (sent, list of fragment predictions, list of letter predictions) + One fragment prediction is a tuple (begin, end, replacement_text, prob) + """ + results = [] + with open(filename, "r", encoding="utf-8") as f: + for line in f: + text, candidate_str, fragment_predictions_str, letter_predictions_str = line.strip().split("\t") + text = text.replace(" ", "").replace("_", " ") + candidate_str = candidate_str.replace(" ", "").replace("_", " ") + candidates = candidate_str.split(";") + letter_predictions = list(map(int, letter_predictions_str.split())) + if len(candidates) != 10: + raise IndexError("expect 10 candidates, got: ", len(candidates)) + if len(text) != len(letter_predictions): + raise IndexError("len(text)=", len(text), "; len(letter_predictions)=", len(letter_predictions)) + replacements = [] + if fragment_predictions_str != "": + for prediction in fragment_predictions_str.split(";"): + begin, end, candidate_id, prob = prediction.split(" ") + begin = int(begin) + end = int(end) + candidate_id = int(candidate_id) + prob = float(prob) + replacements.append((begin, end, candidates[candidate_id - 1], prob)) + replacements.sort() # it will sort by begin, then by end + results.append((text, replacements, letter_predictions)) + return results + + +def substitute_replacements_in_text( + text: str, replacements: List[Tuple[int, int, str, float]], replace_hyphen_to_space: bool +) -> str: + """Substitute replacements to the input text, iterating from end to beginning, so that indexing does not change. + Note that we expect intersecting replacements to be already filtered. + Args: + text: sentence; + replacements: list of replacements, each is a tuple (begin, end, text, probability); + replace_hyphen_to_space: if True, hyphens in replacements will be converted to spaces; + Returns: + corrected sentence + """ + replacements.sort() + last_begin = len(text) + 1 + corrected_text = text + for begin, end, candidate, prob in reversed(replacements): + if end > last_begin: + print("WARNING: skip intersecting replacement [", candidate, "] in text: ", text) + continue + if replace_hyphen_to_space: + candidate = candidate.replace("-", " ") + corrected_text = corrected_text[:begin] + candidate + corrected_text[end:] + last_begin = begin + return corrected_text + + +def apply_replacements_to_text( + text: str, + replacements: List[Tuple[int, int, str, float]], + min_prob: float = 0.5, + replace_hyphen_to_space: bool = False, + dp_data: Tuple[defaultdict, defaultdict, defaultdict, int] = None, + min_dp_score_per_symbol: float = -99.9, +) -> str: + """Filter and apply replacements to the input sentence. 
+ Args: + text: input sentence; + replacements: list of proposed replacements (probably intersecting), each is a tuple (begin, end, text, probability); + min_prob: threshold on replacement probability; + replace_hyphen_to_space: if True, hyphens in replacements will be converted to spaces; + dp_data: n-gram mapping vocabularies used by dynamic programming, if None - dynamic programming is not used; + min_dp_score_per_symbol: threshold on dynamic programming sum score averaged by hypothesis length + Returns: + corrected sentence + """ + # sort replacements by positions + replacements.sort() + # filter replacements + # Note that we do not skip replacements with same text, otherwise intersecting candidates with lower probability can win + filtered_replacements = [] + for j in range(len(replacements)): + replacement = replacements[j] + begin, end, candidate, prob = replacement + fragment = text[begin:end] + candidate_spaced = " ".join(list(candidate.replace(" ", "_"))) + fragment_spaced = " ".join(list(fragment.replace(" ", "_"))) + # apply penalty if candidate length is bigger than fragment length + # to avoid cases like "forward-looking" replacing "looking" in "forward looking" resulting in "forward forward looking" + if len(candidate) > len(fragment): + penalty = len(fragment) / len(candidate) + prob *= penalty + # skip replacement with low probability + if prob < min_prob: + continue + # skip replacements with some predefined templates, e.g. "*'s" => "*s" + if check_banned_replacements(fragment, candidate): + continue + if dp_data is not None: + path = get_alignment_by_dp(candidate_spaced, fragment_spaced, dp_data) + # path[-1][3] is the sum of logprobs for best path of dynamic programming: divide sum_score by length + if path[-1][3] / (len(fragment)) < min_dp_score_per_symbol: + continue + + # skip replacement if it intersects with previous replacement and has lower probability, otherwise remove previous replacement + if len(filtered_replacements) > 0 and filtered_replacements[-1][1] > begin: + if filtered_replacements[-1][3] > prob: + continue + else: + filtered_replacements.pop() + filtered_replacements.append((begin, end, candidate, prob)) + + return substitute_replacements_in_text(text, filtered_replacements, replace_hyphen_to_space) + + +def update_manifest_with_spellmapper_corrections( + input_manifest_name: str, + short2full_name: str, + output_manifest_name: str, + spellmapper_results_name: str, + min_prob: float = 0.5, + replace_hyphen_to_space: bool = True, + field_name: str = "pred_text", + use_dp: bool = True, + ngram_mappings: Union[str, None] = None, + min_dp_score_per_symbol: float = -1.5, +) -> None: + """Post-process SpellMapper predictions and write corrected sentence to the specified field of nemo manifest. + The previous content of this field will be copied to "*_before_correction" field. + If the sentence was split into fragments before running SpellMapper, all replacements will be first gathered together and then applied to the original long sentence. 
+ Args: + input_manifest_name: input nemo manifest; + short2full_name: text file with two columns: short_sent \t full_sent; + output_manifest_name: output nemo manifest; + spellmapper_results_name: text file with SpellMapper inference results; + min_prob: threshold on replacement probability; + replace_hyphen_to_space: if True, hyphens in replacements will be converted to spaces; + field_name: name of json field whose text we want to correct; + use_dp: bool = If True, additional replacement filtering will be applied using dynamic programming (works slow); + ngram_mappings: file with n-gram mappings, only needed if use_dp=True + min_dp_score_per_symbol: threshold on dynamic programming sum score averaged by hypothesis length + """ + short2full_sent = defaultdict(list) + sent2corrections = defaultdict(dict) + with open(short2full_name, "r", encoding="utf-8") as f: + for line in f: + s = line.strip() + short_sent, full_sent = s.split("\t") + short2full_sent[short_sent].append(full_sent) + sent2corrections[full_sent] = [] + + spellmapper_results = read_spellmapper_predictions(spellmapper_results_name) + dp_data = None + if use_dp: + dp_data = load_ngram_mappings_for_dp(ngram_mappings) + + for text, replacements, _ in spellmapper_results: + short_sent = text + if short_sent not in short2full_sent: + continue + # it can happen that one short sentence occurred in multiple full sentences + for full_sent in short2full_sent[short_sent]: + offset = full_sent.find(short_sent) + for begin, end, candidate, prob in replacements: + sent2corrections[full_sent].append((begin + offset, end + offset, candidate, prob)) + + out = open(output_manifest_name, "w", encoding="utf-8") + with open(input_manifest_name, "r", encoding="utf-8") as f: + for line in f: + record = json.loads(line.strip()) + sent = record[field_name] + record[field_name + "_before_correction"] = record[field_name] + if sent in sent2corrections: + record[field_name] = apply_replacements_to_text( + sent, + sent2corrections[sent], + min_prob=min_prob, + replace_hyphen_to_space=replace_hyphen_to_space, + dp_data=dp_data, + min_dp_score_per_symbol=min_dp_score_per_symbol, + ) + out.write(json.dumps(record) + "\n") + out.close() + + +def extract_and_split_text_from_manifest( + input_name: str, output_name: str, field_name: str = "pred_text", len_in_words: int = 16, step_in_words: int = 8 +) -> None: + """Extract text of the specified field in nemo manifest and split it into fragments (possibly with intersection). + The result is saved to a text file with two columns: short_sent \t full_sent. + This is useful if we want to process shorter sentences and then apply the results to the original long sentence. + Args: + input_name: input nemo manifest, + output_name: output text file, + field_name: name of json field from which we extract the sentence text, + len_in_words: maximum number of words in a fragment, + step_in_words: on how many words we move at each step. + For example, if the len_in_words=16 and step_in_words=8 the fragments will be intersected by half. 
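+    A small illustration under assumed settings len_in_words=4 and step_in_words=2: the sentence
+    "a b c d e f" yields the fragments "a b c d", "c d e f" and "e f", each written on its own line
+    together with the full sentence.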
+ """ + short2full_sent = set() + with open(input_name, "r", encoding="utf-8") as f: + for line in f: + record = json.loads(line.strip()) + sent = record[field_name] + if " " in sent: + raise ValueError("found multiple space in: " + sent) + words = sent.split() + for i in range(0, len(words), step_in_words): + short_sent = " ".join(words[i : i + len_in_words]) + short2full_sent.add((short_sent, sent)) + + with open(output_name, "w", encoding="utf-8") as out: + for short_sent, full_sent in short2full_sent: + out.write(short_sent + "\t" + full_sent + "\n") + + +def check_banned_replacements(src: str, dst: str) -> bool: + """This function is used to check is a pair of words/phrases is matching some common template that we don't want to replace with one another. + Args: + src: first phrase + dst: second phrase + Returns True if this replacement should be banned. + """ + # customers' => customer's + if src.endswith("s'") and dst.endswith("'s") and src[0:-2] == dst[0:-2]: + return True + # customer's => customers' + if src.endswith("'s") and dst.endswith("s'") and src[0:-2] == dst[0:-2]: + return True + # customers => customer's + if src.endswith("s") and dst.endswith("'s") and src[0:-1] == dst[0:-2]: + return True + # customer's => customers + if src.endswith("'s") and dst.endswith("s") and src[0:-2] == dst[0:-1]: + return True + # customers => customers' + if src.endswith("s") and dst.endswith("s'") and src[0:-1] == dst[0:-2]: + return True + # customers' => customers + if src.endswith("s'") and dst.endswith("s") and src[0:-2] == dst[0:-1]: + return True + # utilities => utility's + if src.endswith("ies") and dst.endswith("y's") and src[0:-3] == dst[0:-3]: + return True + # utility's => utilities + if src.endswith("y's") and dst.endswith("ies") and src[0:-3] == dst[0:-3]: + return True + # utilities => utility + if src.endswith("ies") and dst.endswith("y") and src[0:-3] == dst[0:-1]: + return True + # utility => utilities + if src.endswith("y") and dst.endswith("ies") and src[0:-1] == dst[0:-3]: + return True + # group is => group's + if src.endswith(" is") and dst.endswith("'s") and src[0:-3] == dst[0:-2]: + return True + # group's => group is + if src.endswith("'s") and dst.endswith(" is") and src[0:-2] == dst[0:-3]: + return True + # trex's => trex + if src.endswith("'s") and src[0:-2] == dst: + return True + # trex => trex's + if dst.endswith("'s") and dst[0:-2] == src: + return True + # increases => increase (but trimass => trimas is ok) + if src.endswith("s") and (not src.endswith("ss")) and src[0:-1] == dst: + return True + # increase => increases ((but trimas => trimass is ok)) + if dst.endswith("s") and (not dst.endswith("ss")) and dst[0:-1] == src: + return True + # anticipate => anticipated + if src.endswith("e") and dst.endswith("ed") and src[0:-1] == dst[0:-2]: + return True + # anticipated => anticipate + if src.endswith("ed") and dst.endswith("e") and src[0:-2] == dst[0:-1]: + return True + # regarded => regard + if src.endswith("ed") and src[0:-2] == dst: + return True + # regard => regarded + if dst.endswith("ed") and dst[0:-2] == src: + return True + # longer => long + if src.endswith("er") and src[0:-2] == dst: + return True + # long => longer + if dst.endswith("er") and dst[0:-2] == src: + return True + # discussed => discussing + if src.endswith("ed") and dst.endswith("ing") and src[0:-2] == dst[0:-3]: + return True + # discussing => discussed + if src.endswith("ing") and dst.endswith("ed") and src[0:-3] == dst[0:-2]: + return True + # discussion => discussing + if 
src.endswith("ion") and dst.endswith("ing") and src[0:-3] == dst[0:-3]: + return True + # discussing => discussion + if src.endswith("ing") and dst.endswith("ion") and src[0:-3] == dst[0:-3]: + return True + # dispensers => dispensing + if src.endswith("ers") and dst.endswith("ing") and src[0:-3] == dst[0:-3]: + return True + # dispensing => dispensers + if src.endswith("ing") and dst.endswith("ers") and src[0:-3] == dst[0:-3]: + return True + # discussion => discussed + if src.endswith("ion") and dst.endswith("ed") and src[0:-3] == dst[0:-2]: + return True + # discussed => discussion + if src.endswith("ed") and dst.endswith("ion") and src[0:-2] == dst[0:-3]: + return True + # incremental => increment + if src.endswith("ntal") and dst.endswith("nt") and src[0:-4] == dst[0:-2]: + return True + # increment => incremental + if src.endswith("nt") and dst.endswith("ntal") and src[0:-2] == dst[0:-4]: + return True + # delivery => deliverer + if src.endswith("ery") and dst.endswith("erer") and src[0:-3] == dst[0:-4]: + return True + # deliverer => delivery + if src.endswith("erer") and dst.endswith("ery") and src[0:-4] == dst[0:-3]: + return True + # comparably => comparable + if src.endswith("bly") and dst.endswith("ble") and src[0:-3] == dst[0:-3]: + return True + # comparable => comparably + if src.endswith("ble") and dst.endswith("bly") and src[0:-3] == dst[0:-3]: + return True + # beautiful => beautifully + if src.endswith("l") and dst.endswith("lly") and src[0:-1] == dst[0:-3]: + return True + # beautifully => beautiful + if src.endswith("lly") and dst.endswith("l") and src[0:-3] == dst[0:-1]: + return True + # america => american + if src.endswith("a") and dst.endswith("an") and src[0:-1] == dst[0:-2]: + return True + # american => america + if src.endswith("an") and dst.endswith("a") and src[0:-2] == dst[0:-1]: + return True + # reinvesting => investing + if src.startswith("re") and src[2:] == dst: + return True + # investing => reinvesting + if dst.startswith("re") and dst[2:] == src: + return True + # outperformance => performance + if src.startswith("out") and src[3:] == dst: + return True + # performance => outperformance + if dst.startswith("out") and dst[3:] == src: + return True + return False diff --git a/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py b/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py index 253f7a41c703..9d5f5b7b23ad 100644 --- a/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py +++ b/nemo/collections/nlp/data/text_normalization_as_tagging/utils.py @@ -17,6 +17,8 @@ from itertools import groupby from typing import Dict, List, Tuple +import numpy as np + """Utility functions for Thutmose Tagger.""" @@ -305,3 +307,197 @@ def get_src_and_dst_for_alignment( ) return written_str, spoken, " ".join(same_begin), " ".join(same_end) + + +def fill_alignment_matrix( + fline2: str, fline3: str, gline2: str, gline3: str +) -> Tuple[np.ndarray, List[str], List[str]]: + """Parse Giza++ direct and reverse alignment results and represent them as an alignment matrix + + Args: + fline2: e.g. "_2 0 1 4_" + fline3: e.g. "NULL ({ }) twenty ({ 1 }) fourteen ({ 2 3 4 })" + gline2: e.g. "twenty fourteen" + gline3: e.g. 
"NULL ({ }) _2 ({ 1 }) 0 ({ }) 1 ({ }) 4_ ({ 2 })" + + Returns: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + srctokens: e.g. ["twenty", "fourteen"] + dsttokens: e.g. ["_2", "0", "1", "4_"] + + For example, the alignment matrix for the above example may look like: + [[3, 0, 0, 0] + [0, 2, 2, 3]] + """ + if fline2 is None or gline2 is None or fline3 is None or gline3 is None: + raise ValueError(f"empty params") + srctokens = gline2.split() + dsttokens = fline2.split() + pattern = r"([^ ]+) \(\{ ([^\(\{\}\)]*) \}\)" + src2dst = re.findall(pattern, fline3.replace("({ })", "({ })")) + dst2src = re.findall(pattern, gline3.replace("({ })", "({ })")) + if len(src2dst) != len(srctokens) + 1: + raise ValueError( + "length mismatch: len(src2dst)=" + + str(len(src2dst)) + + "; len(srctokens)" + + str(len(srctokens)) + + "\n" + + gline2 + + "\n" + + fline3 + ) + if len(dst2src) != len(dsttokens) + 1: + raise ValueError( + "length mismatch: len(dst2src)=" + + str(len(dst2src)) + + "; len(dsttokens)" + + str(len(dsttokens)) + + "\n" + + fline2 + + "\n" + + gline3 + ) + matrix = np.zeros((len(srctokens), len(dsttokens))) + for i in range(1, len(src2dst)): + token, to_str = src2dst[i] + if to_str == "": + continue + to = list(map(int, to_str.split())) + for t in to: + matrix[i - 1][t - 1] = 2 + + for i in range(1, len(dst2src)): + token, to_str = dst2src[i] + if to_str == "": + continue + to = list(map(int, to_str.split())) + for t in to: + matrix[t - 1][i - 1] += 1 + + return matrix, srctokens, dsttokens + + +def check_monotonicity(matrix: np.ndarray) -> bool: + """Check if alignment is monotonous - i.e. the relative order is preserved (no swaps). + + Args: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + """ + is_sorted = lambda k: np.all(k[:-1] <= k[1:]) + + a = np.argwhere(matrix == 3) + b = np.argwhere(matrix == 2) + c = np.vstack((a, b)) + d = c[c[:, 1].argsort()] # sort by second column (less important) + d = d[d[:, 0].argsort(kind="mergesort")] + return is_sorted(d[:, 1]) + + +def get_targets(matrix: np.ndarray, dsttokens: List[str], delimiter: str) -> List[str]: + """Join some of the destination tokens, so that their number becomes the same as the number of input words. + Unaligned tokens tend to join to the left aligned token. + + Args: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + dsttokens: e.g. ["_2", "0", "1", "4_"] + Returns: + targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] + + Example: + If we get + matrix=[[3, 0, 0, 0] + [0, 2, 2, 3]] + dsttokens=["_2", "0", "1", "4_"] + it gives + targets = ["_201", "4_"] + Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. 
+ """ + targets = [] + last_covered_dst_id = -1 + for i in range(len(matrix)): + dstlist = [] + for j in range(last_covered_dst_id + 1, len(dsttokens)): + # matrix[i][j] == 3: safe alignment point + if matrix[i][j] == 3 or ( + j == last_covered_dst_id + 1 + and np.all(matrix[i, :] == 0) # if the whole line does not have safe points + and np.all(matrix[:, j] == 0) # and the whole column does not have safe points, match them + ): + if len(targets) == 0: # if this is first safe point, attach left unaligned columns to it, if any + for k in range(0, j): + if np.all(matrix[:, k] == 0): # if column k does not have safe points + dstlist.append(dsttokens[k]) + else: + break + dstlist.append(dsttokens[j]) + last_covered_dst_id = j + for k in range(j + 1, len(dsttokens)): + if np.all(matrix[:, k] == 0): # if column k does not have safe points + dstlist.append(dsttokens[k]) + last_covered_dst_id = k + else: + break + + if len(dstlist) > 0: + targets.append(delimiter.join(dstlist)) + else: + targets.append("") + return targets + + +def get_targets_from_back(matrix: np.ndarray, dsttokens: List[str], delimiter: str) -> List[str]: + """Join some of the destination tokens, so that their number becomes the same as the number of input words. + Unaligned tokens tend to join to the right aligned token. + + Args: + matrix: a numpy array of shape (src_len, dst_len) filled with [0, 1, 2, 3], where 3 means a reliable alignment + the corresponding words were aligned to one another in direct and reverse alignment runs, 1 and 2 mean that the + words were aligned only in one direction, 0 - no alignment. + dsttokens: e.g. ["_2", "0", "1", "4_"] + Returns: + targets: list of string tokens, with one-to-one correspondence to matrix.shape[0] + + Example: + If we get + matrix=[[3, 0, 0, 0] + [0, 2, 2, 3]] + dsttokens=["_2", "0", "1", "4_"] + it gives + targets = ["_2", "014_"] + Actually, this is a mistake instead of ["_20", "14_"]. That will be further corrected by regular expressions. 
+ """ + + targets = [] + last_covered_dst_id = len(dsttokens) + for i in range(len(matrix) - 1, -1, -1): + dstlist = [] + for j in range(last_covered_dst_id - 1, -1, -1): + if matrix[i][j] == 3 or ( + j == last_covered_dst_id - 1 and np.all(matrix[i, :] == 0) and np.all(matrix[:, j] == 0) + ): + if len(targets) == 0: + for k in range(len(dsttokens) - 1, j, -1): + if np.all(matrix[:, k] == 0): + dstlist.append(dsttokens[k]) + else: + break + dstlist.append(dsttokens[j]) + last_covered_dst_id = j + for k in range(j - 1, -1, -1): + if np.all(matrix[:, k] == 0): + dstlist.append(dsttokens[k]) + last_covered_dst_id = k + else: + break + if len(dstlist) > 0: + targets.append(delimiter.join(list(reversed(dstlist)))) + else: + targets.append("") + return list(reversed(targets)) diff --git a/nemo/collections/nlp/models/__init__.py b/nemo/collections/nlp/models/__init__.py index 90e692a238a6..75b48f64df13 100644 --- a/nemo/collections/nlp/models/__init__.py +++ b/nemo/collections/nlp/models/__init__.py @@ -30,6 +30,7 @@ from nemo.collections.nlp.models.language_modeling.transformer_lm_model import TransformerLMModel from nemo.collections.nlp.models.machine_translation import MTEncDecModel from nemo.collections.nlp.models.question_answering.qa_model import QAModel +from nemo.collections.nlp.models.spellchecking_asr_customization import SpellcheckingAsrCustomizationModel from nemo.collections.nlp.models.text2sparql.text2sparql_model import Text2SparqlModel from nemo.collections.nlp.models.text_classification import TextClassificationModel from nemo.collections.nlp.models.text_normalization_as_tagging import ThutmoseTaggerModel diff --git a/nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py b/nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py new file mode 100644 index 000000000000..5e94de32e9aa --- /dev/null +++ b/nemo/collections/nlp/models/spellchecking_asr_customization/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from nemo.collections.nlp.models.spellchecking_asr_customization.spellchecking_model import ( + SpellcheckingAsrCustomizationModel, +) diff --git a/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py b/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py new file mode 100644 index 000000000000..fc889de2dc63 --- /dev/null +++ b/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py @@ -0,0 +1,526 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from time import perf_counter +from typing import Dict, Optional + +import torch +from omegaconf import DictConfig +from pytorch_lightning import Trainer + +from nemo.collections.common.losses import CrossEntropyLoss +from nemo.collections.nlp.data.spellchecking_asr_customization import ( + SpellcheckingAsrCustomizationDataset, + SpellcheckingAsrCustomizationTestDataset, + TarredSpellcheckingAsrCustomizationDataset, + bert_example, +) +from nemo.collections.nlp.data.text_normalization_as_tagging.utils import read_label_map +from nemo.collections.nlp.metrics.classification_report import ClassificationReport +from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.modules.common.token_classifier import TokenClassifier +from nemo.collections.nlp.parts.utils_funcs import tensor2list +from nemo.core.classes.common import PretrainedModelInfo, typecheck +from nemo.core.neural_types import LogitsType, NeuralType +from nemo.utils import logging +from nemo.utils.decorators import experimental + +__all__ = ["SpellcheckingAsrCustomizationModel"] + + +@experimental +class SpellcheckingAsrCustomizationModel(NLPModel): + """ + BERT-based model for Spellchecking ASR Customization. + It takes as input ASR hypothesis and candidate customization entries. + It labels the hypothesis with correct entry index or 0. + Example input: [CLS] a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o [SEP] d i d i e r _ s a u m o n [SEP] a s t r o n o m i e [SEP] t r i s t a n _ g u i l l o t [SEP] ... + Input segments: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 + Example output: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 0 ... 
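+    (In the example above, label 1 marks the characters of the fragment that is mapped to the first candidate
+    "d i d i e r _ s a u m o n", label 3 marks the fragment mapped to the third candidate
+    "t r i s t a n _ g u i l l o t", and label 0 marks characters that are left unchanged.)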
+ """ + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + } + + @property + def input_module(self): + return self + + @property + def output_module(self): + return self + + def __init__(self, cfg: DictConfig, trainer: Trainer = None) -> None: + super().__init__(cfg=cfg, trainer=trainer) + + # Label map contains 11 labels: 0 for nothing, 1..10 for target candidate ids + label_map_file = self.register_artifact("label_map", cfg.label_map, verify_src_exists=True) + + # Semiotic classes for this model consist only of classes CUSTOM(means fragment containing custom candidate) and PLAIN (any other single-character fragment) + # They are used only during validation step, to calculate accuracy for CUSTOM and PLAIN classes separately + semiotic_classes_file = self.register_artifact( + "semiotic_classes", cfg.semiotic_classes, verify_src_exists=True + ) + self.label_map = read_label_map(label_map_file) + self.semiotic_classes = read_label_map(semiotic_classes_file) + + self.num_labels = len(self.label_map) + self.num_semiotic_labels = len(self.semiotic_classes) + self.id_2_tag = {tag_id: tag for tag, tag_id in self.label_map.items()} + self.id_2_semiotic = {semiotic_id: semiotic for semiotic, semiotic_id in self.semiotic_classes.items()} + self.max_sequence_len = cfg.get('max_sequence_len', self.tokenizer.tokenizer.model_max_length) + + # Setup to track metrics + # We will have (len(self.semiotic_classes) + 1) labels. + # Last one stands for WRONG (span in which the predicted tags don't match the labels) + # This is needed to feed the sequence of classes to classification_report during validation + label_ids = self.semiotic_classes.copy() + label_ids["WRONG"] = len(self.semiotic_classes) + self.tag_classification_report = ClassificationReport( + len(self.semiotic_classes) + 1, label_ids=label_ids, mode='micro', dist_sync_on_step=True + ) + + self.hidden_size = cfg.hidden_size + + # hidden size is doubled because in forward we concatenate embeddings for characters and embeddings for subwords + self.logits = TokenClassifier( + self.hidden_size * 2, num_classes=self.num_labels, num_layers=1, log_softmax=False, dropout=0.1 + ) + + self.loss_fn = CrossEntropyLoss(logits_ndim=3) + + self.builder = bert_example.BertExampleBuilder( + self.label_map, self.semiotic_classes, self.tokenizer.tokenizer, self.max_sequence_len + ) + + @typecheck() + def forward( + self, + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + ): + """ + Same BERT-based model is used to calculate embeddings for sequence of single characters and for sequence of subwords. + Then we concatenate subword embeddings to each character corresponding to this subword. + We return logits for each character x 11 labels: 0 - character doesn't belong to any candidate, 1..10 - character belongs to candidate with this id. 
+ + # Arguments + input_ids: token_ids for single characters; .shape = [batch_size, char_seq_len]; .dtype = int64 + input_mask: mask for input_ids(1 - real, 0 - padding); .shape = [batch_size, char_seq_len]; .dtype = int64 + segment_ids: segment types for input_ids (0 - ASR-hypothesis, 1..10 - candidate); .shape = [batch_size, char_seq_len]; .dtype = int64 + input_ids_for_subwords: token_ids for subwords; .shape = [batch_size, subword_seq_len]; .dtype = int64 + input_mask_for_subwords: mask for input_ids_for_subwords(1 - real, 0 - padding); .shape = [batch_size, subword_seq_len]; .dtype = int64 + segment_ids_for_subwords: segment types for input_ids_for_subwords (0 - ASR-hypothesis, 1..10 - candidate); .shape = [batch_size, subword_seq_len]; .dtype = int64 + character_pos_to_subword_pos: tensor mapping character position in the input sequence to subword position; .shape = [batch_size, char_seq_len]; .dtype = int64 + """ + + # src_hiddens.shape = [batch_size, char_seq_len, bert_hidden_size]; .dtype=float32 + src_hiddens = self.bert_model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask) + # src_hiddens_for_subwords.shape = [batch_size, subword_seq_len, bert_hidden_size]; .dtype=float32 + src_hiddens_for_subwords = self.bert_model( + input_ids=input_ids_for_subwords, + token_type_ids=segment_ids_for_subwords, + attention_mask=input_mask_for_subwords, + ) + + # Next three commands concatenate subword embeddings to each character embedding of the corresponding subword + # index.shape = [batch_size, char_seq_len, bert_hidden_size]; .dtype=int64 + index = character_pos_to_subword_pos.unsqueeze(-1).expand((-1, -1, src_hiddens_for_subwords.shape[2])) + # src_hiddens_2.shape = [batch_size, char_seq_len, bert_hidden_size]; .dtype=float32 + src_hiddens_2 = torch.gather(src_hiddens_for_subwords, 1, index) + # src_hiddens.shape = [batch_size, char_seq_len, bert_hidden_size * 2]; .dtype=float32 + src_hiddens = torch.cat((src_hiddens, src_hiddens_2), 2) + + # logits.shape = [batch_size, char_seq_len, num_labels]; num_labels=11: ids from 0 to 10; .dtype=float32 + logits = self.logits(hidden_states=src_hiddens) + return logits + + # Training + def training_step(self, batch, batch_idx): + """ + Lightning calls this inside the training loop with the data from the training dataloader + passed in as `batch`. + """ + + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + _, + ) = batch + logits = self.forward( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + input_ids_for_subwords=input_ids_for_subwords, + input_mask_for_subwords=input_mask_for_subwords, + segment_ids_for_subwords=segment_ids_for_subwords, + character_pos_to_subword_pos=character_pos_to_subword_pos, + ) + loss = self.loss_fn(logits=logits, labels=labels, loss_mask=labels_mask) + lr = self._optimizer.param_groups[0]['lr'] + self.log('train_loss', loss) + self.log('lr', lr, prog_bar=True) + return {'loss': loss, 'lr': lr} + + # Validation and Testing + def validation_step(self, batch, batch_idx): + """ + Lightning calls this inside the validation loop with the data from the validation dataloader + passed in as `batch`. 
+ """ + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + labels_mask, + labels, + spans, + ) = batch + logits = self.forward( + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + input_ids_for_subwords=input_ids_for_subwords, + input_mask_for_subwords=input_mask_for_subwords, + segment_ids_for_subwords=segment_ids_for_subwords, + character_pos_to_subword_pos=character_pos_to_subword_pos, + ) + tag_preds = torch.argmax(logits, dim=2) + + # Update tag classification_report + for input_mask_seq, segment_seq, prediction_seq, label_seq, span_seq in zip( + input_mask.tolist(), segment_ids.tolist(), tag_preds.tolist(), labels.tolist(), spans.tolist() + ): + # Here we want to track whether the predicted output matches ground truth labels for each whole span. + # We construct the special input for classification report, for example: + # span_labels = [PLAIN, PLAIN, PLAIN, PLAIN, CUSTOM, CUSTOM] + # span_predictions = [PLAIN, WRONG, PLAIN, PLAIN, WRONG, CUSTOM] + # Note that the number of PLAIN and CUSTOM occurrences in the report is not comparable, + # because PLAIN is for characters, and CUSTOM is for phrases. + span_labels = [] + span_predictions = [] + plain_cid = self.semiotic_classes["PLAIN"] + wrong_cid = self.tag_classification_report.num_classes - 1 + + # First we loop through all predictions for input characters with label=0, they are regarded as separate spans with PLAIN class. + # It either stays as PLAIN if the model prediction is 0, or turns to WRONG. + for i in range(len(segment_seq)): + if input_mask_seq[i] == 0: + continue + if segment_seq[i] > 0: # token does not belong to ASR-hypothesis => it's over + break + if label_seq[i] == 0: + span_labels.append(plain_cid) + if prediction_seq[i] == 0: + span_predictions.append(plain_cid) + else: + span_predictions.append(wrong_cid) + # if label_seq[i] != 0 then it belongs to CUSTOM span and will be handled later + + # Second we loop through spans tensor which contains only spans for CUSTOM class. + # It stays as CUSTOM if all predictions for the whole span are equal to the labels, otherwise it turns to WRONG. + for cid, start, end in span_seq: + if cid == -1: + break + span_labels.append(cid) + if prediction_seq[start:end] == label_seq[start:end]: + span_predictions.append(cid) + else: + span_predictions.append(wrong_cid) + + if len(span_labels) != len(span_predictions): + raise ValueError( + "Length mismatch: len(span_labels)=" + + str(len(span_labels)) + + "; len(span_predictions)=" + + str(len(span_predictions)) + ) + self.tag_classification_report( + torch.tensor(span_predictions).to(self.device), torch.tensor(span_labels).to(self.device) + ) + + val_loss = self.loss_fn(logits=logits, labels=labels, loss_mask=labels_mask) + return {'val_loss': val_loss} + + def validation_epoch_end(self, outputs): + """ + Called at the end of validation to aggregate outputs. + :param outputs: list of individual outputs of each validation step. 
+ """ + avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() + + # Calculate metrics and classification report + # Note that in our task recall = accuracy, and the recall column is the per class accuracy + _, tag_accuracy, _, tag_report = self.tag_classification_report.compute() + + logging.info("Total tag accuracy: " + str(tag_accuracy)) + logging.info(tag_report) + + self.log('val_loss', avg_loss, prog_bar=True) + self.log('tag accuracy', tag_accuracy) + + self.tag_classification_report.reset() + + def test_step(self, batch, batch_idx): + """ + Lightning calls this inside the test loop with the data from the test dataloader + passed in as `batch`. + """ + return self.validation_step(batch, batch_idx) + + def test_epoch_end(self, outputs): + """ + Called at the end of test to aggregate outputs. + :param outputs: list of individual outputs of each test step. + """ + return self.validation_epoch_end(outputs) + + # Functions for inference + + @torch.no_grad() + def infer(self, dataloader_cfg: DictConfig, input_name: str, output_name: str) -> None: + """ Main function for Inference + + Args: + dataloader_cfg: config for dataloader + input_name: Input file with tab-separated text records. Each record consists of 2 items: + - ASR hypothesis + - candidate phrases separated by semicolon + output_name: Output file with tab-separated text records. Each record consists of 2 items: + - ASR hypothesis + - candidate phrases separated by semicolon + - list of possible replacements with probabilities (start, pos, candidate_id, prob), separated by semicolon + - list of labels, predicted for each letter (for debug purposes) + + Returns: None + """ + mode = self.training + device = "cuda" if torch.cuda.is_available() else "cpu" + + try: + # Switch model to evaluation mode + self.eval() + self.to(device) + logging_level = logging.get_verbosity() + logging.set_verbosity(logging.WARNING) + infer_datalayer = self._setup_infer_dataloader(dataloader_cfg, input_name) + + all_tag_preds = ( + [] + ) # list(size=number of sentences) of lists(size=number of letters) of tag predictions (best candidate_id for each letter) + all_possible_replacements = ( + [] + ) # list(size=number of sentences) of lists(size=number of potential replacements) of tuples(start, pos, candidate_id, prob) + for batch in iter(infer_datalayer): + ( + input_ids, + input_mask, + segment_ids, + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + character_pos_to_subword_pos, + fragment_indices, + ) = batch + + # tag_logits.shape = [batch_size, char_seq_len, num_labels]; num_labels=11: ids from 0 to 10; .dtype=float32 + tag_logits = self.forward( + input_ids=input_ids.to(self.device), + input_mask=input_mask.to(self.device), + segment_ids=segment_ids.to(self.device), + input_ids_for_subwords=input_ids_for_subwords.to(self.device), + input_mask_for_subwords=input_mask_for_subwords.to(self.device), + segment_ids_for_subwords=segment_ids_for_subwords.to(self.device), + character_pos_to_subword_pos=character_pos_to_subword_pos.to(self.device), + ) + + # fragment_indices.shape=[batsh_size, num_fragments, 3], where last dimension is [start, end, label], where label is candidate id from 1 to 10 + # Next we want to convert predictions for separate letters to probabilities for each whole fragment from fragment_indices. + # To achieve this we first sum the letter logits in each fragment and divide by its length. + # (We use .cumsum and then difference between end and start to get sum per fragment). 
+ # Then we convert logits to probs with softmax and for each fragment extract only the prob for given label. + # Finally we get a list of tuples (start, end, label, prob) + indices_len = fragment_indices.shape[1] + # this padding adds a row of zeros (size=num_labels) as first element of sequence in second dimension. This is needed for cumsum operations. + padded_logits = torch.nn.functional.pad(tag_logits, pad=(0, 0, 1, 0)) + ( + batch_size, + seq_len, + num_labels, + ) = padded_logits.shape # seq_len is +1 compared to that of tag_logits, because of padding + # cumsum.shape=[batch_size, seq_len, num_labels] + cumsum = padded_logits.cumsum(dim=1) + # the size -1 is inferred from other dimensions. We get rid of batch dimension. + cumsum_view = cumsum.view(-1, num_labels) + word_index = ( + torch.ones((batch_size, indices_len), dtype=torch.long) + * torch.arange(batch_size).reshape((-1, 1)) + * seq_len + ).view(-1) + lower_index = (fragment_indices[..., 0]).view(-1) + word_index + higher_index = (fragment_indices[..., 1]).view(-1) + word_index + d_index = (higher_index - lower_index).reshape((-1, 1)).to(self.device) # word lengths + dlog = cumsum_view[higher_index, :] - cumsum_view[lower_index, :] # sum of logits + # word_logits.shape=[batch_size, indices_len, num_labels] + word_logits = (dlog / d_index.float()).view(batch_size, indices_len, num_labels) + # convert logits to probs, same shape + word_probs = torch.nn.functional.softmax(word_logits, dim=-1).to(self.device) + # candidate_index.shape=[batch_size, indices_len] + candidate_index = fragment_indices[:, :, 2].to(self.device) + # candidate_probs.shape=[batch_size, indices_len] + candidate_probs = torch.take_along_dim(word_probs, candidate_index.unsqueeze(2), dim=-1).squeeze(2) + for i in range(batch_size): + possible_replacements = [] + for j in range(indices_len): + start, end, candidate_id = ( + int(fragment_indices[i][j][0]), + int(fragment_indices[i][j][1]), + int(fragment_indices[i][j][2]), + ) + if candidate_id == 0: # this is padding + continue + prob = round(float(candidate_probs[i][j]), 5) + if prob < 0.01: + continue + # -1 because in the output file we will not have a [CLS] token + possible_replacements.append( + str(start - 1) + " " + str(end - 1) + " " + str(candidate_id) + " " + str(prob) + ) + all_possible_replacements.append(possible_replacements) + + # torch.argmax(tag_logits, dim=-1) gives a tensor of best predicted labels with shape [batch_size, char_seq_len], .dtype = int64 + # character_preds is list of lists of predicted labels + character_preds = tensor2list(torch.argmax(tag_logits, dim=-1)) + all_tag_preds.extend(character_preds) + + if len(all_possible_replacements) != len(all_tag_preds) or len(all_possible_replacements) != len( + infer_datalayer.dataset.examples + ): + raise IndexError( + "number of sentences mismatch: len(all_possible_replacements)=" + + str(len(all_possible_replacements)) + + "; len(all_tag_preds)=" + + str(len(all_tag_preds)) + + "; len(infer_datalayer.dataset.examples)=" + + str(len(infer_datalayer.dataset.examples)) + ) + # save results to file + with open(output_name, "w", encoding="utf-8") as out: + for i in range(len(infer_datalayer.dataset.examples)): + hyp, ref = infer_datalayer.dataset.hyps_refs[i] + num_letters = hyp.count(" ") + 1 + tag_pred_str = " ".join(list(map(str, all_tag_preds[i][1 : (num_letters + 1)]))) + possible_replacements_str = ";".join(all_possible_replacements[i]) + out.write(hyp + "\t" + ref + "\t" + possible_replacements_str + "\t" + tag_pred_str + "\n") + + 
except Exception as e:
+            raise ValueError("Error processing file " + input_name) from e
+
+        finally:
+            # set mode back to its original value
+            self.train(mode=mode)
+            logging.set_verbosity(logging_level)
+
+    # Functions for processing data
+    def setup_training_data(self, train_data_config: Optional[DictConfig]):
+        if not train_data_config or not train_data_config.data_path:
+            logging.info(
+                "Dataloader config or file_path for the train is missing, so no data loader for train is created!"
+            )
+            self._train_dl = None
+            return
+        self._train_dl = self._setup_dataloader_from_config(cfg=train_data_config, data_split="train")
+
+    def setup_validation_data(self, val_data_config: Optional[DictConfig]):
+        if not val_data_config or not val_data_config.data_path:
+            logging.info(
+                "Dataloader config or file_path for the validation is missing, so no data loader for validation is created!"
+            )
+            self._validation_dl = None
+            return
+        self._validation_dl = self._setup_dataloader_from_config(cfg=val_data_config, data_split="val")
+
+    def setup_test_data(self, test_data_config: Optional[DictConfig]):
+        if not test_data_config or test_data_config.data_path is None:
+            logging.info(
+                "Dataloader config or file_path for the test is missing, so no data loader for test is created!"
+            )
+            self._test_dl = None
+            return
+        self._test_dl = self._setup_dataloader_from_config(cfg=test_data_config, data_split="test")
+
+    def _setup_dataloader_from_config(self, cfg: DictConfig, data_split: str):
+        start_time = perf_counter()
+        logging.info(f'Creating {data_split} dataset')
+        if cfg.get("use_tarred_dataset", False):
+            dataset = TarredSpellcheckingAsrCustomizationDataset(
+                cfg.data_path,
+                shuffle_n=cfg.get("tar_shuffle_n", 100),
+                global_rank=self.global_rank,
+                world_size=self.world_size,
+                pad_token_id=self.builder._pad_id,
+            )
+        else:
+            input_file = cfg.data_path
+            dataset = SpellcheckingAsrCustomizationDataset(input_file=input_file, example_builder=self.builder)
+        dl = torch.utils.data.DataLoader(
+            dataset=dataset, batch_size=cfg.batch_size, shuffle=cfg.shuffle, collate_fn=dataset.collate_fn
+        )
+        running_time = perf_counter() - start_time
+        logging.info(f'Took {running_time} seconds')
+        return dl
+
+    def _setup_infer_dataloader(self, cfg: DictConfig, input_name: str) -> 'torch.utils.data.DataLoader':
+        """
+        Setup function for an inference data loader.
+        Args:
+            cfg: config dictionary containing data loader params like batch_size, num_workers and pin_memory
+            input_name: path to input file.
+        Returns:
+            A pytorch DataLoader.
+ """ + dataset = SpellcheckingAsrCustomizationTestDataset(input_name, example_builder=self.builder) + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=cfg["batch_size"], + shuffle=False, + num_workers=cfg.get("num_workers", 0), + pin_memory=cfg.get("pin_memory", False), + drop_last=False, + collate_fn=dataset.collate_fn, + ) + + @classmethod + def list_available_models(cls) -> Optional[PretrainedModelInfo]: + return None diff --git a/scripts/dataset_processing/spoken_wikipedia/run.sh b/scripts/dataset_processing/spoken_wikipedia/run.sh index 2894eb1dc55e..5ae447c9a1a4 100644 --- a/scripts/dataset_processing/spoken_wikipedia/run.sh +++ b/scripts/dataset_processing/spoken_wikipedia/run.sh @@ -102,7 +102,7 @@ ${NEMO_PATH}/tools/ctc_segmentation/run_segmentation.sh \ --MODEL_NAME_OR_PATH=${MODEL_FOR_SEGMENTATION} \ --DATA_DIR=${INPUT_DIR}_prepared \ --OUTPUT_DIR=${OUTPUT_DIR} \ ---MIN_SCORE=${MIN_SCORE} +--MIN_SCORE=${THRESHOLD} # Thresholds for filtering CER_THRESHOLD=20 diff --git a/tests/collections/nlp/test_spellchecking_asr_customization.py b/tests/collections/nlp/test_spellchecking_asr_customization.py new file mode 100644 index 000000000000..8e4d6e9a7b8f --- /dev/null +++ b/tests/collections/nlp/test_spellchecking_asr_customization.py @@ -0,0 +1,1102 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from transformers import AutoTokenizer + +from nemo.collections.nlp.data.spellchecking_asr_customization.bert_example import BertExampleBuilder +from nemo.collections.nlp.data.spellchecking_asr_customization.utils import ( + apply_replacements_to_text, + substitute_replacements_in_text, +) + + +@pytest.mark.unit +def test_substitute_replacements_in_text(): + text = "we began the further diversification of our revenue base with the protterra supply agreement and the navastar joint development agreement" + replacements = [(66, 75, 'pro-terra', 0.99986), (101, 109, 'navistar', 0.996)] + gold_text = "we began the further diversification of our revenue base with the pro-terra supply agreement and the navistar joint development agreement" + corrected_text = substitute_replacements_in_text(text, replacements, replace_hyphen_to_space=False) + assert corrected_text == gold_text + + gold_text_no_hyphen = "we began the further diversification of our revenue base with the pro terra supply agreement and the navistar joint development agreement" + corrected_text = substitute_replacements_in_text(text, replacements, replace_hyphen_to_space=True) + assert corrected_text == gold_text_no_hyphen + + +@pytest.mark.unit +def test_apply_replacements_to_text(): + + # min_prob = 0.5 + # dp_data = None, + # min_dp_score_per_symbol: float = -99.9 + + # test more than one fragment to replace, test multiple same replacements + text = "we began the further diversification of our revenue base with the protterra supply agreement and the navastar joint development agreement" + replacements = [ + (66, 75, 'proterra', 0.99986), + (66, 75, 'proterra', 0.9956), + (101, 109, 'navistar', 0.93), + (101, 109, 'navistar', 0.91), + (101, 109, 'navistar', 0.92), + ] + gold_text = "we began the further diversification of our revenue base with the proterra supply agreement and the navistar joint development agreement" + corrected_text = apply_replacements_to_text( + text, replacements, min_prob=0.5, replace_hyphen_to_space=False, dp_data=None + ) + assert corrected_text == gold_text + + # test that min_prob works + gold_text = "we began the further diversification of our revenue base with the proterra supply agreement and the navastar joint development agreement" + corrected_text = apply_replacements_to_text( + text, replacements, min_prob=0.95, replace_hyphen_to_space=False, dp_data=None + ) + assert corrected_text == gold_text + + +@pytest.fixture() +def bert_example_builder(): + tokenizer = AutoTokenizer.from_pretrained("huawei-noah/TinyBERT_General_6L_768D") + label_map = {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "10": 10} + semiotic_classes = {"PLAIN": 0, "CUSTOM": 1} + max_seq_len = 256 + builder = BertExampleBuilder(label_map, semiotic_classes, tokenizer, max_seq_len) + return builder + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_creation(bert_example_builder): + assert bert_example_builder._tokenizer is not None + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_builder_get_spans(bert_example_builder): + span_info_parts = ["CUSTOM 37 41", "CUSTOM 47 52", "CUSTOM 42 46", "CUSTOM 0 7"] + gold_sorted_spans = [(1, 1, 8), (1, 38, 42), (1, 43, 47), (1, 48, 53)] + spans = bert_example_builder._get_spans(span_info_parts) + spans.sort() + assert spans == gold_sorted_spans + + 
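+# (An interpretation of gold_sorted_spans in test_builder_get_spans above: each span is
+# (semiotic_class_id, start + 1, end + 1), where class id 1 is CUSTOM and the +1 offset appears to
+# account for the leading [CLS] token in the character sequence.)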
+@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_builder_get_fragment_indices(bert_example_builder): + hyp = "a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w" + targets = [1] + # a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w + # 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + span_info_parts = ["CUSTOM 8 17"] + gold_sorted_fragment_indices = [(7, 18, 1), (11, 18, 1)] + fragment_indices = bert_example_builder._get_fragment_indices(hyp, targets, span_info_parts) + fragment_indices.sort() + assert fragment_indices == gold_sorted_fragment_indices + + # a b o u t _ o u r _ s h i p e r s _ b u t _ y o u _ k n o w + # 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + span_info_parts = ["CUSTOM 10 16"] + gold_sorted_fragment_indices = [(11, 18, 1)] + fragment_indices = bert_example_builder._get_fragment_indices(hyp, targets, span_info_parts) + fragment_indices.sort() + assert fragment_indices == gold_sorted_fragment_indices + + +@pytest.mark.skip("Doesn't work download when testing on github, for unknown reason") +@pytest.mark.with_downloads +@pytest.mark.unit +def test_builder_get_input_features(bert_example_builder): + hyp = "a s t r o n o m e r s _ d i d i e _ s o m o n _ a n d _ t r i s t i a n _ g l l o" + ref = "d i d i e r _ s a u m o n;a s t r o n o m i e;t r i s t a n _ g u i l l o t;t r i s t e s s e;m o n a d e;c h r i s t i a n;a s t r o n o m e r;s o l o m o n;d i d i d i d i d i;m e r c y" + targets = [1, 3] + span_info_parts = ["CUSTOM 12 23", "CUSTOM 28 41"] + + gold_tags = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + ] + gold_input_ids = [ + 101, + 1037, + 1055, + 1056, + 1054, + 1051, + 1050, + 1051, + 1049, + 1041, + 1054, + 1055, + 1035, + 1040, + 1045, + 1040, + 1045, + 1041, + 1035, + 1055, + 1051, + 1049, + 1051, + 1050, + 1035, + 1037, + 1050, + 1040, + 1035, + 1056, + 1054, + 1045, + 1055, + 1056, + 1045, + 1037, + 1050, + 1035, + 1043, + 1048, + 1048, + 1051, + 102, + 1040, + 1045, + 1040, + 1045, + 1041, + 1054, + 1035, + 1055, + 1037, + 1057, + 1049, + 1051, + 1050, + 102, + 1037, + 1055, + 1056, + 1054, + 1051, + 1050, + 1051, + 1049, + 1045, + 1041, + 102, + 1056, + 1054, + 1045, + 1055, + 1056, + 1037, + 1050, + 1035, + 1043, + 1057, + 1045, + 1048, + 1048, + 1051, + 1056, + 102, + 1056, + 1054, + 1045, + 1055, + 1056, + 1041, + 1055, + 1055, + 1041, + 102, + 1049, + 1051, + 1050, + 1037, + 1040, + 1041, + 102, + 1039, + 1044, + 1054, + 1045, + 1055, + 1056, + 1045, + 1037, + 1050, + 102, + 1037, + 1055, + 1056, + 1054, + 1051, + 1050, + 1051, + 1049, + 1041, + 1054, + 102, + 1055, + 1051, + 1048, + 1051, + 1049, + 1051, + 1050, + 102, + 1040, + 1045, + 1040, + 1045, + 1040, + 1045, + 1040, + 1045, + 1040, + 1045, + 102, + 1049, + 1041, + 1054, + 1039, + 1061, + 102, + ] + gold_input_mask = [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 
1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] + gold_segment_ids = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 6, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 10, + 10, + 10, + 10, + 10, + 10, + ] + gold_labels_mask = [ + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + gold_input_ids_for_subwords = [ + 101, + 26357, + 2106, + 2666, + 2061, + 8202, + 1998, + 13012, + 16643, + 2319, + 1043, + 7174, + 102, + 2106, + 3771, + 7842, + 2819, + 2239, + 102, + 28625, + 3630, + 9856, + 102, + 9822, + 26458, + 7174, + 2102, + 102, + 13012, + 13473, + 11393, + 102, + 13813, + 3207, + 102, + 3017, + 102, + 15211, + 102, + 9168, + 102, + 2106, + 28173, + 4305, + 4305, + 102, + 8673, + 102, + ] + gold_input_mask_for_subwords = [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + ] + gold_segment_ids_for_subwords = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 6, + 6, + 7, + 7, + 8, + 8, + 9, + 9, + 9, + 9, + 9, + 10, + 10, + ] + gold_character_pos_to_subword_pos = [ + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 3, + 3, + 3, + 4, + 4, + 5, + 5, + 5, + 5, + 6, + 6, + 6, + 6, + 7, + 7, + 7, + 8, + 8, + 8, + 9, + 9, + 9, + 10, + 11, + 11, + 11, + 12, + 13, + 13, + 13, + 14, + 14, + 14, + 14, + 15, + 15, + 16, + 16, + 17, + 17, + 18, + 19, + 19, + 19, + 19, + 19, + 20, + 20, + 21, + 21, + 21, + 22, + 23, + 23, + 23, + 23, + 23, + 23, + 23, + 23, + 24, + 24, + 24, + 25, + 25, + 25, + 26, + 27, + 28, + 28, + 28, + 29, + 29, + 29, + 30, + 30, + 30, + 31, + 32, + 32, + 32, + 32, + 33, + 33, + 34, + 35, + 35, + 35, + 35, + 35, + 35, + 35, + 35, + 35, + 36, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 37, + 38, + 39, + 39, + 39, + 39, + 39, + 39, + 39, + 
40, + 41, + 41, + 41, + 42, + 42, + 42, + 43, + 43, + 44, + 44, + 45, + 46, + 46, + 46, + 46, + 46, + 47, + ] + + tags = [0 for _ in hyp.split()] + for p, t in zip(span_info_parts, targets): + c, start, end = p.split(" ") + start = int(start) + end = int(end) + tags[start:end] = [t for i in range(end - start)] + + # get input features for characters + (input_ids, input_mask, segment_ids, labels_mask, labels, _, _,) = bert_example_builder._get_input_features( + hyp=hyp, ref=ref, tags=tags + ) + + # get input features for words + hyp_with_words = hyp.replace(" ", "").replace("_", " ") + ref_with_words = ref.replace(" ", "").replace("_", " ") + ( + input_ids_for_subwords, + input_mask_for_subwords, + segment_ids_for_subwords, + _, + _, + _, + _, + ) = bert_example_builder._get_input_features(hyp=hyp_with_words, ref=ref_with_words, tags=None) + + character_pos_to_subword_pos = bert_example_builder._map_characters_to_subwords(input_ids, input_ids_for_subwords) + + assert tags == gold_tags + assert input_ids == gold_input_ids + assert input_mask == gold_input_mask + assert segment_ids == gold_segment_ids + assert labels_mask == gold_labels_mask + assert input_ids_for_subwords == gold_input_ids_for_subwords + assert input_mask_for_subwords == gold_input_mask_for_subwords + assert segment_ids_for_subwords == gold_segment_ids_for_subwords + assert character_pos_to_subword_pos == gold_character_pos_to_subword_pos diff --git a/tools/ctc_segmentation/scripts/prepare_data.py b/tools/ctc_segmentation/scripts/prepare_data.py index 429b642d5ba0..c6ea024273fb 100644 --- a/tools/ctc_segmentation/scripts/prepare_data.py +++ b/tools/ctc_segmentation/scripts/prepare_data.py @@ -151,7 +151,7 @@ def split_text( ) # end of quoted speech - to be able to split sentences by full stop - transcript = re.sub(r"([\.\?\!])([\"\'])", r"\g<2>\g<1> ", transcript) + transcript = re.sub(r"([\.\?\!])([\"\'”])", r"\g<2>\g<1> ", transcript) # remove extra space transcript = re.sub(r" +", " ", transcript) diff --git a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb new file mode 100644 index 000000000000..189ac958d377 --- /dev/null +++ b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb @@ -0,0 +1,1403 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "PiRuohn_FQco" + }, + "source": [ + "# Overview\n", + "This tutorial demonstrates how to run inference with SpellMapper - a model for Spellchecking ASR (Automatic Speech Recognition) Customization.\n", + "\n", + "Estimated time: 10-15 min.\n", + "\n", + "SpellMapper is a non-autoregressive (NAR) model based on transformer architecture ([BERT](https://arxiv.org/pdf/1810.04805.pdf) with multiple separators).\n", + "It gets as input a single ASR hypothesis (text) and a **custom vocabulary** and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any.\n", + "\n", + "This model is an alternative to word boosting/shallow fusion approaches:\n", + " - does not require retraining ASR model;\n", + " - does not require beam-search/language model(LM);\n", + " - can be applied on top of any English ASR model output;" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qm5wmxVEGXgH" + }, + "source": [ + "## What is custom vocabulary?\n", + "**Custom vocabulary** is a list of words/phrases that are important for a particular user. For example, user's contact names, playlist, selected terminology and so on. 
The size of the custom vocabulary can vary from several hundreds to **several thousand entries** - but this is not an equivalent to ngram language model.\n", + "\n", + "![Scope of customization with user vocabulary](images/spellmapper_customization_vocabulary.png)\n", + "\n", + "Note that unlike traditional spellchecking approaches, which aim to correct known words using language models, the goal of contextual spelling correction is to correct highly specific user terms, most of which can be 1) out-of-vocabulary (OOV) words, 2) spelling variations (e.g., \"John Koehn\", \"Jon Cohen\") and language models cannot help much with that." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D5_XwuXDOKho" + }, + "source": [ + "## Tutorial Plan\n", + "\n", + "1. Create a sample custom vocabulary using some medical terminology.\n", + "2. Study what customization does - a detailed analysis of a small example.\n", + "3. Run a bigger example:\n", + " * Create sample ASR results by running TTS (text-to-speech synthesis) + ASR on some medical paper abstracts.\n", + " * Run SpellMapper inference and show how it can improve ASR results using custom vocabulary.\n", + "\n", + "TL;DR We reduce WER from `14.3%` to `11.4%` by correcting medical terms, e.g.\n", + "* `puramesin` => `puromycin`\n", + "* `parromsin` => `puromycin`\n", + "* `and hydrod` => `anhydride`\n", + "* `lesh night and` => `lesch-nyhan`\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agz8B2CxXBBG" + }, + "source": [ + "# Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "koRPpYISNPuH" + }, + "source": [ + "## Installing NeMo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HCnnz3cgVc4Q" + }, + "outputs": [], + "source": [ + "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", + "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", + "GITHUB_ACCOUNT = \"bene-ges\"\n", + "BRANCH = \"spellchecking_asr_customization_double_bert\"\n", + "!python -m pip install git+https://github.com/{GITHUB_ACCOUNT}/NeMo.git@{BRANCH}#egg=nemo_toolkit[all]\n", + "\n", + "# Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n", + "# comment out the below lines and set NEMO_DIR to your local path.\n", + "NEMO_DIR = 'nemo'\n", + "!git clone -b {BRANCH} https://github.com/{GITHUB_ACCOUNT}/NeMo.git $NEMO_DIR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_M92gCn_NW1_" + }, + "source": [ + "## Additional installs\n", + "We will use `sentence_splitter` to split abstracts to sentences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ddyJA3NtGl9C" + }, + "outputs": [], + "source": [ + "!pip install sentence_splitter" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qVa91rGkeFje" + }, + "source": [ + "Clone the SpellMapper model from HuggingFace.\n", + "Note that we will need not only the checkpoint itself, but also the ngram mapping vocabulary `replacement_vocab_filt.txt` from the same folder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JiI9dkEm5cpW" + }, + "outputs": [], + "source": [ + "!git clone https://huggingface.co/bene-ges/spellmapper_asr_customization_en" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8saqFOePVfFf" + }, + "source": [ + "## Imports\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tAJyiYn_VnrF" + }, + "outputs": [], + "source": [ + "import IPython.display as ipd\n", + "import json\n", + "import random\n", + "import re\n", + "import soundfile as sf\n", + "import torch\n", + "\n", + "from collections import Counter, defaultdict\n", + "from difflib import SequenceMatcher\n", + "from matplotlib.pyplot import imshow\n", + "from matplotlib import pyplot as plt\n", + "from sentence_splitter import SentenceSplitter\n", + "from typing import List, Set, Tuple\n", + "\n", + "from nemo.collections.tts.models import FastPitchModel\n", + "from nemo.collections.tts.models import HifiGanModel\n", + "\n", + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest\n", + "\n", + "from nemo.collections.nlp.data.spellchecking_asr_customization.utils import (\n", + " get_all_candidates_coverage,\n", + " get_index,\n", + " load_ngram_mappings,\n", + " search_in_index,\n", + " get_candidates,\n", + " read_spellmapper_predictions,\n", + " apply_replacements_to_text,\n", + " load_ngram_mappings_for_dp,\n", + " get_alignment_by_dp,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mfAaOdAWUGUV" + }, + "source": [ + "Use seed to get a reproducible behaviour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UlGnNKTuT_6A" + }, + "outputs": [], + "source": [ + "random.seed(0)\n", + "torch.manual_seed(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RPPHI7Zd_fDz" + }, + "source": [ + "## Download data\n", + "\n", + "File `pubmed23n0009.xml` taken from public ftp server of https://www.ncbi.nlm.nih.gov/pmc/ contains information about 5593 medical papers, from which we extract only their abstracts. We will feed sentences from there to TTS + ASR to get initial ASR results.\n", + "\n", + "File `wordlist.txt` contains 100k **single-word** medical terms.\n", + "\n", + "File `valid_adam.txt` contains 24k medical abbreviations with their full forms. We will use those full forms as examples of **multi-word** medical terms.\n", + "\n", + "File `count_1w.txt` contains 330k single words with their frequencies from Google Ngrams corpus. 
We will use this file to filter out frequent words from our custom vocabulary.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mX6cvE8xw2n1" + }, + "outputs": [], + "source": [ + "!wget https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/pubmed23n0009.xml.gz\n", + "!gunzip pubmed23n0009.xml.gz\n", + "!grep \"AbstractText\" pubmed23n0009.xml > abstract.txt\n", + "\n", + "!wget https://raw.githubusercontent.com/McGill-NLP/medal/master/toy_data/valid_adam.txt\n", + "!wget https://raw.githubusercontent.com/glutanimate/wordlist-medicalterms-en/master/wordlist.txt\n", + "!wget https://norvig.com/ngrams/count_1w.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mBm9BeqNaRlC" + }, + "source": [ + "## Auxiliary functions\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kVUKhSh48Ypi" + }, + "outputs": [], + "source": [ + "CHARS_TO_IGNORE_REGEX = re.compile(r\"[\\.\\,\\?\\:!;()«»…\\]\\[/\\*–‽+&_\\\\½√>€™$•¼}{~—=“\\\"”″‟„]\")\n", + "\n", + "\n", + "def get_medical_vocabulary() -> Tuple[Set[str], Set[str]]:\n", + " \"\"\"This function builds a vocabulary of medical terms using downloaded sources:\n", + " wordlist.txt - 100k single-word medical terms.\n", + " valid_adam.txt - 24k medical abbreviations with their full forms. We use those full forms as examples of multi-word medical terms.\n", + " count_1w.txt - 330k single words with their frequencies from Google Ngrams corpus. We will use this file to filter out frequent words from our custom vocabulary.\n", + " \"\"\"\n", + " common_words = set()\n", + " with open(\"count_1w.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " for line in f:\n", + " word, freq = line.strip().casefold().split(\"\\t\")\n", + " if int(freq) < 500000:\n", + " break\n", + " common_words.add(word)\n", + " print(\"Size of common words vocabulary:\", len(common_words))\n", + "\n", + " abbreviations = defaultdict(set)\n", + " medical_vocabulary = set()\n", + " with open(\"valid_adam.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " lines = f.readlines()\n", + " # first line is header\n", + " for line in lines[1:]:\n", + " abbrev, _, phrase = line.strip().split(\"\\t\")\n", + " # skip phrases longer than 3 words because some of them are long explanations\n", + " if phrase.count(\" \") > 2:\n", + " continue\n", + " if phrase in common_words:\n", + " continue\n", + " medical_vocabulary.add(phrase)\n", + " abbrev = abbrev.lower()\n", + " abbreviations[abbrev].add(phrase)\n", + "\n", + " with open(\"wordlist.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " for line in f:\n", + " word = line.strip().casefold()\n", + " # skip words contaning digits\n", + " if re.match(r\".*\\d.*\", word):\n", + " continue\n", + " if re.match(r\".*[\\[\\]\\(\\)\\+\\,\\.].*\", word):\n", + " continue\n", + " if word in common_words:\n", + " continue\n", + " medical_vocabulary.add(word)\n", + "\n", + " print(\"Size of medical vocabulary:\", len(medical_vocabulary))\n", + " print(\"Size of abbreviation vocabulary:\", len(abbreviations))\n", + " return medical_vocabulary, abbreviations\n", + "\n", + "\n", + "def read_abstracts(medical_vocabulary: Set[str]) -> Tuple[List[str], Set[str], Set[str]]:\n", + " \"\"\"This function reads the downloaded medical abstracts, and extracts sentences containing any word/phrase from the medical vocabulary.\n", + " Args:\n", + " medical_vocabulary: set of known medical words or phrases\n", + " Returns:\n", + " sentences: list of extracted sentences\n", + " 
all_found_singleword: set of single words from medical vocabulary that occurred at least in one sentence\n", + " all_found_multiword: set of multi-word phrases from medical vocabulary that occurred at least in one sentence\n", + " \"\"\"\n", + " splitter = SentenceSplitter(language='en')\n", + "\n", + " all_sentences = []\n", + " all_found_singleword = set()\n", + " all_found_multiword = set()\n", + " with open(\"abstract.txt\", \"r\", encoding=\"utf-8\") as f:\n", + " for line in f:\n", + " text = line.strip().replace(\"\", \"\").replace(\"\", \"\")\n", + " sents = splitter.split(text)\n", + " found_singleword = set()\n", + " found_multiword = set()\n", + " for sent in sents:\n", + " # remove anything in brackets from text\n", + " sent = re.sub(r\"\\(.+\\)\", r\"\", sent)\n", + " # remove quotes from text\n", + " sent = sent.replace(\"\\\"\", \"\")\n", + " # skip sentences contaning digits because normalization is out of scope of this tutorial\n", + " if re.match(r\".*\\d.*\", sent):\n", + " continue\n", + " # skip sentences contaning abbreviations with period inside the sentence (for the same reason)\n", + " if \". \" in sent:\n", + " continue\n", + " # skip long sentences as they may cause OOM issues\n", + " if len(sent) > 150:\n", + " continue\n", + " # replace all punctuation to space and convert to lowercase\n", + " sent_clean = CHARS_TO_IGNORE_REGEX.sub(\" \", sent).lower()\n", + " sent_clean = \" \".join(sent_clean.split(\" \"))\n", + " words = sent_clean.split(\" \")\n", + "\n", + " found_phrases = set()\n", + " for begin in range(len(words)):\n", + " for end in range(begin + 1, min(begin + 4, len(words))):\n", + " phrase = \" \".join(words[begin:end])\n", + " if phrase in medical_vocabulary:\n", + " found_phrases.add(phrase)\n", + " if end - begin == 1:\n", + " found_singleword.add(phrase)\n", + " else:\n", + " found_multiword.add(phrase)\n", + " if len(found_phrases) > 0:\n", + " all_sentences.append((sent, \";\".join(found_phrases)))\n", + " all_found_singleword = all_found_singleword.union(found_singleword)\n", + " all_found_multiword = all_found_multiword.union(found_multiword)\n", + "\n", + " print(\"Sentences:\", len(all_sentences))\n", + " print(\"Unique single-word terms found:\", len(all_found_singleword))\n", + " print(\"Unique multi-word terms found:\", len(all_found_multiword))\n", + " print(\"Examples of multi-word terms\", str(list(all_found_multiword)[0:10]))\n", + " \n", + " return all_sentences, all_found_singleword, all_found_multiword" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XU3xeCBVpWOL" + }, + "outputs": [], + "source": [ + "def get_fragments(i_words: List[str], j_words: List[str]) -> List[Tuple[str, str, str, int, int, int, int]]:\n", + " \"\"\"This function is used to compare two word sequences to find minimal fragments that differ.\n", + " Args:\n", + " i_words: list of words in first sequence\n", + " j_words: list of words in second sequence\n", + " Returns:\n", + " list of tuples (difference_type, fragment1, fragment2, begin_of_fragment1, end_of_fragment1, begin_of_fragment2, end_of_fragment2)\n", + " \"\"\"\n", + " s = SequenceMatcher(None, i_words, j_words)\n", + " result = []\n", + " for tag, i1, i2, j1, j2 in s.get_opcodes():\n", + " result.append((tag, \" \".join(i_words[i1:i2]), \" \".join(j_words[j1:j2]), i1, i2, j1, j2))\n", + " result = sorted(result, key=lambda x: x[3])\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ydXp_pFYmYu" + }, + "source": [ + "## 
Read medical data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WAeauax0SV1-" + }, + "outputs": [], + "source": [ + "medical_vocabulary, _ = get_medical_vocabulary()\n", + "sentences, found_singleword, found_multiword = read_abstracts(medical_vocabulary)\n", + "# in case if we need random candidates from a big sample - we will use full medical vocabulary for that purpose.\n", + "big_sample = list(medical_vocabulary)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FRli7-Kx7sOO" + }, + "outputs": [], + "source": [ + "for sent, phrases in sentences[0:10]:\n", + " print(sent, \"\\t\", phrases)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rL1VqH2_dk93" + }, + "source": [ + "# SpellMapper ASR Customization\n", + "\n", + "SpellMapper model relies on two offline preparation steps:\n", + "1. Collecting n-gram mappings from a large corpus (this mappings vocabulary had been collected once on a large corpus and is supplied with the model).\n", + "2. Indexing of user vocabulary by n-grams.\n", + "\n", + "![Offline data preparation](images/spellmapper_data_preparation.png)\n", + "\n", + "At inference time we take as input an ASR hypothesis and an n-gram-indexed user vocabulary and perform following steps:\n", + "1. Retrieve the top 10 candidate phrases from the user vocabulary that are likely to be contained in the given ASR-hypothesis, possibly in a misspelled form.\n", + "2. Run the neural model that tags the input characters with correct candidate labels or 0 if no match is found.\n", + "3. Do post-processing to combine results.\n", + "\n", + "![Inference pipeline](images/spellmapper_inference_pipeline.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OeJpsMwslmrd" + }, + "source": [ + "## N-gram mappings\n", + "Note that n-gram mappings vocabulary had been collected from a large corpus and is supplied with the model. It is supposed to be \"universal\" for English language.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uH6p0mOd12pi" + }, + "source": [ + "Let's see what n-gram mappings are like, for example, for an n-gram `l u c`.\n", + "Note that n-grams in `replacement_vocab_filt.txt` preserve one-to-one correspondence between original letters and misspelled fragments (this additional markup is handled during loading). \n", + "* `+` means that adjacent letters are concatenated and correspond to a single source letter. \n", + "* `` means that the original letter is deleted. \n", + "This auxiliary markup will be removed automatically during loading.\n", + "\n", + "`_` is used instead of real space symbol.\n", + "\n", + "Last three columns are:\n", + "* joint frequency\n", + "* frequency of original n-gram\n", + "* frequency of misspelled n-gram\n", + "\n", + "$$\\frac{JointFrequency}{SourceFrequency}=TranslationProbability$$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qul163dB1sKp" + }, + "outputs": [], + "source": [ + "!awk 'BEGIN {FS=\"\\t\"} ($1==\"l u c\"){print $0}' < spellmapper_asr_customization_en/replacement_vocab_filt.txt | sort -t$'\\t' -k3nr" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eWxcrVWZ3Pfq" + }, + "source": [ + "Now we read n-gram mappings from the file. Parameter `max_misspelled_freq` controls maximum frequency of misspelled n-grams. N-grams more frequent than that are put in the list of banned n-grams and won't be used in indexing." 
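+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before loading the full vocabulary, here is a small sketch of how the translation probability defined above is derived from a single row of `replacement_vocab_filt.txt`. It assumes the five-column layout (original n-gram, misspelled n-gram, joint frequency, original n-gram frequency, misspelled n-gram frequency) described above; the loading function below handles the extra markup for you."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal sketch (assumes 5 tab-separated columns as described above).\n",
+    "# For every mapping of the n-gram \"l u c\", translation probability = joint_freq / orig_freq.\n",
+    "with open(\"spellmapper_asr_customization_en/replacement_vocab_filt.txt\", \"r\", encoding=\"utf-8\") as f:\n",
+    "    for line in f:\n",
+    "        orig, misspelled, joint_freq, orig_freq, missp_freq = line.rstrip(\"\\n\").split(\"\\t\")\n",
+    "        if orig == \"l u c\":\n",
+    "            print(orig, \"->\", misspelled, \"p =\", round(int(joint_freq) / int(orig_freq), 4))"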
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WHKhE945-N7o" + }, + "outputs": [], + "source": [ + "print(\"load n-gram mappings...\")\n", + "ngram_mapping_vocab, ban_ngram = load_ngram_mappings(\"spellmapper_asr_customization_en/replacement_vocab_filt.txt\", max_misspelled_freq=125000)\n", + "# CAUTION: entries in ban_ngram end with a space and can contain \"+\" \"=\"\n", + "print(\"Size of ngram mapping vocabulary:\", len(ngram_mapping_vocab))\n", + "print(\"Size of banned ngrams:\", len(ban_ngram))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "49IcMBfllvXN" + }, + "source": [ + "## Indexing of custom vocabulary" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1K6paeee2Iu" + }, + "source": [ + "As we mentioned earlier, this model pipeline is intended to work with custom vocabularies up to several thousand entries. Since the whole medical vocabulary contains 110k entries, we restrict our custom vocabulary to 5000+ terms that occured in given corpus of abstracts.\n", + "\n", + "The goal of indexing our custom vocabulary is to build an index where key is a letter n-gram and value is the whole phrase. The keys are n-grams in the given user phrase and their misspelled variants taken from our collection of n-\n", + "gram mappings (see Index of custom vocabulary in Fig. 1)\n", + "\n", + "*Though it is possible to index and search the whole 110k vocabulary, it will require additional optimizations and is beyond the scope of this tutorial.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xWb0jGqw6Woi" + }, + "outputs": [], + "source": [ + "custom_phrases = []\n", + "for phrase in medical_vocabulary:\n", + " if phrase not in found_singleword and phrase not in found_multiword:\n", + " continue\n", + " custom_phrases.append(\" \".join(list(phrase.replace(\" \", \"_\"))))\n", + "print(\"Size of customization vocabulary:\", len(custom_phrases))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UHWor5pD2Eyb" + }, + "source": [ + "Now we build the index for our custom phrases.\n", + "\n", + "Parameter `min_log_prob` controls minimum log probability, after which we stop growing this n-gram.\n", + "\n", + "Parameter `max_phrases_per_ngram` controls maximum number of phrases that can be indexed by one ngram. 
N-grams exceeding this limit are also banned and not used in indexing.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hs4RDXj0-xW9" + }, + "outputs": [], + "source": [ + "phrases, ngram2phrases = get_index(custom_phrases, ngram_mapping_vocab, ban_ngram, min_log_prob=-4.0, max_phrases_per_ngram=600)\n", + "print(\"Size of phrases:\", len(phrases))\n", + "print(\"Size of ngram2phrases:\", len(ngram2phrases))\n", + "\n", + "# Save index to file - later we will use it in other script\n", + "with open(\"index.txt\", \"w\", encoding=\"utf-8\") as out:\n", + " for ngram in ngram2phrases:\n", + " for phrase_id, begin, size, logprob in ngram2phrases[ngram]:\n", + " phrase = phrases[phrase_id]\n", + " out.write(ngram + \"\\t\" + phrase + \"\\t\" + str(begin) + \"\\t\" + str(size) + \"\\t\" + str(logprob) + \"\\n\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RV1sdQ9rvar8" + }, + "source": [ + "## Small detailed example\n", + "\n", + "Let's consider, for example, one custom phrase `thoracic aorta` and an incorrect ASR-hypothesis `the tarasic oorda is a part of the aorta located in the thorax`, containing a misspelled phrase `tarasic_oorda`. \n", + "\n", + "We will see \n", + "1. How this custom phrase is indexed.\n", + "2. How candidate retrieval works, given ASR-hypothesis.\n", + "3. How inference and post-processing work.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kGBTTJXixnrG" + }, + "source": [ + "### N-grams in index" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ryfUlqNMl4vQ" + }, + "source": [ + "Let's look, for example, by what n-grams a custom phrase `thoracic aorta` is indexed. \n", + "Columns: \n", + "1. n-gram\n", + "2. beginning position in the phrase\n", + "3. length\n", + "4. log probability\n", + "\n", + "Note that many n-grams are not from n-gram mappings file. Those are derived by growing previous n-grams with new replacements. In this case log probabilities are summed up. Growing stops, when minimum log prob is exceeded.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x0ZVsXGBo8pt" + }, + "outputs": [], + "source": [ + "for ngram in ngram2phrases:\n", + " for phrase_id, b, length, lprob in ngram2phrases[ngram]:\n", + " if phrases[phrase_id] == \"t h o r a c i c _ a o r t a\":\n", + " print(ngram.ljust(16) + \"\\t\" + str(b).rjust(4) + \"\\t\" + str(length).rjust(4) + \"\\t\" + str(lprob))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "20ov23ze4xeQ" + }, + "source": [ + "### Candidate retrieval\n", + "Candidate retrieval tasks are:\n", + " - Given an input sentence and an index of custom vocabulary find all n-grams from the index matching the sentence. \n", + " - Find which sentence fragments and which custom phrases have most \"hits\" - potential candidates.\n", + " - Find approximate starting position for each candidate phrase. \n", + "\n", + "\n", + "Let's look at the hits, that phrase \"thoracic aorta\" gets by searching all ngrams in the input text. We can see some hits in different part of the sentence, but a moving window can find a fragment with most hits." 
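+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the \"moving window\" idea concrete, below is a toy, self-contained sketch of the hit-counting logic. It uses made-up hit positions and only illustrates the idea; it is not the actual `get_all_candidates_coverage` implementation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Toy sketch: slide a window of the candidate's length over the hypothesis and\n",
+    "# keep the window with the highest fraction of n-gram hits (hit positions are made up).\n",
+    "toy_hyp = \"the_tarasic_oorda_is_a_part_of_the_aorta\"\n",
+    "toy_candidate = \"thoracic_aorta\"\n",
+    "toy_hits = [1 if i in {4, 5, 6, 8, 9, 12, 13, 15} else 0 for i in range(len(toy_hyp))]\n",
+    "window = len(toy_candidate)\n",
+    "coverage, start = max(\n",
+    "    (sum(toy_hits[s:s + window]) / window, s) for s in range(len(toy_hyp) - window + 1)\n",
+    ")\n",
+    "print(\"best coverage:\", round(coverage, 2), \"at position\", start)"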
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t_rhKQ3Xqa8A" + }, + "outputs": [], + "source": [ + "sent = \"the_tarasic_oorda_is_a_part_of_the_aorta_located_in_the_thorax\"\n", + "phrases2positions, position2ngrams = search_in_index(ngram2phrases, phrases, sent)\n", + "print(\" \".join(list(sent)))\n", + "print(\" \".join(list(map(str, phrases2positions[phrases.index(\"t h o r a c i c _ a o r t a\")].astype(int)))))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "orkRapbjF4aZ" + }, + "source": [ + "`phrases2positions` is a matrix of size (len(phrases), len(ASR_hypothesis)).\n", + "It is filled with 1.0 (hits) on intersection of letter n-grams and phrases that are indexed by these n-grams, 0.0 - elsewhere.\n", + "It is used to find phrases with many hits within a contiguous window - potential matching candidates.\n", + "\n", + "`position2ngrams` is a list of sets of ngrams. List index is the starting position in the ASR-hypothesis.\n", + "It is used later to check how well each found candidate is covered by n-grams (to avoid cases where some repeating n-gram gives many hits to a phrase, but the phrase itself is not well covered)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JF7u4_iiHLyI" + }, + "outputs": [], + "source": [ + "candidate2coverage, candidate2position = get_all_candidates_coverage(phrases, phrases2positions)\n", + "print(\"Coverage=\", candidate2coverage[phrases.index(\"t h o r a c i c _ a o r t a\")])\n", + "print(\"Starting position=\", candidate2position[phrases.index(\"t h o r a c i c _ a o r t a\")])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "45mvKg8ZyNbr" + }, + "source": [ + "`candidate2coverage` is a list of size len(phrases) containing coverage (0.0 to 1.0) in best window.\n", + "Coverage is a smoothed percentage of hits in the window of size of the given phrase.\n", + "\n", + "`candidate2position` is a list of size len(phrases) containing starting position of best window.\n", + "\n", + "Starting position is approximate, it's ok. If it is not at the beginning of some word, SpellMapper will try to adjust it later. In this particular example we get 5 as starting position instead of 4, missing the first letter." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sjyn9I98udL9" + }, + "source": [ + "### Inference\n", + "\n", + "Now let's generate input for SpellMapper inference. \n", + "An input line should consist of 4 tab-separated columns:\n", + " - text of ASR-hypothesis\n", + " - texts of 10 candidates separated by semicolon\n", + " - 1-based ids of non-dummy candidates\n", + " - approximate start/end coordinates of non-dummy candidates (correspond to ids)\n", + "Note that candidate retrieval is done inside the function `get_candidates`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cJnusVfBRhRX" + }, + "outputs": [], + "source": [ + "out = open(\"spellmapper_input.txt\", \"w\", encoding=\"utf-8\")\n", + "letters = list(sent)\n", + "candidates = get_candidates(ngram2phrases, phrases, letters, big_sample)\n", + "# We add two columns with targets and span_info. 
\n", + "# They have same format as during training, but start and end positions are APPROXIMATE, they will be adjusted when constructing BertExample.\n", + "targets = []\n", + "span_info = []\n", + "for idx, c in enumerate(candidates):\n", + " if c[1] == -1:\n", + " continue\n", + " targets.append(str(idx + 1)) # targets are 1-based\n", + " start = c[1]\n", + " end = min(c[1] + c[2], len(letters)) # ensure that end is not outside sentence length (it can happen because c[2] is candidate length used as approximation)\n", + " span_info.append(\"CUSTOM \" + str(start) + \" \" + str(end))\n", + "\n", + "out.write(\" \".join(letters) + \"\\t\" + \";\".join([x[0] for x in candidates]) + \"\\t\" + \" \".join(targets) + \"\\t\" + \";\".join(span_info) + \"\\n\")\n", + "out.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qpei5o89SmaU" + }, + "outputs": [], + "source": [ + "!cat spellmapper_input.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9rAmO15SS6go" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \\\n", + " pretrained_model=spellmapper_asr_customization_en/training_10m_5ep.nemo \\\n", + " model.max_sequence_len=512 \\\n", + " inference.from_file=spellmapper_input.txt \\\n", + " inference.out_file=spellmapper_output.txt \\\n", + " inference.batch_size=16 \\\n", + " lang=en\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wd2aq4T1N5cs" + }, + "source": [ + "Each line in SpellMapper output is tab-separated and consists of 4 columns:\n", + "1. ASR-hypothesis (same as in input)\n", + "2. 10 candidates separated with semicolon (same as in input)\n", + "3. fragment predictions, separated with semicolon, each prediction is a tuple (start, end, candidate_id, probability)\n", + "4. letter predictions - candidate_id predicted for each letter (this is only for debug purposes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ravgEX8cTFty" + }, + "outputs": [], + "source": [ + "!cat spellmapper_output.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "az26364-PHb2" + }, + "source": [ + "We can use some utility functions to apply found replacements and get actual corrected text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lPtFa_EhK8pb" + }, + "outputs": [], + "source": [ + "spellmapper_results = read_spellmapper_predictions(\"spellmapper_output.txt\")\n", + "text, replacements, _ = spellmapper_results[0]\n", + "corrected_text = apply_replacements_to_text(text, replacements, replace_hyphen_to_space=False)\n", + "print(\"Text before correction:\\n\", text)\n", + "print(\"Text after correction:\\n\", corrected_text)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "efF7O-D91FLX" + }, + "source": [ + "# Bigger customization example\n", + "\n", + "Let's test customization on more data. 
The plan is\n", + " * Get baseline ASR transcriptions by running TTS + ASR on some medical paper abstracts.\n", + " * Run SpellMapper inference and show how it can improve ASR results using custom vocabulary.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r_EFPnyDcXZt" + }, + "source": [ + "## Run TTS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i9F5SBhmr8rk" + }, + "outputs": [], + "source": [ + "# create a folder for wav files (TTS output)\n", + "!rm -r audio\n", + "!mkdir audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMbkNVt7YBAO" + }, + "outputs": [], + "source": [ + "if torch.cuda.is_available():\n", + " device = \"cuda\"\n", + "else:\n", + " device = \"cpu\"\n", + "\n", + "# Load FastPitch from HuggingFace\n", + "spectrogram_generator = FastPitchModel.from_pretrained(\"nvidia/tts_en_fastpitch\").eval().to(device)\n", + "# Load HifiGan vocoder from HuggingFace\n", + "vocoder = HifiGanModel.from_pretrained(model_name=\"nvidia/tts_hifigan\").eval().to(device)\n", + "\n", + "# Write sentences that we want to feed to TTS\n", + "with open(\"tts_input.txt\", \"w\", encoding=\"utf-8\") as out:\n", + " for sent, _ in sentences[0:100]:\n", + " out.write(sent + \"\\n\")\n", + "\n", + "out_manifest = open(\"manifest.json\", \"w\", encoding=\"utf-8\")\n", + "i = 0\n", + "with open(\"tts_input.txt\", \"r\", encoding=\"utf-8\") as inp:\n", + " for line in inp:\n", + " text = line.strip()\n", + " text_clean = CHARS_TO_IGNORE_REGEX.sub(\" \", text).lower() #replace all punctuation to space and convert to lowercase\n", + " text_clean = \" \".join(text_clean.split())\n", + "\n", + " parsed = spectrogram_generator.parse(text, normalize=True)\n", + "\n", + " spectrogram = spectrogram_generator.generate_spectrogram(tokens=parsed)\n", + " audio = vocoder.convert_spectrogram_to_audio(spec=spectrogram)\n", + "\n", + " # Note that vocoder return a batch of audio. In this example, we just take the first and only sample.\n", + " filename = \"audio/\" + str(i) + \".wav\"\n", + " sf.write(filename, audio.to('cpu').detach().numpy()[0], 16000)\n", + " out_manifest.write(\n", + " \"{\\\"audio_filepath\\\": \\\"\" + filename + \"\\\", \\\"text\\\": \\\"\" + text_clean + \"\\\", \\\"orig_text\\\": \\\"\" + text + \"\\\"}\\n\"\n", + " )\n", + " i += 1\n", + "\n", + " # display some examples\n", + " if i < 10:\n", + " print(f'\"{text}\"\\n')\n", + " ipd.display(ipd.Audio(audio.to('cpu').detach(), rate=22050))\n", + "\n", + "out_manifest.close()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9T3CZcCAmxCz" + }, + "source": [ + "Now we have a folder with generated audios `audio/*.wav` and a nemo manifest with json records like `{\"audio_filepath\": \"audio/0.wav\", \"text\": \"no renal auditory or vestibular toxicity was observed\", \"orig_text\": \"No renal, auditory, or vestibular toxicity was observed.\"}`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pR_T1HnttVjm" + }, + "outputs": [], + "source": [ + "lines = []\n", + "with open(\"manifest.json\", \"r\", encoding=\"utf-8\") as f:\n", + " lines = f.readlines()\n", + "\n", + "for line in lines:\n", + " try:\n", + " data = json.loads(line.strip())\n", + " except:\n", + " print(line)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Free GPU memory to avoid OOM." 
+ ], + "metadata": { + "id": "bt2TMLLvdUHm" + } + }, + { + "cell_type": "code", + "source": [ + "del spectrogram_generator\n", + "del vocoder\n", + "torch.cuda.empty_cache()" + ], + "metadata": { + "id": "ZwEpAOCaRH7s" + }, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HrensakWdLkt" + }, + "source": [ + "## Run baseline ASR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IQNIo2M_mqJc" + }, + "source": [ + "Next we transcribe our .wav files with a general domain [ASR model](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_conformer_ctc_large). It will generate an output file `ctc_baseline_transcript.json` where the predicted transcriptions are stored in the field `pred_text` of each record.\n", + "\n", + "Note that this ASR model was not trained or fine-tuned on medical domain, so we expect it to make mistakes on medical terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NMN63ux1mJiG" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/transcribe_speech.py \\\n", + " pretrained_name=\"stt_en_conformer_ctc_large\" \\\n", + " dataset_manifest=manifest.json \\\n", + " output_filename=ctc_baseline_transcript_tmp.json \\\n", + " batch_size=2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L3swQ8uqqgnp" + }, + "source": [ + "ATTENTION: SpellMapper relies on words to be separated by _single_ space\n", + "\n", + "There is a bug with multiple space, observed in ASR results produced by Conformer-CTC, probably connected to this issue: https://github.com/NVIDIA/NeMo/issues/4034.\n", + "\n", + "So we need to correct the manifests to ensure that all spaces are single." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z17sxkmXrXpJ" + }, + "outputs": [], + "source": [ + "test_data = read_manifest(\"ctc_baseline_transcript_tmp.json\")\n", + "\n", + "for i in range(len(test_data)):\n", + " # if there are multiple spaces in the string they will be merged to one\n", + " test_data[i][\"pred_text\"] = \" \".join(test_data[i][\"pred_text\"].split())\n", + "\n", + "with open(\"ctc_baseline_transcript.json\", \"w\", encoding=\"utf-8\") as out:\n", + " for d in test_data:\n", + " line = json.dumps(d)\n", + " out.write(line + \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PuKtfhbVkVJY" + }, + "outputs": [], + "source": [ + "!head -n 4 ctc_baseline_transcript.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aCJw9NEXqRg8" + }, + "source": [ + "### Calculating WER of baseline transcript\n", + "We use the standard script from NeMo to calculate WER and CER of our baseline transcript. Internally it compares the text in `pred_text` (predicted transcript) to `text` (reference transcript). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZmNEGVWQsGo2" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/speech_to_text_eval.py \\\n", + " dataset_manifest=ctc_baseline_transcript.json \\\n", + " only_score_manifest=True\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AvPwJr0ZqdkN" + }, + "source": [ + "### See fragments that differ\n", + "We use SequenceMatcher to see fragments that differ. 
(Another option is to use a more powerful analytics tool [Speech Data Explorer](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tools/speech_data_explorer.html))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RAeaVCpMv78y" + }, + "outputs": [], + "source": [ + "test_data = read_manifest(\"ctc_baseline_transcript.json\")\n", + "pred_text = [data['pred_text'] for data in test_data]\n", + "ref_text = [data['text'] for data in test_data]\n", + "audio_filepath = [data['audio_filepath'] for data in test_data]\n", + "\n", + "diff_vocab = Counter()\n", + "\n", + "for i in range(len(test_data)):\n", + " ref_sent = \" \" + ref_text[i] + \" \"\n", + " pred_sent = \" \" + pred_text[i] + \" \"\n", + "\n", + " pred_words = pred_sent.strip().split()\n", + " ref_words = ref_sent.strip().split()\n", + "\n", + " for tag, hyp_fragment, ref_fragment, i1, i2, j1, j2 in get_fragments(pred_words, ref_words):\n", + " if tag != \"equal\":\n", + " diff_vocab[(tag, hyp_fragment, ref_fragment)] += 1\n", + "\n", + "sum_ = 0\n", + "print(\"PRED vs REF\")\n", + "for k, v in diff_vocab.most_common(1000000):\n", + " sum_ += v\n", + " print(k, v, \"sum=\", sum_)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dUSOF7iD1w_9" + }, + "source": [ + "## Run SpellMapper" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x39BQhYB6_Fr" + }, + "source": [ + "Now we run retrieval on our input manifest and prepare input for SpellMapper inference. Note that we use index of custom vocabulary (file `index.txt` that we saved earlier)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8x-yT5WqfFz" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/prepare_input_from_manifest.py \\\n", + " --manifest ctc_baseline_transcript.json \\\n", + " --custom_vocab_index index.txt \\\n", + " --big_sample spellmapper_asr_customization_en/big_sample.txt \\\n", + " --short2full_name short2full.txt \\\n", + " --output_name spellmapper_input.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ueq_JAPWGs_Y" + }, + "source": [ + "Run the inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zgkqiiZtJjcB" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/spellchecking_asr_customization_infer.py \\\n", + " pretrained_model=spellmapper_asr_customization_en/training_10m_5ep.nemo \\\n", + " model.max_sequence_len=512 \\\n", + " inference.from_file=spellmapper_input.txt \\\n", + " inference.out_file=spellmapper_output.txt \\\n", + " inference.batch_size=16 \\\n", + " lang=en\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RPQWJX8dFLfX" + }, + "source": [ + "Now we postprocess SpellMapper output and create output corrected manifest." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3eFU515yKvXP" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py \\\n", + " --input_manifest ctc_baseline_transcript.json \\\n", + " --short2full_name short2full.txt \\\n", + " --output_manifest ctc_corrected_transcript.json \\\n", + " --spellmapper_result spellmapper_output.txt \\\n", + " --replace_hyphen_to_space \\\n", + " --field_name pred_text \\\n", + " --ngram_mappings \"\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hRoIhhGh17tp" + }, + "source": [ + "### Calculating WER of corrected transcript." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qIT957bGo9AY" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/speech_to_text_eval.py \\\n", + " dataset_manifest=ctc_corrected_transcript.json \\\n", + " only_score_manifest=True\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NYXIPusupqOQ" + }, + "outputs": [], + "source": [ + "test_data = read_manifest(\"ctc_corrected_transcript.json\")\n", + "pred_text = [data['pred_text'] for data in test_data]\n", + "ref_text = [data['pred_text_before_correction'] for data in test_data]\n", + "\n", + "diff_vocab = Counter()\n", + "\n", + "for i in range(len(test_data)):\n", + " ref_sent = \" \" + ref_text[i] + \" \"\n", + " pred_sent = \" \" + pred_text[i] + \" \"\n", + "\n", + " pred_words = pred_sent.strip().split()\n", + " ref_words = ref_sent.strip().split()\n", + "\n", + " for tag, hyp_fragment, ref_fragment, i1, i2, j1, j2 in get_fragments(pred_words, ref_words):\n", + " if tag != \"equal\":\n", + " diff_vocab[(tag, hyp_fragment, ref_fragment)] += 1\n", + "\n", + "sum_ = 0\n", + "print(\"Corrected vs baseline\")\n", + "for k, v in diff_vocab.most_common(1000000):\n", + " sum_ += v\n", + " print(k, v, \"sum=\", sum_)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DJtXlqXbTD6M" + }, + "source": [ + "### Filtering by Dynamic Programming(DP) score\n", + "\n", + "What else can be done?\n", + "Given a fragment and its potential replacement, we can apply **dynamic programming** to find the most probable \"translation\" path between them. We will use the same n-gram mapping vocabulary, because its frequencies give us \"translation probability\" of each n-gram pair. The final path score can be calculated as maximum sum of log probalities of matching n-grams along this path.\n", + "Let's look at an example. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "05Qf9wgHU_UR" + }, + "outputs": [], + "source": [ + "joint_vocab, orig_vocab, misspelled_vocab, max_len = load_ngram_mappings_for_dp(\"spellmapper_asr_customization_en/replacement_vocab_filt.txt\")\n", + "\n", + "fragment = \"and hydrod\"\n", + "replacement = \"anhydride\"\n", + "fragment_spaced = \" \".join(list(fragment.replace(\" \", \"_\")))\n", + "replacement_spaced = \" \".join(list(replacement.replace(\" \", \"_\")))\n", + "path = get_alignment_by_dp(\n", + " replacement_spaced,\n", + " fragment_spaced,\n", + " dp_data=(joint_vocab, orig_vocab, misspelled_vocab, max_len)\n", + ")\n", + "print(\"Dynamic Programming path:\")\n", + "for fragment_ngram, replacement_ngram, score, sum_score, joint_freq, orig_freq, misspelled_freq in path:\n", + " print(\n", + " \"\\t\",\n", + " \"frag=\",\n", + " fragment_ngram,\n", + " \"; repl=\",\n", + " replacement_ngram,\n", + " \"; score=\",\n", + " score,\n", + " \"; sum_score=\",\n", + " sum_score,\n", + " \"; joint_freq=\",\n", + " joint_freq,\n", + " \"; orig_freq=\",\n", + " orig_freq,\n", + " \"; misspelled_freq=\",\n", + " misspelled_freq,\n", + " )\n", + "\n", + "print(\"Final path score is in path[-1][3]: \", path[-1][3])\n", + "print(\"Dynamic programming(DP) score per symbol is final score divided by len(fragment): \", path[-1][3] / (len(fragment)))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hgfKPKckaLnc" + }, + "source": [ + "The idea is that we can skip replacements whose average DP score per symbol is below some predefined minimum, say -1.5.\n", + "Note that dynamic programming works slow because of quadratic complexity, but it allows to get rid of some false positives. Let's apply it on the same test set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UhSXh7ht_JRn" + }, + "outputs": [], + "source": [ + "!python nemo/examples/nlp/spellchecking_asr_customization/postprocess_and_update_manifest.py \\\n", + " --input_manifest ctc_baseline_transcript.json \\\n", + " --short2full_name short2full.txt \\\n", + " --output_manifest ctc_corrected_transcript_dp.json \\\n", + " --spellmapper_result spellmapper_output.txt \\\n", + " --replace_hyphen_to_space \\\n", + " --field_name pred_text \\\n", + " --use_dp \\\n", + " --ngram_mappings spellmapper_asr_customization_en/replacement_vocab_filt.txt \\\n", + " --min_dp_score_per_symbol -1.5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u8R5YHB3vPC8" + }, + "outputs": [], + "source": [ + "!python nemo/examples/asr/speech_to_text_eval.py \\\n", + " dataset_manifest=ctc_corrected_transcript_dp.json \\\n", + " only_score_manifest=True" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "upvTbkFAeYtR" + }, + "source": [ + "# Final notes\n", + "1. Our paper...\n", + "\n", + "2. To reproduce evaluation experiments from this paper see these scripts:\n", + " - [test_on_kensho.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", + " - [test_on_userlibri.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", + " - [test_on_spoken_wikipedia.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", + "\n", + "3. 
To reproduce training see [README.md](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/README.md)\n",
+    "\n",
+    "4. Promising future research directions would be:\n",
+    "  - add a simple trainable classifier on top of SpellMapper predictions instead of using multiple thresholds\n",
+    "  - retrain after adding more varied false positives to the training data"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "toc_visible": true,
+   "provenance": []
+  },
+  "gpuClass": "standard",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/tutorials/nlp/images/spellmapper_customization_vocabulary.png b/tutorials/nlp/images/spellmapper_customization_vocabulary.png
new file mode 100644
index 0000000000000000000000000000000000000000..1ecd7ab5add501b7e2889142a4df67442318b161
GIT binary patch
literal 39243
[... base85-encoded binary image data omitted ...]
ztvRsJG0lKR5_7Y2S3n?EAyCWt!ke0D#%_^sro6sBml~|d-1E|W0`6tl$9GJ2LGTk# zC>#@K@A~N$DjAjqg(WU|Y<{ngGBlf~nRT3+IR^_;!O!F5qaiJ8o3R3QA&L~H*2xJ? z;R?eA1bj{i2HENgV_T3|VqYbRf!^C4ge?%Hkj1*_GQyFkntn{4TBk@RM?~y?@#6A@R$Yq@9I2*H8A))iw zc-7xhBa5>~E2^$|ynSVg&oUaWllIXC%Shf;t14bz4wi#bZfgmML_t%Dg^^zP2lb!S z#uYSEm;#I6rYV=cn+on=?lgTvJfETyLcf7^f%NFLY4UOR2cjev-4P%Ea@!$zd7#u3 zu)N4CdOQ}V5lk=7piG*~_SJw&objEOzQlaS|Acm*(TA10k{TU}=~bRO8(8ti7c?~T zZsWragCy17nq;9#AyVhXF!lw6QH&DV>UNtv3XQ7B_-I(oe`q%ngNQ?2_?cGFLaW5oOT9kHko3vAB48bBl zTUq)Yg;KH_ErfZ&o^z>%;&({$ciS$QVYd_b9UyVZxRt&*_~JAuGjxvtGzzi_s|Ww#08C;WnZ8A6q{Ov@=FCf}h8odqP%Zk+y5r z6|r89?p_AY+&ooHbr4N1QHkFeenEFv$S{*iOgf#1mblZ)ubGyfeaS~DMyG^tG$F4; z;cl4#l%9=V!0gf|u3ZC3?89(GV8aLiy5{4Zbujcb4tj>o8;Q?~BtnZ}{t<{GcHCA?A9B1Cl+ zg-}A4Zu0I|f6bDtYY^Dhc~wV?&s)lHeNu`VHW&Ub*hk!DqYlaZQ{z-Z$+g0sbjL2H6BuwUDq*Wi8 z3oZBS-jq!>P*?Eqhg!?+1*%4_baq`$V`Dg3u5hVZ)>FU*U^U$&?P|osd$n9+3t~VMm{d}? zx&0|nI`s8g)?K`lwxJeA->)V`)U?uV?r+M$1* zhf3D?q*9fZh$?@emVtY$)$u{WLYc55#-@_}s0zx;M)rJ_BN00>qhgv%GCLDuoyYzo_hnm}N!OnPE&H{AK0E|w zOP`>cIQCG`kQ@Jq-Jf>dPDR5*%$>7}>HA(@>`Ji2_>ncnxEc7l8NO8l)?w+T5izY$gbN!=X>H9u8! zdyY^)oVLSOReJt@ezWY`Vtt=Q^VgRyYbv{*TtNrAQz{IRDf?x>L}+22o4u;Fqkfzk znRibFHp>}tH8e&|xPcpkfLg}{tUc`WI%k9%wmT)QPDA4&>s{?@6WlfC1A9%EIN6>4PO-){?r5nRFVCx&In=y%VaajIUJ1X>wx7Y%{uIaODUB5dGb`$~ z-TKCGQGaarn7}vvo_Urp-3j4$Q0ri0aCn^SJ@^I~^S8uzdp>KuHrn?4x88=-_4~(} zt*+09&R!f3fZ3hu9w;oo=t~f4^`{pvIkyu zG_JVr*3_&=hKCdEy`$K_&`-Kw>;Xb^zh{)OWs={_T28Ud~;bNu_<#*iH}5a)jDT6 zGT@8f2HY_Pe4T0HPH(bw)dg@z8B)&diX8X}MJADW6pcAto%tbYjER8nG8~8bF|NEb zUi3IAqzf2Gyba-c4vgh};yNYXWm>egA+u8g5zgyTaLXz+-WW{c3?rny0kTb{?*El- z(xlOwx%|b50Z94$Iu)zztfK_iUuzq@4#*c7>}#zinG_WlS(R)k(Px4eZ7IV_&!e^C zdsyHujeLcAjDyfr%|;`4G?$`^>(r7ANJ};Dev;!&!}TC*HZJRS><1>!pS(vH^LgPA z1R)kU8s%!qkA#g@=<9{(_x$xBZOhOJ4i)obqC58hQ!AkB0h13z0s zsVF)k-7}lFuJ4ZL+2{^WdY))KSEt&P&bQ8|&(b~zJ2f5#=GX#r*T13aB}oti14Kgb zMRa)BSx$=%1-^N&cZT9&VeOxOLJ*KANsVhZ5+vZP*JT%>O>Isf{8Q5*m&Zw|fV89j z7zHA1>@-xU7ggI6^s3jGbHQsWsgqjp{dK0;6RP=pR%F)v)XBMp1=9mU!4p46&I8)& zzqR5UG~=6(+tFNml}9yQhc@r^$F?lT@@&{`(93c-)$H!27u?G0V#(jEUTi_NeSp6` zyj=epKHa=uv>~)m+K~LmNDx7-%0uz8XshR_do9>uPX;2L0~0~g;k9CO9Ia_R=|Ekj z*!jSlIqIuSpKuO5|3qK7i$7Vw;g5cE%d@UjH<)y37rs{dzsZh4?Rmt zD)ag4;++kBGdVIp5iEP=yWe+RCKeV6vYJ4vBRzV`C<+**tZS_M6`R=z*`@LR&H*@} zxh?>4#pzb|r!#EM&CFVA{;y9H<_ZIVJ-9*pu;nWd!=38`RN!#H0@5E?#jjmo75BdX z)@0M2-Dl8b>nKSZPu5hXX`{m7$ER6dhS?dTB)4NfuGaphB!UgQT7z3eyW4oum@lz;+f-xMM$38NNn0D4*=a3h4@ct@N!nfh z^X6)>ARDQtlv60VVY&Tvo{GnA0xqNJKELl;KA1mBI(dM{>7310NqJtcp4~8L$)=LmUEyIsb}KYV^lb_(_zJ& zU&ETcFHplcRnfU7t}z)B%*Jh^EWt||u z1%@%x9d)iaMzP*I1S<;;ul<%=WmCE$?ea`BI#BEcS_-#=a>zvvg2WO=^stkQ)w|~S zbGn%=UP0%gnk{kKj#3wKVvisVA8f8kLEH8lAH!%rsn7cFs|JtuqbB#arZab3T}Le1 z6O;oD4u@V08fJ#-7h}=eQ}e9gu}(TquI3tHU9wr*C!hKL8V?QwIX6ywFB6ly&?sO! 
zTbvROLEc+@ge1Zk2Och8p_ZOCf}bF=@4VGN`$jUv8mfbqv26s|3!= zCDi~2E%S@C^hNL>4ejiXKE5*~jQn-l>qy-W>Hla8M75Nm z>$mCcJPyll@7tu+=zh%Kq=vKVWYMVLooee7#6%L*3#6B*P|(d2njK4wa`Fsc3pr>P zr`?q&zV};!%!^Nch7$8Szp1U$abBxk^jwWfiBC?x9aq)4y1cA|!5-&*@Avli(@;~N z0AOMUxmzF?s#r9xtE(%N5dqAbPPN+^EAYBMNkc_NHJ>P6{x4JaP1LTw0(-U2ZW5$2 zzXWWbXGF!LZK_OU`s6=BoANWke*#G|VDRcjLp&R~9Cp+u)YC3tC|Xm6KU)&3zSF$n zh(btMxM%fXzF~U$73AT}VltDcXRZvkmKOL2QI0`@gf6blw zKikcW^VrmdxDidsu8jfUx>F0FOiixx>xYDzWs zQ2Q=TF{&uBM#WM~go+^dF?a60|HA$Ll90TTe9w8#bDnd)-_PfrD_arlV0BgkdcNxD%(#HA-mSm z(P2Y0^YQX}{3%gr{~%Hy*0Y>lh9)t}%ZSR0j^+YACn%caBzg6qtFs%&P2GXvUc4OY z#??KMWZu_&7TLOSf?~Z@HiRJzKWOo2iBsUUVz=yad&oy55hW9#4RXP%c6E3nUSNV| z+~Q4hx;^HHsEq&=h_4{<47r79Q1^3$4X#OyNh-&mE04grlM7`(WQB=JKS4g>TvWAD zsQa4cJ3<}_W1f#p-^o(nk&(B5=3NtM;Asw3hUwFctFrXr+4eah<+GHfb(~B*|I7wO zVTMuQBbp#W_tJ$6x4CQ_8jZd>7{0IZ5lXBOYWZQbh5I-B{8*J{rE zJigkn>z74*kf+n>hvh!}nLi4|UG?M5*1zX99{b}@#BZ0um|H_z8)fL-51f0drYxy{ zWKjwN!ew%DlG)wz2xTfOEZpQl3Ne6-MO|kqm8>i`w!YQMME|62lh$ACjs#5tsoZa* z2K-dU;sM~>V~eHZ>Jc~}pK~Y;4UO5Enf`-NAouq6x_4%%iUDoxRA`BEpD%n|wGa^L zhyOGu1$JCVBqrV>X)KxqtA6+?pIfvT9b}Q9*=j? z=uY6{nWvx7@7;5057690{Pl235DE+5H%JFakgd)@!_{ZDdJ_3~2Wg4#K>u|p{-wHaaWTU5OBdxTGm#)-;oNBDci_N* zfy=BQPzy0B;%?6!@Afg7%o6W`ACpdgWVB2ifaM}hH>lqgAF*j9#V zn7+IujB>^ptVj4Eqx@5X&pd>)cZy3VwEnrT-0qn$K4pBC*p^jXtY<7v*cIkT*j$I% z9M6Dh!xlxv&-?&lnV?ObK@M+YthTs#=NluW-Ob-$cQ9JhUO29^^N=tU-@zAe0gopa8Ph?hOUcX z@=ikgE4!>VHv$37!>i+BlUJr?C8cC#J>1>xuU>tuD_LjB+IC5hlvItL$_V>Fr^nJC zM$t1*JZ>|1Xq7ck=5qid&GF_)4S`wdzp=Ko&5DM2^$+LsMt{zQPlHIi}o#@YyfDIB77gov(Q|3XZZ*g3R{{3Vol*Yox2cG(i$EZ0o6@odL5&&sHF&E@m_Sb!Q7F_a zeA?JAl@WV>n-3lz>TI=TWS9{u)6*5;oF#~z>hU1*yAHx6O5si_2j{Lhq*P{V5t4%T z8wU6UjYb0ux%#>1O*N^oJ5KBD?4;9GqxoDS-z98MRFquU$ghr$gw?r$R6X}3za55( z2vpZI4f`@*#_7F4gYvM&jBd_{)E-Xm$^F3JV5ZdDE{gtZ|4;}Cy^0rjqt8>d-)qv zK=pV$a26q(u=;f{1c(MnqN1X|*4#CEA%-;a^IMoAMr5dfXU}a+<-9(^gp`w$v$htR z6bc3x-rnB*XJzXg92_9)o!y(d6CJ^QMi*j9^E-UjwxX(ZsyTZH+jg&OuI@adqXW7v z8A{>y2sa*7-DVMb&Gl0{&3JRs`_!MFo`f*F@gc2U3%ux6^z@dY+29(%c2~9asw9io z-Std23bcCN3%tQykrt41d_hCyuM04|;uo8` z+A^dwkeMg?ZZe+8>0K(g(u^~AUSa-jo5b@PpUx$s(7p$g#ZBjdI6w?vpMLAoC@3V< zemI#|R|$diMBd=E%3c77aMzY5nBq5JE05{etR?hY_?!Ivx!-Ngj36-p4bi^PDONFv zY!IMsy*| zLxQ0x30~6;4HhfS#M;`WS(5{(Wbwtit(wg!9gT=XV(cZ08q&a7*2E4$0V)X-w(5QH zVz0Q)yShmWn+@J^E38xDO!j!)Jt1ad_Q0(>cDcf&k-WS- zE>F4j6uq+<5$uLPxPj_^7#zJ%h z_JPpwPz-+PnnP8v52G>&99N`l$&o1eW3DUNOC z*u8Rc*4l4!Y|^2izM?ITZ-OKX<6jNN>28m~Qar}05vYrp&_$&?*riYtdwY8k5s{KC zA(o6b)aXDi2cMV3lG4%@$X{t`Y5gY<3QI6C;&8Y`KY^q&_xBm5FC2Fu)NIm@*?my* zM5D`rw1iYNOLN?TEG{Y0XFF~*y*kM!wz?JPX4)JY%8k*hjF@7)8{p6&xTwAi%uw9Q zd8qX9Zf+C(6gMC!x!>h)ej6sj1ohM(1V}1#Gk~`)H;^jnul{YcH-HlR z2TUVa@;BD#Nf_-F#vuz1wc*t@H5n#Jyu*e)_4Q-iDdNIB-~* z#q#j;!@>r|$mZ_yPhDb6>CBRnl6UVmz^jrOcb~RX_qdr$TlISW<>IvTbh}1ZL{;e0 zb`#!Vwn+sj(sAbYp&8wJ6OJ6J>^U#*VsWJYly={wO&bW^}GSxJ%H$2s+!78#WUxg1;tGeXD3G&EkfDn_KiKkL$sLuRJs zmEPB`<1gSVkWg*2Slw9#R+nQB8U%cNWoH?+>QL^y`M2g0w(oRD$!c2gxP2U7`0JLa zzn+u6{YQBV+wHu+1$poaivSht-^YJtv+y@war-W%e;?JrR{s0x{{+APe;;ocv(6tH XKcsO>wuQ9)bFEL<{Pf1++Wmh49aE4n literal 0 HcmV?d00001 diff --git a/tutorials/nlp/images/spellmapper_data_preparation.png b/tutorials/nlp/images/spellmapper_data_preparation.png new file mode 100644 index 0000000000000000000000000000000000000000..24df8a8e0525ad6deaf7156fd2f5dc79f9ec9a39 GIT binary patch literal 75265 zcmY(qb9h|c_Xip_wj0}4W7}4fG`4Nq$;3&6##V)j2YW{;D@q~3e}xAF14ELL7FPuWgSZ9*18;FTlk+MVsTeP6~WoA|`s_&!;QWT7SXRLpc^ zh+?H!R8{}kdcL)wd!tp!S0VI#9mbCo>Toh&&YSV>yfrY z(0Ps+*NkwBB34IXLI`c&G&a3k#qk zv|K&vJ8OPpAnpIc_`m3obt32LHNGe-!j+^G;?|NSf~#$)vHcV9!` zBV#Fv4((1lY&^x68Q(m^I 
zJA4=v6UrJD`B1gEVsfRp;$-0Hj%$QO3+gE%& z+Q|uSwkAspm4tfL2{aWnm4$$WtS6W+^4yDMBigiBME~UjjGWqFx`R;7nkx$@V=gNe z0vzkDqGrqCY$cQ1Z0zcZdPK0%8yVRw##*#IOtBiZWgXT^c;bpzy8^2 zxINYB4|VnCvmDxlsr|e8fr|57R+JWt_`fp@hVFK6@C#ONxw6n^x^Odd@yb>}p0}Fv zr-okS5Lm)mrc;e*#>$8fX>O}a7PdLKVaMdL?)oKE)jJby-_K55tB}T~syYO7If=(W;jq!!Oj$eGWrAl0m&{cqb)RJIue+fzfB*aJ zD>?ad|EotbX1eF&{rQ-h9_qqU*VO~Iz;BMnp&@aMc~w8M5T^#z-Mxt87<!&UgJq#^y^N)a@hKfA;6pZ;!|0NqZq|Zoa;g}9N^;WG)A{;{RQl`|uxim$r?mf0 zmF*|#2PD~WAKQm~5=mgh zlD=Z7n5qELS@lnA*A}mVKqN_%>H4euf|_Q6bgWvc+KxbIhc_n`_qn;v54susC0ME3 z72y6vXK4!ipOt+-;)4iVx70;c(r-+EXU}~sHu8+K)iGtCS)U`~Y6=RQY|Y)5Mo~ZG z5}5p9Kl;)kdt#!Zb*%8E<)o8;>A4>ZT_EF$@-dyVlttBZ!sq== zJ;leXsw}Eus*RjD?Fhgl%jXZV)g#SnE7O*AUz$0&*>@l=Y(Z0(gnbU@;#vOo7E*g< z@6j$9yn-QX4VEsldpO#jG1s6a;D?a|yx7R83tdTZASU#?2NNl(m~UXqd-k`rwvB`B z;v=hDW^8|pOsYkc<3$$bQCU~&4X`%1jf-KS=fkJAdC67l(#rSn{9&yrd1*cJF=tfv z;_sFnXSp&ht*O4#i<#4|mUq3Z1qvK(SJi>$ znObiCjxhz@8tSGM2g~%E?xS2*(cS{a{eX zEV=7958yeUwt34L=TZ;3y;x++-4)*3*D21&EALL`Za=nb!c3g+qvst7)~1%bL=S$` zi(&`P3=+>zTdj(XQ3{>X(J|M)?vT~c7&2f>u7p9vxclQMZ)}9|HH>%KW7m1tMB?Qn z6ckhf{!dRY=fJu8Ta~%`Or7BAka~_oc~q;#A_GauBT%YqI`FprSh6fw+N1DAezz*O z%cme;uC-UYo7^i7lbI?egxQyr?}}XN>x;F$5B~D2+sv;F^?Oj;=Jgq0yE&)W2`+lH zd1tDX9g-L2(=np;+1Ds%W5>Px%`@B0{9Flr9T_Y13I=+ip%yo3--j1q&Cr09q^72; zb753C(`o!?EL;SOYP#0M^mx$L0CRBEsGT791A$3d#qe{x)Ldhlz=tAlZw#m!=KGno zh>JgHaG1BA#p%}xX#)}oV+WP+Q|k)vXYta37;*+@XPNn2V_ZBPbweu(3Cz1kjVod>GKs#gR7Hp)qD%o z$u!cFSHAAKxW$wdyt-?JPmYshX=EZt7sDgLl{JkTkj~iON~=0ngP)L1rfta!`o7EM zbwiADU2!yfaX;OWLuap6aHA7>dQm&nb7BD^R+}}F)G`9zVFB}LTl)-64>B~f%F0Bm zW!=$LZv)w*^sjJ?d=*2|^9d))t~zxE<@v$HLR|Ut&6wX>t-j}{t^Jk=S;t^%{+_gv z$BIT+ZF_#Q*hh%UJw;{#2Dl&UW?Dk3sI)$#D|f548qJVJ;f1Te+apHxeeR{lu&7=2 zxDa)=t|;hnk64Nef4t^5*#IlG8jNwDk4W+5ZE84QuAVBb?WyDGUm8oPvh=TL*sL_Rharv`-ZFt8zT|Cx5;H z!Wh*BVZ(K71cEz|b%hvBnl6y6;81Q#-q7Wt@&awqbQiA(STkui}m>x=C^yzl#b_6qp5K_a6m zOf7cAtjyF{pT-$6;nr{mX%XV|+U_#&%pu49d{Pe+KOLi8^xJ(sKwY_l&v1J;KW$qe z%myrLWNQ{357};IL|zSL&hc<3G?^Z%fKkCl?2E<|hOWCh9OpefSMu*O)wErXKPxPn z%cS(%Vm)g7(o8uH&wPjpt`sX^Zr9V z*y*VoA@k^q$H+VFY55Lt{42L$2LPxY|Lz~+q_B*(VM~LRgs@B`pVjy!pqW!+6zRPE zx6{KATKB7m!+eWZhE}&{Xd7G?gL})XU{t?-I+subsX=tb01Tj z_OChuRwH(Z6MO~p`-KUbPR{P4DFlTF*^k!>=Q~E>ZgZV;pjYBNoYT__|hV zOMHzIvIN7PsY{CU(Hb~esw=fZ9$fNy9AVM-_9&3Gy{?;5?>zw)E8}sN@_BLEFv%hWcr_NV&o`)C#~_%LnBTuUb+TeSpK`D>#6$-_X!luOfY`J*#jmz{I8t zZuh{+|9Snk`RFb+1JikBY3Ab_xqrwSBel{Tk_$`{lPF2Nv!Sm@R-_fbG>9la_?`$Z zXazTZW-ZUn$w!T^n`nyw%d{Do5&4+Fbin5xl1-5`=}S>m%D+E>9Ej1KpF4Pum)*m# zlAvmL!kPtK5E~008vQXSDi@#+R7ohU$=;ss7Z*EzNnqyWa4pb9h0^FQ+9z>V)~~`J zAfsifsV-ApRkB=F(b%$r-w~uwOG^yK9tls%!uIh|O;xkBwyiF$D(LH8+nZ0`hVngs z@qsSY!_Z8$>}p~uKV`Fn6zyf$=>2tFq*>BD4)NWzoqDT@dz4v!Rxrm;zFekL3A*aB+f1enO_TLDukmL(Y zPh+_j3%+eQmhXQ}Ndq88$Na@c!yLTK0Wi@}Wc7Ij^(eDgQt@doMR4=-+`gbNQ;j%$ zS#sd%wF`KN9FmZjt1I9?n48=#&Ij_$zGONZaSu_#M@fE#31e?4yicr;<6~w0Ad0ej zS-@Yja&{Ss5>`@8yt6nyAfc+28T^6J7hpf*-e=GA0Bq*tOB81HsV;%nB6-Gn_g-lGzj7xvs=l?12roV%G z?^MU)yceLu>=xM2b4y)_U1)M^3y~E>>t_+&#IHQhusoS?hLIK9`{lIhh;fMW*T`k< zfik4o!QWrcaoO3=qEM(f)&}W2FU*hwa#X{8Ft#D6rqWDOu>WS5g9q1UET?iI9{h+;rHOWz(s% zm)?%A&RY1y!@WDq7BWGPh1uQ`djCN4Fnv3b-iHzld!rCyqQqMr$g{W+rNaBMzuSre z>Jb-Cj?hq0aCB}P5Iw`iRFl%u$vhcCg**vXGN^JsG2SNvzn}&7cmFtD8v`O@`7p&f zDLK8ay2ChThQ<(hH^gs>%?|>oX+!IvrfZx6#()yW8zqi;D(PBlw-;HLC8w4}#IKHu z@PFUq==*|&>uGbShiJOsW`=*1S10HZb2*ZcrR*Wl2B@p@Y0NoeRg(Uew~BmAQn$&zT-C&;qbIJ3{iLH4zH^9phFsYUXLp(@YV;(@U#R!LD!YW zvMHugff)x%=Td{k1bQA7a|NR-ibnEQ#mdL((GreDV8u@tJ03+SmGc;qS)5+27zI{WE(XPL&O!dp-0Q z1!h^B9l*Z7jB_d?z0lZOGM<(Ld^()MC$VV)>zT`g;Km-r!C~2OF#KX~GBI0!nwIXS 
z#gcYW^~}G}vn(&@tr(Gbzh!&J+u0c4_yFVJnZ|+ivN#)hW46g$=FJ|!>p78}0rFW71)^j&exNUL`oN0L$8A$sVAUO3yCgCEF%??^4@ z*3e{j%oXUQz`L8z1^mig?8SXD2NcVPeF?KT%&?VwW_@OV0eD>VsLs1EP_0G76ZvT_I71Am-pe{Y8q4 zWkAJ9-zgc*Ib_NReEUItW#`e3oivsyS6iGP8*jdw(ggx5H~4*opFOR7!N}bm2r__c z7Dkc8Q*$$^B#hfxI}p)YQc)*YyHXhdH6ex$x4onp7)5Vcr*{A2(aJA!<{URu=Ju zdP&lGb6=cdPS6Ryg38?cBAu<$(FW~HF4TH*5XueyQiccYyH+N8hVy8#cC;CY9-r!< za~_7bM>x?=`35!ur_fP#!Vlm->L=X*`YQ#)axS=`^0w=6o9TFYzW*A$^6E?>>2H5J z<>WxM$2p-ws@O)yTbg5+l#!+xTD>PWQ2v5y;PY4MrR9VqI&>Yu9<7$Hk0S=dfq3)& zN%&ToVgom~Gwwvc6Al?F;{NEkbxMf|N9TjQ_Bvun$|MAIdxQ9S#99-7)BL6H`Zz^8 zjQ-Wf8K|mH&|DVhMHuM}1rl3!!JOnq9|IxrOt#Tr+#GihzyFE+^8jI&hL)Bo?@%W`GlBLDObxM49E2m@yTBo z5V_F*s3PFd&^5px6h6emno|2>!7*J8xlUi=W^Fc=$rzYY{m^uG_ar+mF{F65!(`j6 zs-kr!Bl6Fj3GkiMniI}D9qig%9$^X_@_N4XgUdVAbOH%#*F4TWQlm%B)e3lORxTfZ zfB7gGcvx z4@-zNm_YU#Z%G)+#&Cmg8@G{@Z0(A?2r{g`T>K5VCX%A2E=07{>XCrzM%nAah)38` zzyCyu>l*OiZ3%&mlq>=431_feb#11981Ad3pvktRZ@wh>F;ILvA!2CrdU#6rIDk{I zHvgi_*Ps9*$zvZl9>M#+RM0)y8ybDdg0;4VV(ihQZahS8TLjyKU82vy4KUf*Xg{8_ zBh;9$NE5mdn1z^1b+nLoyh&1|cN|T6KVBY-5IVN$`55n7|aGhP`OjR|n>GkK!Nr>Pz10b;~89M6%LAxiV zi1K#4BK_HK?<4*<8s4(8w>ee2woZ+APenn&U@I5l_L-NBKXb4YcJ23V6IWnXTz)H;dhBkaq74&aUsQmJJPJh@cWs-C?Mf)o}U@84|F4f3{ zoJcQ2nyR5MIyd$M_D_Exg+UA0#b(tTnj_HZC| z(@P?t89*oax_fu0PnbRMb7E+O`ikGzs6R^pM$y3U>2C-S7+9FgeBX3&mTQs7w#nOq zDn0>=khS%4`Stb#GSn1`#Ii9eC&})xju6f(! zF7`1V*~X9^e6gxp*urm=vB+&h8{^|qU-U8Q{whl4MT-2l%2d54MguZ5-fpAZXG*e7z^U<| zbnMieGNQ`^EOwz81l11k#cC?U8ltsKuQn|n^#nbD&lv8HVZSe zb?&n9=lh$R)pZ@YmiB-LqP5<0_)?itlqw0FQb(=zwVa|bK)d&7u|@il+9)zb+MnWr zg7%u^td6cdkeHl+_;az>-o+KM!NrB>)xT@^@iTRT2Ew?Y*r8fK_r4mk@(T>iw4?R0 zFcsE|k`}c?te&EQ6uexdTbK8Zem--{Y7$`RVFAlSF-gx_gk0g^ z)Cq*s=y_2F3Go{CPqJZHj`;qB`g$6{l+FrlZrD_XwFvU*B%q=b3jo)4%ML;>{iw99 zj~nSMd&m_cV5V$!2eX*z!P=Lu$G+YX&@e*_pv>_7;V4xwbbTIskUVJFyE@%!#=jS& zlS*)uI4&^N5jE%uea~_=(xRVVi?4po=jtL4?veVEZGEO=eM(MzCnHK_ZFTREz@d=Q z4NvX!vQZ!VspJm@RlDI^)X{TQxN3rg1my>W5qgS^b(Lc$m(J|jT#v`!N{2%YyvWH( zAQkP3h1{(y&WD&n8lTSOOim3Wl7RKl)jJi{1jZ{W{vKoMElpi1k1wSAi`$kbtjeNE z1TqFNi_M(&k>p~L7@x~zcG(*neqS z^e8brM1+!{ut@pNn5?Yc;W94P!1hyBlrFo&zH=3AcUGc4&~Nv7}m_mBGHP)UBWHYJr3i1ufaYm>9YQ0CPa8a zbP}^p2_KUEyr(xhcX)V}NZh}Oh-|7>TdxpTt&#z2Tyd zMe@l?J@4L3TyR?7$Mbj&gmd?^RuO=3ymuzV3#IpwBAa+AeLFV{)pBibA!mbu}eR9BtM#p^7Mm&CKqI38;5MDJb4Qhv{X*p!JoZy^ee~ zAw(DD@jwraePT`a|NG&QZ9wHqkvBXt71Xq<*DX3fZv2-W<#S^2c zO!o~P-PoFLfbr$FbwM603tGP54HD{$i+Qb0*-X3xZzkS;j!f6wuHG-JFMgYZ!xk5@G7|Fm@UD*$ zFBezl6#Hk1XD`?ORLVZ0At#TqYkFT=;)M1UhKK?y0K@vzCgU;!ApZH2_@-6oK5b}n zvO0UYAdky_P2@?j05jBgDvLKHVodM2QCYKGco8v7l$Mi8pMNy|TU&egT7wtibr}pk ze1x6$SR=T&pw}L3>9 zqWV}$=pB)pP6%&oBy4)>Yj1|Kn%3uYf5=7~%u7OO% zvFnwR&zRX~>pj^-BuJ}9TYRtXrH%)3D`bz6C7VEqspUr(+$4+>56|%5$46=fHYbBh zcGt34($A14bkY9?Oz8ND1bZVTMSS(=a9xk98Ug9hHwh4^$MSyz53=9%+3fI|dez&x z;8A=$%UECMMuLqMT~qDa#WrZ^dH+%eEq1aO<}y}eW@^f9zgBN?jfNiQsiLUAY0bvQ zMsaeI1POzj8;URFj=M(!6S~#ml}qsg(CD3xk-Bpz9HWZde>`dER&LQHIbJVlb-Xd|h1RLL-f(gAocIF7v4zarlw!x{Waz zrHH}s(B$L>KNGQ09!|}w1$SJMV)B#Y1Nex4Ycy1?ceWI`BIS)>NM;p=?7qf9!9lNt zhq)5h2W2xPmz7(|XYw}Mv=T`}D~K7VVMC|Jp@fR6Lx21oevUET46n8P6q&U4N+@KU9|%o$xYw6we4BvSwyO!s|!;2 zy`8jL8qIEX!#QlSned3ps_N?(OCbCTLx{s>T)LPO6Ain60UC?_UwZ9J8m73;?|SAH z2E6U4)cQk?+~aG@Itdp!!qeglvyZ&K_4+Kk%L}I-|817De46E8`w$8`^7(r(x8W5d z4aHGRE*Xym4^LJgfsTa*{yyQ^CJZGE&+=20Rz?@o=dNh(;*`pM=mwmXRYe|=>B}P0 zdXv?RC)5`WR-?V99>0!6+n15Cd`3^HHU>sS&pb42eAAf8_1_D@w=NMdX(re-}YL_kbgk;1z(r=)Gmfiqh zo(1-`^gqa(0nT6EweIrqa!v>XuXx%$LILyATnA``ux=D>B4HS%l%K+c-ql`pG`tV zrYx9!XIrXzSvYv2Djc7xVmWUtD5LC1PcxRv^**~t`HU@spxsB*dT!D#_hC>*c@V}h zJVheoqZDOjP5H6FT>G1wn$*-jKG_+nqbcu0S4wc;!FX#Odci>xE;6%RLR1vudJxyk 
z%d1C}W7Zck6U#CI#~*Q6^p!Iiqz+cJbW~~Cxv50x$!OSAbfiXV ztw$BmDVb%#B8&2rV15(uD>~xk23>#i%gV-{5umv@8jovJHJ2k)!TYOS20TkpWc#|Y z-WA!t1Z97-uj)cMu0x91`C3SUX}}e7wwTx1fyx&se~fP>8j5f{lQ@DhY`@lKZ)^;$ z%HG%G)Odfalw{|e;fsY}vZeJy7MafR_X^xA@Y0n2c<$G0Fn6AQ=VP1CF_n)&9jpIE z-E*7v9%D{&k;LK3%EFO;D$JqOXq7tM@tz!U+2fOkHqPeDvS(sCEMx;7Jj5gx+jomR10`mUPG z-cQ5M%X>68m}j&6?rN~#>#ebq#^}HOhIznaNQFl5Z<&tlX`(l0p7Ir}?(UTBi2((mM0 zbTAe{2Z^l*mL5hR)DNZ1^Yo-3b7;HTWWPIb2#Q!Fk6jiyy!=y4Tu6k{fMNw&JPixS z3w1p4h@ovGGZD_uGyx_|TVRddMY+YJ?6yDT$G6ijUoUculhZs(9YyscKnD2URJ+=- zz4&+0w_C+>qgu+RVCu6(0QMYc#+)s6<8+NC5Q@qq+NCR2zc8^xk~)w1W)ALxjiaHh z4pCA6Vcc%iDH&U?vg)$Uyd4OmW7KsNk1exwH#Q#7YwgSB^Jmce>UAh9sZA4=`FDQ% zaktU*=V)B8GGz}h9$s$r;E+zJC-CmCcE_JvCCY6eLFN)hQBoE#c@`ythZbe$bEDkwAmU zG(;J5{L7Q(i7E=)&TQt;LzHq8V4ISa{cIP0{%lk5; z9Br7Z-q1J?YFAX`DJfZ5fA1|eT$YOxh%@W>H|9Bs*{ni4vQQm(Gzu)(EseblA0p5# zuvmpVPu>}obtK%De#^j$LN)9`)9cUm{1ga4Vmx{GX)P!pXWKgGvX#gSoPxsa!4V-D zz-!1l2qZM}h(Ng_FF=(vO-0y)o`s1V;lq@e0CNga2k%{8Ik{$8S@q&!lY#%LDMEu{@;8MHD2f~rYhBP()aM9lg1%w@)43n6gRJ1^_$4g*gMLC z8e?8sTBoDX)Qs4hNDyeuJX!PA}3($hM^~02V+?>f8y(d*y&Nm!2r0 zZN+jq@HiF&@NIjSVX6gVW4~(x;{lB9>=82twBM0EYLwjFiy8Q%73g|VpOclfSoO^{ z=So$D8}{~G{`?Ui9;O6Dvo9!9Xh=*AId8k~>$kwxYDx zxZe=Lp5Nt;PsZ%nC<08G0q~;s_V(X>y`f!;JmI3=KtqzIPfm`e3KS~azPUBc0597e%w8blPk@j#70a4uX}cPjUZ}xK#B0*;@FYc3c%iANI6*^O33U4sPzO849Myw-%Mi_Zr)dax zq~c+OH^ni>D~HL_eDDZ+*IUhTRo}$3RzM(naO#lhScw6Z-j+(@ zN&+<>QnT71U>C@1VyaWhx=&>+wL<6~{Y@My5vy_s_2n%L-xf+!qW(1uT1rGSX}}52 znIRtpAXYd#alSR=W8guGavX%mxt+Jv6%UhMuX`X4@0<4}>DcNf^Q= zQ5*-yjbuaq8xGyKOtd7pt84U)4S1KUYt%Ry5(*8=M}CmB2{F5Pj~mb~tTjWAaB}7a z5zI87rf~k8wowGYE^hPwseBPT{{EBJ88|>R!VIQyxC-KW|LAuB)_<#8i~kJ9pJAJz zV&5iJB+lK);mp(zZus8BQ>Cza8?E*_90SoA&g?9fpD$i)hCNwALg);FXeq^IrL@XL z#3=dr1WV_-BetGzU9laFXvXB|CgfDK+;YBo5Nf~|M}u)X0;d)3qLM$5Lp*zt^}m@8 zeL^T9BQ|zxNJbI? 
z=PaI}oX;03Pwjd|WqKoc8iEN?2MHrM7WOwgtH4sB3v07FJmYK zuQhR3tZQ4IbMhO(EMu2&lioF?6*5v7E=5+Na1EWN zzfWp)E!bEXlT#BD<6}F&+Xp3eb?wcmL|t5**-VTZ)S`I`DrCzt0Si@t1^Kgzc=Gcy z)d}J)^FR){B47g5?m-kei!mPsMFt04*CotYHg?_kTw&r9gM+BZkNJUHF8{7ej9Q$4 z{XOGxF4=g9n>29utuL{yi>8yB_Kd{wlw@SzH9=}LSy_4IeYK7T>*?e>#pQ&?IS42Q zBJHg_c90~Kq@sq}J1w@OJjCc1!e^uwEz&236yl^3Z>-M^@SjRX24i}6v zBH)v7!?Zd6+Wak<-6@@E+n~!4xt(7+DKQZk($mqgXusK|h^`7RGBT??y+MEv=dkVX zkwxtPgm}yc@b~X=Aqc23%p)RTNY2fD*cH1wo}uWe1FaED$w|%x$m?S{r`{5#1%PoJ zTrd(Luh%7R1+-pugZ?u-+~SF#)Z6X%b;r|w zodBM!rr)L$b>+2ia&l&~IDM_hfdf)FixctS3}+jJP_$V>2D}@;efJbP;j>=w%-L4O zDgPu=&C=a-cFwG?uUE=DC@N;vWWqBlh`PF3Q7+R`l9z1LD$_n;_yMB7@WKRRlau%E zj`c_gXhDN=R7|ln|0rQV0pc6Q2;_=6-eqJ*9%9!F9zC zsGcMGmh4udDbB3VT97VgUHb?8>97>O>9ReEJ%EJ`Io&cPzR$G!DyOYw-KAlFnI*W; zRV$X>Qd)-(&eb=ek-L*djQ2CAf^EB5Yih8Vcb{qGvPvR;C_V_uD2pt!v^SU36$Ufe zC4D@exi~O|Go$pPw;7=T>64&Bpt`dUli>Y`&+>D-k8CVaD#@zA_F0gDnW1RdZ1X_o zS7u=H?&~{#mb~T!sv8j9(wa$rs6X_Jqemu@HJoI4){7vp&Z{}@cVH8Z^f;9UTvcI%K3tnSCM$cnCCLD1r9 zDM2#`_g(?PXY1fO#!ee5=L`6(EHu<}O3oaU^HTb{Pv|D6Fl0%kUK~f}%H#i!zB$Z8 zn`{f}OZ&)VaogUQBagOZ9j@MqbPCtkLpwU+4nSc-mm3t3X~;V7ypNmUV=o{^D_}zI zr3{EQ6Yx2u3D`tJ(uzR^Dd*Sx(@iD8h=u&7&RA^wCXtcRr6{CD3T&OX_@gF)VI`~l z4juI%@ez)DMG}<@HIrM9tuY&_AL}}-oKAWn4&Pr6J0h1S<}x-ivX5|maQbIdz&r2V z*`lEy7j?ib*<3XE`7_k4ihUp?3?Qmp!%>k~!FxT|$5CZg#j&LupOgZdqt)lAx zwm<TnR72$H)2p3QkCCqOvjJ2D{|+T`r%4q* zLTm%d`07C7pJa%>&2E!HqalqwOjb8+2*bUy1Kv%$ZMAhc&X z+6xDLS$tzcOXOM z(}Mp+L@J98VVKBDccDC~Wf05==V?kt4nZh@sedXio&!z<1W&5%tR+Xq+5MSro$AFW ze&8FM4GdX<7#J!H1)xtB&z8IdBqD-va#DWRL1+3vyW2f)ZRkLJ9p44O6FP$7M@4O| z14!!HT$-Q9s9=ErVdP(9W07$i*E9VfUKFl0q5NUzDfFV_L_nJ@fB1I7P=pANqR?ki zi&^7HD^?C)^!yx}%R}hB*QSgtlbARzDqT^NCGx8>KYy&Xqg}*Jj9qP-Ei4SIT%;(L z1at+WfzHAHdwTQ`nGYVgL!MA#Dk>@p3gp-BcykqQdUs#oi>;HBlXbueY4xnFu|d?U z?ZS4NJTo@d_8%pq;VSo)O{Jt~Vw&3&=$}%ciJ*`kGbN??kLlE57)R>$+GAm8dIKDp z%W2D_qM|@;DmG8F{N~{3h^p9BQ&=iGJ$b^R7L$pd4<<}$X6&d@3qHa)WujDhA`S%e zjZKUtk%=uGwgFX0!W+J&Z`8z?*~!-unu?k#D)kIsLO@A=S63NmcP&9L-+Tzo!u3{b8;*)$$5tgoq&QejE_q;!% z^jS>SiuoV;5M6p1Nr>|#H5=no7?SIY_bjC*cLp}RB*Si_CFz0bwWtN>;r?1 zYSAwZdLW8bQeqzOg3V#~J^keN|79TVb57rL(|UV>B1~%9r~x2NNT0V^(nWD8JComv zgFwe1Oq)b=MLv2XhGfjq(0EUuxvO_D|3Ch50jY5@28xJIBq4IrjA5GynP*$CM39Ot zW=PjtPDVzCNA?N&-WX}3owH?*v*Ruw*&V6z-87D{APX4)C`Q-C1KW@;8XqPo@B!?j zRJmaP*K`8&Uce%Vs@!p+lZ%R#DDxy%G#n*_=ghYkc2Gc1 z(vfIBNnE?}MhuyDMpJ>4Uo4kF!c<9d@l@*yI;Kn#JxeMDBQvz$YJUEkkzv!UQ{25d zjqL~k#Ni1Q%EHb9`2y!Bp6NsTT(PhZ*F9bsEFZ1bd2>x!d!(MrIyPn`Jwl9ggg3ix$Ml z!OjcpziBIDtt(ImdDnMZp`xOzf1$eg4x1ZqB%eoVTAA@_I4#Q#cR+yUtMh>)2rlV6 zfcBuO!_M-9_~Ckf2kHK&g2+BZ7l5Lg1QQ^1aXf7}vtD-_+)d2q#C3fWN@6vMY+2sv zVgCh6GE9ofrKh~zhYv#;QyC7D(EIDxuPIvC<}*&t?grC>+|Vmq?JoBqlLl6z_;R~7 z1-Oc9n%Gj(m7ZRz|I2r2ivgX?O!-HH2PR~+V_zTXx5*Js`5T;d3aSdiEu30G5YEIv zIGKFsDgw6lb(J=RIzF!Aspb{^xY##k6K}S-P6DB?WaYL1jR^p?4jf!N9v%q8f{<*i zjP!q7X8{va=UOx45uNpMM@wrRR1ZjSbOeRyD>yibnp&+{rN=OP&?{F9?f5EIk^CSp zf!o0#nIuq?!0x5%B?hlIk;KL4wPc0%wh$K?Squbeqks!^1@=)Ce7ghUnCo zbj+l*oHB^0;z9`)`-&ip@>vJQfp`aG05nT~SUkdiH#t)XPmtyW2}c^wox!~MzN8@J zRnYG)5{{nE?hs;vfrFz6a@yKCy{96V{Rq9&Yoaw4v7-VIo1h{|U#UQzT4q zk7wLFKH*Rfq~r=9i|3F9+05J;4HRYI8yvKl6=|iBJhLA>e=Zj>(2ma zHb`?2@dC|4EC+^K+=Uwb<;lqukd@}hAMk;BEu9h`zVv7#fA4;}*WSg9kqTX5Z9QA7 z+!ve2H~#hTz|S;HZm+!(VO7!%$@L^^4~@B)1pXEIu--%{Hy6_5>NhNc+5cTpnCR{g zi)Gy4quYI$*xuNrq@-M~DW}^39@D)!5U@>S(4Pu*I$f)lvb2PS1G%#Sw`YS>_*%!# zhRneOmh%%t?(VrPwvB4(kpqFlv%(<^JrZm|Kj_#nA1=AUI>{x!0TOdf6-Z`Yq-osa zWKHt6RNxEiu9bUy^w0^#^8f8B2SA)i-uuxD@>21N)WP}vOFYVP;_jP1!(LpJe_Gzo z&W{l_aAgg(`9FUo%W(tz8*RcMka!x02@&_IJA5t1(xC2Cu`*MdoaT{xa(2U#LZ{tb z**H0U-X1ukBwJ)q&wm&h_8K=Hx`~H@dJhRoWu%xXg%O1RAbt=x>7m1ajE5F_Hm(O` 
zr2NafTa$?4(P4Cl-G5mJpa|elx7=Jc;Olryp^CqIq&*TJW*hi_y_q5vi=ZU_@gFFm zV9eJx#*r;v^J_!`ujtHW`khxUS~%#Sj)=8xx7I{0V%86mI8Gh_0r+rOSet0%^4zv{ zEA5lU%l7f>?USyTyqiXYXhY(3*#F~!V4G%JDv-7W{J!WE4k z+j*pnHxeHmd~~JCOKz(a)AWkv0%T{99fxV*!@<5j@ln%@nf%S!Czjk=8~HJUSRC%Y z=X{q_YzA9cxECEQ8}}7YK9|Zu^X-wkatPF300H3>cV;2c!Xk!TWtgz9W+CtrNrqft znL0ebv5O%>jtoV|YKJiI%pgOo<_{SdoY~n^dVOU)&(ofR8c7O_H#=nA-S4=#3ir@K zswec-1~~b91NeUe8+bCh@q&ggbrpaXDonSzs8VBcYC7J<58L+!EQt*azaIv9sJ)E@ z77aCSB`oYe!&`M|c$hB#RiA5`7bkT(Y4pa}IJM!1hO7087Vt?w4u6F*$Geh|4qHKR@kq*69}V zoAxZscubTbkJ1lePcbu3&rB;!OiU>#tTuOi$&5iOG}$B6{Ytp@Rom@RyY(q2JD1+j zxO_V_xQ$k||DOT@1p^i4Y-6+Hsr>SUY9{30LJIue7m9f$)NDOn`h(KaVfC&QV@oYX z%@dc$o$-kKhO1RM^3Cd!xA^h96NwRd8on^rM#!jzV#nT4(+hQnH|j)#LaYD#(X!Z)#zw0dH1 z!R9I^F0I;j(e^vyv|s}_iMzESV6%+O43)3tYL1h(A{0t#^&#H zRpNt;7T*9p z5elTt8(T+p2dkD zf!7iSuitrm0#}ihsoK29jupN8%fEs2EQzcD&{|j;CIpRo(~h)wCZSBd+`y<;5H(6X z+mNfpRlDVBwn1tGq$x3x!6$s4oY|Wxg{2kcSQZ2m;hRrZ7V;K{D!ZsAZf+*r+&pBN zS^Rl#(Zj+2jK}4cOF+k(q0HGM1HKIiVN%kO1^~d#}6T@olTBs@B)}WpLo_afd0^NQ z7j$-w$L!rHP$4a(!kXd2LrAs$x!Jz;QBeui-nBHyxwbaf-;X3N{;pzP%&=4ZUeBGLj0xo%QSI) zlZZy6s2&=iZvJB+APRY(d~S}OYV4`bdU)J7>#W=BOyu2gjRNp^xs249t!!^1%qOK(KJE;+9MDRtTIQ(xHqh)JohL-2_*DLJ}RrJ>xQ zGmt=#pjM9{DOr8BY3U-);^J|=N;6q?QCP*$cW1hmW#yM1=?2~FG_BT)I4k=kmc{Xs zgyMH{k}^3-`telGe${jJOi2cC90LAd6w z!^e&X!5j=+Z1|h4BSTS$DI4yLG^DacU2%nZnL-HhT1SiI}LQUI!&FW+Y++ z0}|!HPy7APq%@7h3`VrYxdh<>V!Y;GT2MW?C7=g#l!Eyr+ z>43pK@@F9&s$etc{kt638&p(eWs@;EqutLGxSD#sADTaZf<%0oWKqQZ4Eucj+PwK$ zsZ-}0DRy8Q!4`&a|J|a-^x4Urjdm_2qVEpwH}980p8%7#H(fcou}4Rt=ybKSHP~?2 zrybyV)yabX7lse2HrRN4m&7?9R>LX^2?FQLPp`CVX(JJHYPG6h6-rK^DR^-@_O$(o zO^u@N7@iv%@Ah>A9hJ~pH%7XS~2_l|c|W@byK zQ%THLt*+hB+S)74<%ON=S6ji^+4=&UoJe*HwHEE|?UJ9-d3oC$j^+ttW)c&VIoa57 zc-^^3Nc@77e{44OHXk#f^XO{@ltd6l*93~{=&atuV$VD7dJ;^+o9~-{8(zW>m419 z=n|CR)$0m$Wa*lWnUnt$N0Y=l z1B`}C)6;SEHj&Zs0s34TSj?t585zHo6%717M&b-{`NH_VK>-9ErMJ1cxrU}@ZwM}@ z^FT~S2Ep<0pP#VzB`yH$7Zn% z3B1*Jdou}T>d!5SGuDqxR$Wt5XSr~_e`Q9Bl&j(NcxWMM?z-V$1Mn3;1)4q~==Fc`Whvr}YH8J5XLEv`<5J@Da4^K=?e1CH=(_m**K9cSuz#mvau(`R}6N($$CzHm- zCqJYDLrOrP)6o?a!LzsLvE$-GJCgSP{d-t!7C?@NJ>6o4>2=(m6;znblmTZWqCVkb zi=)+Q2H)+UTTq$7g#nbTi9{(PfwJk7@Mgz(XxF)=X$ zf1mY*!`*LB*DiKOg6Ora1uYh8Q<-e^5$p_1Ra=IVdy~m&xg~2H%KyF#$U}yLPKPEDVhP<9k^LMKThKv1;8Tu(}Hi zV{gY5O_nwIRoSU{cn&Z3PGC~p%j}=`n*BN7-!g>&zZPlHDT>ID_2$&>7#R5U#>Yc~ zV5p;#lAvo2jKA-7Bv5{+x8t_A`6?^>WuXLkDOEh=i7HwVXkx#S^mxdcMO z_xgKzc@^y9n|;kL3keOeMge~fDb$gRiq_Q70Eq(_VPj)*ln?uDcKOmn$*dmF>+e@K ziNE+aUS3|RPzZR=ET&vgAoua)~A0 z&Z6^$_>E1IEC!Z!Tyk*O)@36V5r8NRA3m@rCep(n_qN^Z3p4(6s3C+_K#SQ-hNvyn zMwyy|X#~uJzE6%(ZpD;fulJJg(qN}LfWC4frKB8qI8dgNkf+=FA|c83fq={CwbIk$|AaumY+G4nhJpQuiu`jPzt?5^iUFbe&AAAmnk14hmP_*U zjh0PFstkvqVWIXW^5Hk=EPhSY0bh;rvWBwqOqs3#Jbd^_IxnMc8=;*~bE|7V=c=D~ zrV}~wM!v#l0F-t^Fb8-&lZuLJYilbi=o@Rg#;@p}^?=2+Nq~s6DNianjlB3C8UG|JT4VQIYsip7Um%xd+9nQOxJ)#%KbW~bT+<*o9U`Kl0vqa8LzPj!bmE(nv22lt>jR^O zjP^$RgRJaqv6SWrh*EQ}e92U3qUXxEPx&1UE>5@i_w&r=kaH0K+(Fi3n1$xaBE^H- z3jI+gN=i!QGA&#dS0^8TSfin&*b`s8<)o6leC2%0C1$f}6;;(z^`=_?3VP%qx$B%9 z>$HmgSO;x~gXvPXs#~v&U~jYE1M~BPi>{NqW5>tGu8bq=>+253^L{Jn>yAg$M_ob4 zcW}_FtPktmFq@vP5!jpOoM|OoTwGZ5b62}_(KmXXRydd)VX{4!WOve9r1F9nWf#=| zY{`i}b~`K=H=ZKBkt5p~NpJgcgaz$yTmVZN0wMQm(be1A+sQ01btd8uRen_%E>4I*_Pj@{}MTjFCh>@tM8y+7foDU z$zX}2(UzWm;y1t{S`iaezL+jm_X+se{m#qZaRMXMb&ba~@CA=Mwu&7$TiPkGnYT!{BUfNiR^%$99p z(cgMQ_p(n5>X-0;s!kGnK#hovkX;pNIhgip9*~w6gQr~(5*LTNd%9@!e%A$#=9x0h zg%XpA{%F!Ix5kUhJ$W;1Y!+m0w*1NxE z3%6Ig^@gRj{8&2`^BpH4BwPc}_Tgt_ua7L;&i70w+31RjQ$fgNt*&S5*QM%=hJ&6Z z>@LR|0b#f{58F)==Vhz{X!Cn>*}WcQGW_b9Aj+a|F zZN_r|_Mvb8^a-%qqa4G-!+U#s^>_x~zRi@Wv)WCU6!5<1n4bhk?&ZM>KEwR<^mI?p 
zhxnBYKA%%$(iz2;A6JIM30}#ah4T4*T?~)h9>+M}2LbFs92m+=AW-jv*(}M-^!kIU zeD(bFpu6@ohN{XRhfF2|P!9Tg332fODi{^5R#zxp-&&<|HRi`JuCA^rXw)h-KbX@q zf0K6rT#EY{l)r6+vR=QX=F2USvjbBp(c#R5mO6$0||$h?Y-j8zA$ zmLWLoQ8yV83*e}!SOFea95&Te)6Igf57$TfMki~#eH<#XpADAE0|Nsw8I8(R8lq?@ zp{srjB~@(nJl$Wx#0n^7c{|#tb?R1BS9VB!P=okhFA}STyOd*>ru2^IK*#JiOy(2aDG5 zFOrgycs#CsXihh@!;WL8@ii&zZdlARpLX7BVlEEM>5{&|H19aN?G48d*^?nAAsKUL z8%jU9Kg8tl`&FV^)iW^>?Th3zlg#eC{?1@4yEmNo88bj`&iP_{NX=*mfb4bZl?FHL zQVCgE1X|Ja4?NFl>@L^GEiMB79~)eDh8yg5XtnBB&o2veb4gAnsjB0Z%QU=9@#*#6 zWf4*dHrd`)HZDC=-`||TUrEmEgM9{^0l4=!J1(%YefXvlQ214?Sh6oL~7jWw7-5#QVH`52njV>LU3q32yi74`&f;yi6N@Yz+)yvm)Hh zR_O0ye5S8=a=Y_>zynROL^uOV;1pA@v${B9TAe8$%ND+Keeh4c-kX4ULKx1{fPxYU z$6>!(@whEqKY4b009$7m4!5{c`>UJdl{SYbj^odDmdl&mZUq8GM z>zN;9;hTlr9*&n18IAlu;z?3Mhn=Z;N{^J}Sh~Af1D4+L5*0i8_<0n6z8CW3XWXROkQ;K{qyjVJKJR+vo zN^Fk5e{~uj4$CPgV66gw3!>t!i6jwlm21m9UrQBrhWDv>hFXdGehWL2 zz=P`u(K-`Eg}ZAinaJ>M)${cz4&KQMaOwcRT2S(l-HwG^s)7gx6 zuyOn+f-YPeJ}*J6`N0gjK^8Io8zs#t;{J&VkM5tps*Iv|oNbjxVBmUQ2%^9s1?r#K zhPLD5-VP@}TOH}Bc<_#p^t-R(qmcgYE1tIdkUSz87G{^_DODLt?S8u!M}7Brnv$Ao z!#Y>5>(kQ0t^c(1yS&k|<;O&x%uY)rqIGuR*X$20KQ%^$RAh_m@z1!YKmC5w^w8ue z_=CSB!)UQiyX_a372_JM7%}OJJSz_Ys|^)c;?jKbIOMU2)nYjn#5(Gd3Mez55!;Q~ zN2|&;n`*DqCmwQgbHMq734gQiEhi%IXXdZ)g^%yb)8niS1eL@esEyv0dK>-OU%Dw) zEur|~gJQB3d~ZYiG2^%LhLYP{pTw{I+-Krw)US`1%znEWQaC}q)=ow7O{iyU5(TwU zz4ur9?;jugIqTl;hfa~dVkg36xkX!yijUtpYT#qwU;K#0_RIP!s4k?zc^=km0z0B8 z-YkI!v0%V2%_fJ_bp`kPOQ?>(@-*Pc4Kb zjz}bIEr05R%V(jp!Ce;}7a8dldS6=$=0~&foc?51t3Hy;p#^Ma4YD>URH08gQ;N)8 z1_nTY>2r7tMsI((u~QmRKL~+!3Thz+Qz1f@UBIGXL{}4@BjXad0l`8V`q0M?Y@O6-BF&TfWE@- zP15_YP-j$D_H=*E?Y58dpc2fcq9a1SPLP+Bqy~$OEK0kmA*!a;C+AW>Ru%kZ^F6L{ zrPgrnpZL9vL`r@+2COh*8mS?E+K&jn?z(d0UT-0au(2xPA=fw@_TO>)K65;HBETYH zTHUXmXrI%QzB!hV&zHt!x83xEL7%U+FtgBSbn1)vqg-~XmFpeAc+P zBC`Fr5-t*uBK>Gn^YcGC@2_SPn9b0x$D4(Mx2TkyTwKr@EFduy%Hbk6H#U5ryR@ut ziWRH!05|?teE}Yyr>nQuu6lB31c5u({h)Y&e6q}=dh6-4<&E(hG!d1R|>)CQ3v*1UtgYm zB%w2yE(!{Qy3je5nVgz(_INNcF$r4ChfJ)eg>nBTOm3~qS+2VAe7v+hl-vZU1x%Gh zhFrUNTJVP$gy^VQA_VtxzYEq|9WFK1DVJ-3(cM>Yp~dAUHyHhllO$X~1+X_bI5>;T z&ET}}+U$?kRc+H1NUpVTJe=j*XMy-OFfed)Mx)b~5Zu`-gM9;i*gZCe=vejia8qYF zVR#lra)^`K9Fcz|%Q!YRhM=|Em-&a1mR4(J8ldtzcX#*5uU{ccHH($R;>fc%g%mV2 z@_yKt^0-MVl}@KKC}ygJw?b##nhKX%sCTm4O?h~9eQ7lu+OIM!tU3A8j0&r9pDhxa zMDNCt#PrD3c(Y4yJ(}_Q?FXjHIzZp)Sc~?p3kA;x{4^R#<#gIbh|*|vb>97@8`yua zJ(wup7_2Vvtljt>>OFZgzN+x5!>expmTaH#OnnhH93s;^n`lyGQfa}+lBul2>w zkiJKhan1;cLfkC3gyC{v z@7ozd;pS=`ht)FIqd9<1IOymB%kc9O`b62?!RP|>;TZJZxKE)wVSS(AXw3vS-qIMa zh12`Je-N?@)_z#I-^zH!=Bn_ffqfk#y`gcPrO|v(?$YVB3Oz@sKC_^x2#x(M;+Fv@ zCnqM;Nzp1#zbIX>GiJ~_9*T#jr& zn;uM;p8#$wQy{=;g>^WX<7Qr3LSkbG;wBUakIr_qRLKHR)8;sjNXe{bV5tZ1?ivSL zsl21hT+R6C>iCOJyKUXj*w`TF+(k{G;_Uf)XlAAkO#J1pYn;|Nxc3sx(QBvJB1qqh zttTTFZ*&QThlYkOaP@=@HxU`6N$%rrO|l<}A&t$WV#*;3e)=Ze;J27-s?U4TM$$hxyfaJ!E3&){;h%1$ zl3BPN?O|b|&aTebJ)R%IWOa3UnSh~TXu~V55TUQlMOsdd{)}gv_-60R@};H9`bz?QeJeZ zO!F(XOmK?rf?mev#AbnaqbjY=7lx;+?Q|Y6+T=l?H$z8%aCeMjem+|yTzI4I?{BX_ z@0rXmOu$InXX8-n1rFxy-muK{FU_yo*fc8Q%Qc*@_E9-Iun`bWrxudkBmwyJd(*MC zUt@NtqYB2PFgS)R`(mY0gf12H(UmWxmR2kGe;>uD$*%#u=LE0u_3PMt&j-KWkwlsg z&|j7dN%1?PV`2`cYp5ot{6nz;8}@q|esgb7wF+>7fxfqi>MCmMzG-YW-hsTYIIqKC z0dcO^kG?opQd0W$6&+pqeOr2agb@oM1-O4)k{g5)IPy;74O4GqLP`Sllds9qEuEG1 z^45kt;INvBLQ1N};V1;1Mu~bg(Jd0P)DXy}TZ2P(c6NIB+|Y{^+-Ka3h=fGcDBV9@ zw!I11VI(Nh;!nUkKXSRe2*7yc0fsi2RF0`ufdH^M1B#)|KAGkD`MFu@C2!CEf{O#E zOPuYxFxiY{!_&B3u8wIaN%K10LdmJ9U@{#~mav&^4Th4Kbt@{XaahbT8X@v!(rJyG zp`HS0Q#l2s30VzbUbwZ`Kk?4-dW@exCk#F5n0i1!FkFW|>2>z@8Ubb?CzwPO$`5oT z=#8XtTg=xADfJ;-`RKQ65hN#Iko@lH$pajsOghieXy#jJD5I77GjQTsl$6TL%l9AR 
zP*4<}ZS>Tb%~*mJnuEc{$HoaJWD)u#Rx{yB)6tCfETLcm2u{a6{(XpCA=f}&H#awO zc+rU0aBytsqJf)z?`#lW&t*-FH#u5G$+Uj4owMQeLWp5g?Sizn?MF%Hs4>$=?@FKp ze12YD9#BRwKU=SYub_G*jQF$Uwmzl0S|e>GvcQ<0>^Eepq`16 z5XdN5xu`$PN7Qey&)r0VRA2j!ygeHt={Z(wkMx| zcyz?;{@GmYEs!xj`YH2<6|e~cZ_%VrUgv)>UQ1CUU3LJ~h7dsmaw}6r~Fvyubxe!8p$Ii(3Vopq%!IDhRoE(+r|4PqELGgT5{9BU~ zc^UPhm?!HHxw&~zy>{p#c*Nm|S2_yh3Trz(OKxvHL$S|WLug2e^nsk_TWNcHXP0f+ zTxem@s@p9|TJp~6$K9}c>y*j7wFDz&6gld9s})asPMMEKEA|Bf$~_9#|97`kQOX~A z$V(&jvB1#K)WIjTg|wxnztgvDxx@+u_)dJQc(LmTj8ZiC&Jc*%i#b6lek7YPo0Qb- zyGDqN18^Uwnf4m-3VZ_8?&H zDPjS3#2?6{wrO4D6kFQ{`A_$#Hg}aPQQcs3I1@`;Cj`Xk_4RPbPn7s*`GZlu>D}<1 zlz*%RUzmEtU*b8?eo@KEn7FtXr)%Ng{=8u+33q!eK#5_7^pmb>nwR^N}xc7=IrU=G9#n<_itZE zt2IAFK1t%2AEbbxpr|;K!t;b4-@Q2V@{5b=#RG-vwO)WpkBJE|t67Q0w^sY)?y@E4RFFs#_n<`*`u94>34U zKU|ku)|g&wkH~Da`gyj^`oDKTT3g90!nLYsaJ^$NH38E|7dAFFr}6130EBmUcZe{# z=7`9s@zK$YtTTsGFqJ({{c=ZMZ@+z%II1G+PI;e_92qIpruhYYey)hHa1S8+#|Dt- zA}L~X8-N)4aDHVbEe*F(u7@BtBu*l(CFkmzb+RmVx1E}r0CJL)xuvD!B@JsS4S8lW zd!g9nlb#o17f}S&N&S%UBhu1nsOad?aeDtef)w=Vbeg6snVsN#o0ymYE)qg6F4vwA zOm3HJ*T2u)z-aPJ3}QjsYd)YSq>aw{T)T?&{GNF?`EU{iWa|Am?7xeIF)cLb-Vds} zYD*&FWKKz{efd&3V?^)loa=vCN&@a61#>m1)TyZn{Rm4!OpGBD; zB#L;ufv;cBVjF(F{=<#)g}cQ<-1ZR( zcM5lB7Y;vi#snTq!Q45P&eoRU78@5^rWr+Ch=3u9@l!@#ktFyFl*vNvQNYa!nPdV1 zE$wguCDhNKKRF!_>)nr%Dk2fVuLmtOJSYiZGFGvYVmkfXg{-WsxuBqaEiGW_NyK1b<>dt>C8uXq`iE)p!6`UHRq?PlAIjqTghl~0oM_l{YTF1iY=1b5a>6{3GmN@fI#2o z7eEXZlf@$M`?GE(>cn&=2Oj*>K#a$!Ow+s&F)=ZA_eZDnRyTVvXIIwZ@#=PfSy-HH zA!cx{`TJhfOgtS>TKQ`Yx3{x}!-tdYEG^5*%TFq|{$S6~5sE#);Q|>76?e#kZlJHP z-u_^El~5o89*1SsFA{27&H$%1-cn_Jd|Wz(ZLCI})tAL=nQP_E=6f3ip!_2(>9 zt+$}JjV-7^-GJHoeADsxn9WV{*KbHUD?&oTLlQ5_Mz;qCB_%XuVTxC~7}3nbKlFQb z1vIpH7-?C+{_iP;{e(sf$?eW{7mSWvUJpP~6yR`coF_(VhX2im@tI^0&?$F{rt>6c zDv)=4$~ME;pD<$lHURJOcTB&BjYh@u$V5J$#cFwRYRVn}6yowL%x$K>i{Box*Mh8Z;6y?!4AB zBRCxnM@L5qy0u#Mx*nHijc#SCIYS++-;ckH}q<~ zrE@c^{I;O;Ke4Ii=f04VOd1a^lksS5v9e#I^t(dVL%qe@Ga%kY#Kj4xgbWV8C^(u; zaZq_Ep1@$85?Da)u8fI@7-Y2~ywseChVc7cJVqwySIlywaw$k0S>}E57b~BgJ)q)Y zPPEo=Z{)|k#>B)VlS%vT=(tj2x6=)S4ulmaoxjf)JHvbX`*@r`P@e2bwQQi^;k6GA zT~Cges*HwL!GD^-z5t-e_JzxtjEu|(xDJyAUpG4LE|h7t4YS_|_?oY@a)0t>oP-3_ zlp~-O2z?+{TI?Wffb`|~FU=3+kLjgOfm60SZ+-yp8oAtlVtpwh3&#iZ>pIXyBJ}3w z<|K1?pnt^0`8&plL_{mBmYN#?bHIY;fio7)`vaWJGoomkEh@(m>c};SrbS9kv zRhYEC+GM4uBF=xlp&0(f!m8Y0^Y=Zis772!y4(i?LqjZV%uIGb%!ejm*i|tAX|Jv> zfDICsX8V={=EoY41xVO8tN@Y}0qlEPU2-xuwQolB_HN8ZK@sfU|S+^Ut?SlrTtS+NK!TG8nvC5)#2qk*?=CV!&(Q zaj`9Bpzs`CjL++Q4*0za{T?QZ`4|*+adFe#2twfNq9sOsZQqgC(avHqT>=hD@Mpt$ zz%$BmZ~T40#l%Xw$~h7qntvM_8rm##g-(<5NE$GZ!F)8xbB0EO0J6MP%4fc(n+3Vq zhrvWf8_Zw5Z&aOui1A|MD%FC_pwAYmGLlCCx4=02;pW8gc+nXQM7FjmP^rHsC%=#9 zh#4fyL7vfRSuc;Kv9jKdwu62syQt_~Pu|__S(4Qo@UH4U{{7lSCQNUiS)K$S+^9;fuo5jA7u{ z(t2@70ZL6yQSqB(?$7!J43la|BqSt&?K^rLd3hlV!LWVfc!I@Z?75vHNI-zagrGJQ zo|YE)0cKvu&Fx7Z3})Ppv@9%fA1%HgflVEB@e5lcL!ppq8#I7^o}{ECrMS4Xw6u0U zK45DHt|BmT@p&)-uinir3My*mtH;O3PaU0@^6sZoK}_>@6cn)b_9P@EJzajy_Na=# zJ{Kt#qZJwNlxaq|87xCp;<8%egw+Ac%DeY<%9}G3s4pA9LeYl@gOcC-a083QG&=Q^ z9Sk4o^@G4DnjSU8^J@F=g%NnnCrc~O@#z=`o}9zT{oYa_yPp2Oc`xn7SxeSz|3_Rj zLgA}ot(Ey_i$*ycUR6yT=IrTAAMxlskY!V?sIVE)WXCt;xYB+sj{nb38%HUC+c;gx z;&OK#eG;zM+AfqClXBXZARm8RV>N|=POP65L$aG!zVa(m{pPrFs;C`vroiTWQ;f8v zdTey`S7oKRloUwj`X>d2g~f^R(u@#zgR7-`wNH%I3GIwlpud(_m?za4R{-CI zm6g>pX1RA4B0?ZGFc#$GP&tJgX`^c=_`2Mr7I)92B~5N zwOGi1|KrD^V9YvEW>(gGWvp62*m;~1QiQ+8`^4nvRI1gl;W^8v5q zbjVO_`f&t49`{8=G%~6Ky0Qd1UDn2Op*$I>ut{`eV&d@AY_x0!)e0CWUkG!ju&^+& zuY$E>wUjrsP9iZoqx&w1h@c?u(3=^77#w+V-roG#!l7#00o5R%Q$r2PV!(iE?Q*fb z8H5~7W_P?0kpmckr3!tB2dsE-Io|gs^dl1{bAFrc>hfjp;QgwsyyH?GO^2GZ{OlZq 
z#ZW2BT%=`HC?c2BR?rxB|7)2l@BLQA(#`d|Yuh8qWPu2Xzm{X_-%9#@toRDe51JajcYaukBtkuyiC2*K z`8^mCs;iWg$jWNHGt8!}%=48HrGqy)G8Q#9X0S(wt8MP^KdV+O%QPDE^TLj~d4c2p zc*~H>$nEWu-W&25hK;J5aFPSKX4eNXc}@GjaU_P!MU1awDt!k73V>+I`4$DV0Bob& zs50Ttv9mY(V1aWM;KB-BQ@u}s&&kO#F!^?My~GI)b|f!PyvK=rnF?UI_A)LkEVSDm zG?}m7i!av}d*|t2tG&`XOMtBTK1d!x6b1tx-MFxPdD&Rw>hcN`86H8BoR?|N1s=YY zFgb-cyzbC*663SZmlgM?4)>8U-~=nxHDAnjxdAIw^tegKyNl2o`pKvQDA`-r*QKDe zAg-3Bxv{Yk7MscX=CtqOxq(1m^4;r$p~1nAey&g3uqz9D;9r3p;VaO}J}_EeUeS(KZZS6ZkhB_c1AU&3J#QzZP|+|}4g0igTmsG6{(*{M`!lfbMuZm3l$ zvR}W_muM|@Eh&NkB`o|QN!>hxoz=UG0S>3HP{Kbzs!n62O` zR$DwcK$2eDf39u?x{UB0ID>{+bL%F*DkxOfR?Gnu3iL!W>4Yy@s{H-^O}pododA=r z0pnJnk6!eT;NW1O4n=@o3}68fh5a)0eSgwqYJV!3#Y$^+Q?QyY>=KAo>b2&H^!gvz zGlf2hw(t#9n@;TmqPbo=PcqS)F_>+o)lGoX5-;Wru!~_h>?^E4bW9VGL-7|UR%^db z+3Ur^f@ZcjFT%tCd<$cC7O)`3!d5bj2jlj-{YAW;oq|2Cs#`I#A`RrH(END${lnGK zV)a)|dHvzH7UB(F*GXFQr8H`AbaZspieS%9=k<`jy9g4Z4-5^)=FxlS zY?d?;*4U)~mNFAibj18%^`g_Ly^{c3C>ZroJOu?Lwl_C(va@~IBG}&I;EdMu8v+U# zvNW|IK}A+JB0XI@KMWV(L>iqG@6nl=@X$~pf#5fy5@LLd1cY+1KsC=8VJG}c5{y2Q z$muwHx&{!jVCdHNCIR91S7Cl`<@JZ2Oqjfui(oDTwm294%O6S4 zOpzr<;iRE~jn9qECF%E-Ljbvy&Eoe+l$7GISOZB(;zdRLH1F`aUS)&dc@8S5kQc^v zr%lvXi4hD6Kz~wXN-nN8hmt~m*cAI&PIn060;WgvnO{<#Cs?fniO&9@OkjPm-VnJA zU317@>)8F#9wWyuFQBNbF0U>jr5^2$c6;K<0@dPBP2U@aYv=ZiLlfZR(>Y78pT^?` z3fx>z%C*&*tirJ2kky@lR<&3mpDzFbzuW>m7*sG%F4g5AWMG~)yE~T+QT@jwUwWz4 z4Lg(^s{sh*aCl6Cj;Ao1*HgtxHRfaMlY0{&V<5yiUvINvqu+ZCCfry!@2-AydHD&N zWL93M)78bh3!s2|UGu;8hs7<*%IfIqgU9IxZc{pqdh4@s5@5qoz}+{!OrW1uQX(m& zZy%eQT57YxhqgYtw?}#PGe@`6|4%%P85mxiKN9KS=%}>ZgImDO#Wk#5@7Z=Y{c7L( zY`q)6Umk!_fM;IpstmYG`}Vg4B7O+G4#Q)Av-Ld)8=}JuNRhz&>sG(vAJZ|myR2Cw zB;i2&vw&^qbwYDZ=(VR8$e6Afji9-#23@$u&JA`pe_P;cyEUKLZoBL@FenPeq=KhN zx>1fyPCh)swY`;ydGBwcqC$;2m#(7%d?1d74tv@7@dj-SJoO;_?Z$ z3D>)^-EqZIH_kbkF&NfZ<= z-6ls{k8;xlKyK}AJELE$Be@h#d^m>#EF`mv%dlluEiT-ZThV`r`x-jLpe?yF? 
zpGx!T$_jvi9RFZFN|`L@3Azt3WFXhM-kPuU@n@l-%J`c%U@Th|Ka)uKmyLWT5hNjD zvog-l-FEYU1o_B7$TnIQIuMTekFE53>6itbXSC%Rvc+in>wKApG&cYAPnZ+N_Za>g zW@g6%3JVqzfBq|FYXbuVmJ;p1r)vTn?{D!50QV3QXbsr4sp(RZXICN_(6tCk#%qoaV>!ouO;m7Dv?9gGCG&*hz z{KDVJjG>IuLoyQK(AD28qzT%i=n_Rtfs3m}PWo=hMUoBeK1V>!*5#T$fv=H6W0@^GEyGcOXK9kEemy7k%ZYFo46L{)Bae<5g zfPGl|`{(EHuXn(9(k-$)pvEsn!^+5bzs&tNmI}mkh7a5=mSg@c3lcAS(Kl-#;7Tg= zzFlE5Zqm>I$VJdJF2K*w(E$uG)I%Zvd^UKRiYPZZr9hCnaOIDIzZv~h2)IL_w<{0> zyoZ2tC`qEIxHI8J0@+;~sX1`xC_mF~8L-*gPH*EEbp;mz@n>~)HBgwg zaE+7+@bd$Ye3)f=T@0w5G%Qc&3=SZhHD&aoQc z!06OZb_#Eb-U#uxi=G3YIiRj08GgT5@x{pfT?kASmcN;r+f?seBui zskOCrqbHmf48gVFbWV#(;xIf^F?jAw_vj9er_+W5PWi-$b!UHx4uw%Hp*e+o<$zU)Vd(C#k&I$K1K})&(1G2^|w*w}Z&vSQs)mO7Rl70*gsxv!` zEK5nhvi!uPdK5GcL#flHR&FiE_#V4i^h*nHuoJsn_%+s`nk7{4^@ll0mDHFy4+e^uE5I@ zvcM_m-^h{kdy+z4$17FPv(_^T^z%zw%Z`GfR#8zASaR@qYz?{s5kHsNJNP%b9lThu z!*Dsb?;qL4-s8SJp)ZoIh)8_COs0b6a{bw5KeAze5bxQ=#ldpR!-h!sa2mVa)j%vZ zh+vd!{{?s)gg)yXt|*&-5q$@yNLdBo&K?;a7N*>drBW_4U#akkd1nLM=hj>2-|Xxz z0Ka&5dmAwb1$J{j_LHTSU}Hzi6@OPgC14X2REnTrGr(bk!;keW!T=sUFe}ToxZNBB z7Ej;#<^-8^x z&G@G5rQF@~@%9+M&pZ}=6wSzg7jG0 z6a}Q-e`A`$rLn-2M}2FruDvoTDwTX@SHb)_Ha5nPOM^pgZhEZPYhrAm+M#>KG)7zy zR)3>A7;UOhUL>SR^j!o3dOE#+&oA9h6cm&|Au+K+;5wPTUGJt|c|{L8K)}`K2*IJ} z2bColG}P1}^TAi^Y%40ATu>N9BV5pp+9~Ec?myI z^t;ZXd?T606hs5H*#~^&zyYzexESd7s%5;^Lf!U-N0cS*+keR_+Ujby_XP&#hySsk!n*R1R5A4R*P@lI+6A5Z0+2L ze0cp7mKw8SI4U(j2@~Ld`EWoogz@?D^zR)b7gw`ksoMVHoIAC_y8w848yh9c*ETjb zn3%&I$bh{Q966VZN>F(~9mM-a(?(URMr8|{wB&2%-5|Yw&Ccgi3 z?iu&qFAT=mu=ieT%{ia?g>lw&GzE$sk^ZNH>t5$J-#)P)r6u{R6|SEHq1*KBfysEo z`0db={x{ZQ5pC}Su`!p#!^Tm*gN+|)<(Nt|pixq*qS6TeL;_`N`mv0us@L83{IpI1({4LXe16k zdkm>@u~V9nelsaiKvw}59Q+nXxVo`%wc3{-IQV3k*_urMAKdOP1QLe`=A;zg$Hv@Z zwwlURksY5~hR|emB4aZ(HIoGFT7Vx9E_yIMlMP(7%Y~JctV93I8r-eGxyh>b6EV)& zeJGxeAri|e{Bm)sT-r6a-$bJT*6MpZ8aA3=pWZ-G5OGTVXxwqBiq^k1`|f)zj)4AW z=ENgxNMb$HMc7=qSz)cY?wC?cB7qs@%8;;QaC|Z#lc2-~0bUN+cL$E{2xndJON_{> z|AK=DHgv|SnBwAMwx7!D0_7WTi&-%f!uQIMET3{i=j${~Vq18Xc}mHREj4;}mIvW- z&ab+|bUX!2rUP8jY}BKym-kL*T&gMlCQh%u-b%Tu7Pd4@0@8-12{3KgZQU{Rs!EjHl9C7$)I0?*X!}bg{-nRN`|jY&W8wP!v5q2} zvX3h5g-r1GiHT>PG$8n^#--+RIPWZg@=(7Wunq=cQ}M*!rKhFUkG^toxkx~vgG~O{ zOpSw?nVMSc^7A|3w8drCPKb-^7Ry&E#}qG3uRJ^iLUkae-_Y~)vIVJA*N6QMzo|jqYKYtYNOvMB;HlPBB&Pj6$!X%}5 z)U=QQC$ACDuqevSd}9um=&)bHY%O`aX(`1G*o9^oVyuVSBfh{8kJ6|}R8DP1keY}P zLk4;uHU;cZhWd*d4E)#W8QSaC`K4Ne!WwS*DeO2&z%N{X(ehn;oz6whul&%Q+V_Vi zGU2jm)_Y_2OMV|7QnGsWlFYVWYjC9|L*&f5*BWtMw4ov(+}8d3@42l{jPbHEj#+f# zZr;<*UkYG>BOyLZ`2}`h5`w{f@U)8BMwT{9ji)DdNO{&!T2h3ltLr5I%7d8I>bKUp z(&iRRBG?I9Ve%R5Wk&Y{<(iwAS%!{E4)0D=;Naomua_ERQ&pcH+%8861h@c;%yLg~ zCk5v%6bM1Pmxqk;~WDQ!y5E1!uiOD4!xMPA1IU z9@|d7bMQy9vD8e>I*bMJtqT#j%(+yFvZc8yF2z7Z(8+ z#U;YDAF?6owNABy@OYf3rP<>AY7AD7<8yAf*8PlKUdZZDdmrpSB6Wl<`ol_($r(C7 zF^JfE`vzhmY|pLc6DJbWXi{F8R14}3l}bpZ2sC7_+s9@^#}P@uGPZjxQ~c&7--^1b z(*1jXLx!qDLA^-{JVc-H}_hx%g;Cfg_?@uA?Yr$j+iObUn(x}2hW2jf5^a3n&;ligs=c|8mu&YelX|2dfh_Eto+-TYqW~pK5E6&)W zx@505Wb&OpH2>bI@tBL{7UaI;$nt>r=&45+*w{w3-zb__}(kA0o)Z-S{%T1CZhl>_-7oewipu) z&A|4;Gsr&?5mpr|k<)e&B+;Rjfs#?HtE-t!He^dw2z|e2%BBGfgwuAF=*`$o^b2f2 zLf{HSc;$O@Mpc6-4q&gkI$-gGSrp0RBaK3qe+Y#HQSH)F)BKlQ|7*?gsLgu9eJejB z1*@X2;dP9sZkOKp~KRE%$7j05oNoNHmWuDuk3z^O3E9uribG|}g2wr3&7 zZMfE9xMI%L&-gT$%>b=AxTKy8?a1P2MsULgwF+X~$30%1Ma({bm{ z@YwGm!6}ceM+L-+`dnSvf#D(J=Ol&+Y*R;`Z6#)AGGXCv^|ky0y{BhseH4d_WM>Dt ztIIR@umg@6$cG9#A}X@N!7t%{O>>bYx6c`sxAN_Z<4_UspvaHYY6%NDintkr+a5tF zDNs#nVN0csbQEC=dH`_AK?!&q95JvATpWR%oo-bIx3{;>yJObVjeoj@0|AwQ$MsOl z?C3QTV0S=T!jh6^z`z?cue-o#4DAgsxlYw;N88nJVF?;pI7i3Rc+fU@&3g%Oj$;{I z$8%kRms156iic8aa&o&9c-awRva%yHGk-yO2Lq2r#O2%mG?Clfg$h*10Wk$&UUi!+ 
z**$cy%^7r7V(aFzEw|UmP0WSl3#kkIfu6WzL4( zr4b_{e;Be`ddq8~y1cPO8aE9~gNU&=_0l5pfHJ(L=p?2a_|We*{jvX6M8iZM0qq@$ zC?m65DXZ*32ECd5G&es(sV%(+GT)i)WYYCz=uQ?ESO%_x48VseEtYso*4tBK}~hXlOfhc)UwP+^-+ug>@9-jdBs zi0>~qjZgxe?6NX?$-bn-3@A`#3OL?hcSFGdM*hmm3YfRVyy0(k>Wd)kf0od$i)HyB zSx>-bURYAn*4!*5En~OT>H=(GDC^b!-#FUAg~+6t)&}cXB7sq^*b*r)%ibN!T?5!z zpdtaVatYr85)!8N)hp26@P(N?E0o-b{v~|^VZ++W3BG~uiTEcA7&4Flp;S6k0l&;P zjlsxoo-MQV8skNPUpy{iA!v$QqVO?w6mismjV7K_kHE@jbeLl2I?W zr{cA3y36iC8-rbwfr3h_opPj%s{2nhO~$+oz)D}J_I>CHeI+l)9XpH&8p;+5xjSr_ zm6er^b}P3RBY1`+xdi~vXS+Dl-@gOcA8{DN!%z0`fcBKVGvRlzgG-3%^mu;*U_ufD z4#Whtq!tjx6nt|zJ&!l%PLkj@2?ccwTSH_FJ5CVjV)ReOonJPTew1)`@7~~f(p~b~ zgh&R7~#D&fSc%SLbIsv1X<=aQc$jbKiS7#%`M%0|Xs88w; zq0wgQY|g*<9W8UsbYpG7RK1}nxGV{s;AQ&}p4Hv#Xc;4* zgCb>V9XT>$QBo|i)`E3+?Fg7c2C2V~a&QJbs5Qogf^FP@Y;#X~4k$smOMPY8W#Q>F ze&0&c&>VUA9*E{uEuA?1nXWj29Va8@q5Sz5m!G})TqrWsYyIkPr5ua$&v6Y@J9#aZ z7Hv&&%W=YD+O#12bEFQL;d2s$QeB(%7yQtX1}YEzyIe??bP1c7CWpMfx}Q$gy9wfQ z&|`8%{fK>iUbUF*?g*U=$B=#!Nn{F3&q#lakfh9;+CRtyJrb5L1}KB_g%>YRyEZQ1 zt#+mc@GP~{v$zVQ7%+^4c@bo7VQj^dFL5u@d*PfR7Z2#)Ovx;WhK8^>XvxG076v*= zh6w7}>V=eOD&S3H(25)u*_%K$?CkXve5(0kYI^z^J!iH*1JcTbRFc!VzP{}r!6>LL zaUX#I9JC5GHv^_1ZNDK>JR29D=}oJZ$w0d8Jh$H%^ipdu<9>GPW)%BpAj?;Y+#)f} z)-}ktt|)fYViy+bhN0DkNZy9iVoJV8WcRECZu zEYS!C-&2|dltvF0UT@hGkGJ{4uHMYW9{M@ba0?K+y0g{Rd6Nfdt_gbmo^Z<`^&IS0 zvNG@g!+h*eEH&wxT2XP7ErCfHgqKcQ@m28~LyJ#<7%gnpskSECx(b=9>-v{g2QvBA z>bDGJxEAP4B_L;V6Wa*Ieb=*$?c|9?`SbWoRr%9gUvf@ z@?A|j?tE|Ax)WCW1TUs6>&)EQMaV5D_xU9F+Vq~<0u6bV( zt18_{t}d2Fnw46Qw6^2b-v-4hd}{#6cu0Bs9_4C;0r9MYuh9f)>nSmQS`+nuI0A-H zCntXmnynVrW zInWF3_ewmA`Ui(tnbV8(;*^QZuY~%oGd79Z{^c4D4dGx-$ zQ@|pcn^iop7tCG6Rk|9@fCeM;s{4I^0Xy$&DlvV1pT(K7S4V5FP?0>2_FKIdSi$%U zCo!!#2!`g{uPWQw5-;6;HJxCo{U?Ipl2@WfJ z#Q&Pc(^MM-?g>#tyUD9j)hAgqYQPJ^z03P=1cp-?eV$|n&CVzu8Dr-po@1ZBggqs_!;@bbJwQRujq9>0N`%1<~6`nL17}pw%qK2PrHG z&E|_g9DBN~XfG-nuigOKV(6~`F#tie(4sQK;!8;nEMR`P%^xNu{hgcy8gM`KKGNp1 zAPu89var-Q7ya)2`Qjmk^@Z-o#OqD9H)N{$0dV2R{$Go@`+n;?)$ zMoCG{tkW@lHBcp>uY2)Z>u-q=T~hhb3Qg67frme8NqT`UhpkXla&SP-*ozJ%EF7y} zqhmvWLlNzF%6Hm>fWQ6K77UY;n`!`QL=m)?-Jb*iarP5}K+@A^#rWvH0HuX4ApmX2YF5zq_)Xp<})hy+FFQ_O~iQoB0cp z!l;(;jU!YP`s^gStTeByDN#}JJ}Su9%JS$pk^;enf*!RZf8NKj?vatj&wV4y03hi& z)}JCw6il5!Ov;??)vKINp!|DOsq_VSnl&leuIK+;V0c zSwcg@EF-m-k8897kJmtv&s(y;qAZdbV7S44_c!_HoP~_& z;FvrLazwi~UEdsDrxURy@J*ihLLqMkqflG?e)o=9`DfW*7EzYCWPoK|_r9u!sO`s7 z3$(XC=oz%OwY%fxp3LF0fQNy`m^t5QW3|-#jcKbY3!8PSH#Dn7T)|TZcx7zu8d=C4 zUCG6_;a$G-g`$lCQ02y-1bkW|;&5jGn-^Y3TU}Zj6j9q;NyJIo`9mRB;c-)Zc2p2X zosBA}JY56`8b-u~M@EE4#)J<@$k6WwR$up?gBEVL`=suKGFNHv&3J~9Gd z;)@bFjMXGG=838oEP)_v z8Bpz`#U2(61?%!GB_%(hs7?=#MBPW38B8d_OJ>noEx}Qrp*RydIvzBu9ywiP40d@3 zSb!x_FCH#nzi5vuibzY3PbILNzMu%0-@YHtoli^qjK<%AH-#6X?a;TzaJwXYkD(q0iaeO3jdWF1X}d`zPFy8(+NEYSI^E}+z)Ty zd3l*FmbMmR6qpTsq{3oB=M*!6A^`ZApzJE1HzoxiZ|8=pQFvX0KLy@makI`occ?K+mOg?^;a}s*CvtBQJ_Z>PgQQYYR+s1S@xCD_hbO(VV zfYZ;;rhp98z!h>+$k$Bd04%4prPY4)8%>iN+kYxFzj~Gmku?I|F7q6 z_xLxKMl(}W8o8{U?d>6%eqFArr6sQ5BQgUqAj;K|P|JFX`^psh6gGbN8lW74Rg9$m z$)J$}mA^d7WrQ@?LT)Q+Fz*MfMxc32o}ZsUNI2!`=`asGY@d<7-C|!~;1d!eJl^)X z`%YJSJj#~U0w?aNIbb%fCv>(Vivz_fA}k!QI7$E}1Z)i-`23*dVa!xZ?0otsf z)Qzd937+otgKa4-IkL_-5sYWc<=qKb&E*M1QSu3W%ec6V_)R@!=2wZmyjO# zcDZiDfco0`54$See-^xgf>Lm|>r#LS*(FXJ1&iFE8 z2jEn~nEA`Y55+X{baNH~G79@ogZ>98=&OCL7$Gpn#7S1tpe}fSub_$dJo8c^=TQBL z2w1ZwoF2n3`wcmhCJg8q{`0~eRq&vLXJxGgrbB|hCm-(hzF;tC3HG?dzBuh@=hNi1 zS5Yr``x?mU%81jjk+Xh(&YsRXR{*Rdz#?Ih ze%K@N-H`)ervL4ab+2j#zw+tRuk4*M@;chv%RT7hoDNA+jvc2tF{8{?;O&-oDvGJq z`pK%pYmSpW2(oQ3Mf(Gq*Rsw?*t?MYU>LgvPeJ)nUrv6EXqQ01O`0bH@Iw~yVV@FgPx2FA7Zo`Ppm@#-?A;hi{ezID 
zJiJ{Jo>vJ-!KBz*T`D*|qaGo2$S(W9y(`^z(Wb`QjSLOQOG-0PQmL>3Y$8zC@AJ)# z_@DoQBuR~d$<@J2!phkSyyxul`5@T91aV?8zK1OI6^+k*+WO88??C)9TcYuRRIJUm)b@rKAG4!-SaszQt9)JLIA!mSh{4vepDc z)}HoFb#(SaE-DaS5g?&{2LivkoK@Dp+Nla(Y*lq*-^>opr{P=dZUaR$O?6chIUN58 zRr=dmX{dBZkF}Mmqo`1k8%ay8m3uH2B$yCHIM$@`P_A#7Jvt`+_NDt2vSUs|Q*wRX z+ae%)I%WY0@^wjo;f;aeavT3Ulk}v>wecG3^Q17WmV0PEDXY|vxVR6!ls9^^+Sss~HHMs=YbMfreho!%>3?5Kw}E(9JZBK))cKp6 zfH%WKKmdTaqOl}TN9=--!42;sxl_O#QwI1x`{irG5zREPg}B{_S*$+omCsbCk#qxz zBn?Ib?%k>FrH%L@2e6+n{!CMG(}o3|MTY}HW4q;g3etBGZnRd71=Eq5B$z-id@QSe z(F2QBH~~cKy04@JKa@0-#bF__h=XM8>_JWOqKSD`lI31TUDy2-eb z1gmCfF~RdL#Z5p`SO!+L?i1K_IBj@YjzlQKiXy`C0ACZTr}=yUE7As>(bGIHvkX%S zF$ukv!zKse_3>5B)^CaLkg3qndANJa z^~hU5^KpOj{n3hB(i?4mZ(+8*gSUSr$^Xk1r~_38QO5+65Rk}w2m0QRYYX*(8KGF2=@KL?|pgh~mXVB#qL%{n2ELj^Qz(o zE+1Ql?X;KmY2XYWu{9Xp4uCf+}?zb4AUD+2lS=E*_f!x8XD%FLJ;0>0(;%4UB!Ur*nl%?K(Rgcigk+$mGui1uB0Yqw_Z(9 z?wg+u1^R^*VFWyU%XAnE$R9#fgTGkHP-?88P7}cte3;|1@Oe2*5v# zGJK~*pA>!S@O}GpWY8dW5<(HVEAC7}VzWr*&V2kQkZ=SOARV@MC_bx+lRa&d2S@e@JIk zp7VR%TUM%*Ic8B&lm2r}k6TE5-e-8;>40XC4&@ioWIqNdsZIknB?BLtY|WJlCN%Ut zYPqHbpBWO`XSW^HiSN9nmcfA98-`6$9BAu zvbo$gqKs>6Q>YhuvhNi!0Z^ysMePKjZ zRaW-%3mI0QaEGrCoUJXsX`fX4W#RRgKLn7?M&mx?MxV3uwqqWzkcNvZBjz)&jBH!v zkzkY^hi!%McTSMz3+s*`>WG|CHzBA`I633}amlQkif?ev>8Mb8Bfi=eeAVS-G5U_> zgdon$&Brp4!7UjpS54yzbZ$@jd|Y9JWK@@RWAsBqCO%#nEA5^NvxXT*32b^Ndy@2mjY?wjJlHs`CpA`-V00 zEupqalvt!OelcRual63f`~j7eZhfuR!z9*jypTg&?l>oVGARL7{Da0^|NZ)B{%gu; zBI6)CVUw1pb!EP6zF=LEW!DiPS=MVgt-t#!lBv1@PhR)YEl-; zM6oGhi)OD;@$W-_Oc&0HCW0%4h!1}-K@b1%fxEGds5HeU zU^&zbz>lj!mi|osMp!w|Qk!5HIikZ>cZL)FXYQ?66PXf>4!u<^YweknlS}lNe8WY$ zH4zbmjn$rJAnJXW+Z)z`ho{j`FF(YR3LN088%~-zOx%vaHF~RIC&w`)Y+i|s4C?$Q zU_9Ip!Iw9CJm_bXuh5mZUyH?$6iwXMK&PTbyf2TCSvla!oX_a$*zoWFy8DSpO?5)c zKm!de-A3;r)Y8xPE1}K4U|x|K|8g*?O}X^}#gppNWeeu@q4`NjlJMl^^S|axc^r49trGjWD5xB|IM6@)BC3%3jxY5FPGKcKh;j zRuC-WxAsZ~Q*mWOMGXUChyAPgq&cW=f-4*f1l}M*&kag4*yU7Pmd*B z*PqI@Um3QrF(0oz{dHAtBHNdt|Nb+N-;F_JtD|!F-5d4j%B(}{w0CBeaCS|xT7XUHC`9@2Z37}Acp#uNvd6;Q(d)K67Q7EeBRrt)hnXdL_N zba;k>8IBoi9)Sa7*L0je?)DgcIL7kEvZMtELeoxQd^KOEY5lcaN7Tp6!QXwH_%=Yc z=0)uVy6yc)L7nYOIm$W~> z-KBD^ysztVvASD~j(f5&T+Ma&Z7S1dJo_8VvVrBzDcWZ9hMmCpEkEypJ|s1ym?M5B zn)E4LAQNsOFUhcc$Do@Y7q_@TVx*!>{L%US#*w~y#bPmqq==?dLdv0`;l>6lE&201CF*L98q)U^wDg}=Z(wK)mlw|0*M38eb-8O{C4*buXw6#h zZqm^-y3QY3)_L31HFOE{3&rz2-lI)Hakg3oto(u_h2J!P>Un!$mxU|~ajPyeH$Uur zI{sPGG>&lHS8{qKuBNWWI*IONIPikeEmRbpm98(or<^;f}i=Wag&~>N*{*(2Orp^wa4iVu`|dt|nnz`NR$N z>#X^`s5SiaS{w8E{8%hLKvI7oXymJ6PM%=-aWJlM_(%$7wQOAQJ+`MyfAxwNL#9!r zX6C&D7X##!1$P4S{S(4~-(?e559e;&9e9vq5ZYC@qjR+Q(HeC63v;}1abRi zJty74Q!`^~v1@L=t)wXvqvoW6`_+ zbYH{iPgh&ktU|>l#XJ>T-7E?#O8sLnG;c_F>?Q7=$cWRAO{diiG?-ZUFRa$tYCFU2 zCPd3>r3%cW;9t(zt{EsRDrzWi#&1ng@Nm2pAPa1razF~dUkiIdoF_XIzNIpK-GPX` zCp9GZbocWD_WNJml%-?Cl^b+_YCUCxJlziSx1Ok>?}eww^>F(lt4KsP^Vh+qHF)ZB zY=c*|sOsGGY`XpAHV~!5U!YnF>kZ-4)O zCxJ;`lc8KwtyG~ZDWSp1MoxY+iVkcjkC${BGfgET4-Ho2O&rzoIkeN_)PU zAjhEbpKdC6y5iRBPu!xBH|^#r`M#H#l^dm2EUPmrxUug_fB8CRP2KtR4?KJovQu9+ zCB=TGZyqfz@d5(jvEqln2Slzi%h)F$%p;Ii>?P!ivlo4D#;~ZButE5 z?NT1O(TBIDt?-o(6MoMuDj4sMT-2`2ag_-b$GY!waHg9CJt(gjd%~0QdskE9Zp^=Z zRPK@QnfrDl9c8`XV^)QM5Yp;aHFxM2trKIr@J0pxR9M-tED10hUr(|O7vCH7KvJ0n znRIcv&76GkARA0)Le`FPT4SIS04XdyaT`@-N?*$WeJ*l1`7x_9dODt8m2EXo?p3Q4 z)_a*}3yJ{Y?t z!@r+Wllv`|$Q?x0pW}CNlAF3oOm6<=8%JS!oha(-Hykdo)2wW3;yygLe=lXnV_F4K zJa}&Q7w;40x29Dwr>|9g=&)1JTrytP*xTp#J%y+VAQM`WTi`*=%!A+XrQOExT(^)G z;#($Yjg|hYpRrp|nO)S8HWZ8=7>U@d!ayhIddqhD80h#)P>Dm8hZcsT&Nq9GzN*NX z53e><6?tRVt;=mfVSGHh(RNKzp*tmjJXb~L*~?O?9P!yKlA>ULi)PT;3YTXOH>t0k zK%7EUo|l(ORFr^+M_pF-XtDfwypv!w?KLeni+?+}+Xg)rdT?Oi^intaIiU(Mt|ILj 
zT~aPeA*-*UI$CFPmg+KlCrMp;I4ra?q(uMgWkXo$G|p)OII ze>Ju|I-2Ik9NTi4a5<)iHf&xvG;#d+W=%KvZ$@ulKad4?Uv2MGxf=5a-V&**Ohfj)keAoBdhFpzrw7uV7n2(~Gp%AuzXYz?goed8pVHxEhNMB$7g7)VPMlAIo zNVo~P?XRkH5eP%A?*Rc!`(JbR0%=<<4in%0@U2Q3vkjUd`kvPPVexHP z>v-a0Av5a#8{Yf|-X2CY=8kBCGbmWFXiBs^p7D_suL+|EzRuIGTS#*ztucm)9zFF# zH%d4f3d*(&>I`R4OmHxjG!*=RPy4A#+6eTASSdYn&UX@2G)|XrLctTcyPeKgWX9c! zp;z&5J$pUC{>xL639K9}?WKvudbp=_P$b@B+VMpyv$2vutZ!q~sgH`@KE4_2E4Jfz zt<1Bi5&%;|!%lB9VG(bU>1_9&@5VmN!AyNS?IO$qo!om}F+r~l%B;I-Vh%uHrnMi5BMe_%7O zbr4Hs4dIsu>ELm-VfU=1OZ=G z35k#}L&SKzJtWx0_BHhh1vrt}D48GwS>ACekB-FXCa#s0|0_!NCE?$oRd}V;Dr{U>-9w@GVk#mOU^X zfKj|LQxdJ&dD0mY!S@YZzvMZUoeccKqGBNGw0E$_W~6-LPBI1@7NFsPz%vJmGmIVQ zx(*Kq5*nHa+Vdx=tadle%@!%Lgnm6;3+IK%<0FycPgxtW{PN=R zvNAe$Rw}W0AN@GG4$mtSi&=-2j0V5*($a`?!0j#(DsIAazIbsXEUu0cEP^gOW*QPI zPJ9)9b7gp4x3vXM1&o}Q^MXLA(ZLS0AwVdd^q0@zi$+RC0~hQhtK{%S6otl>H3D16 zYJQQ^^Yoq$3-?8|`|?00(%>Mwo$)}ng=n+y!@u55qWI$Dl5N7 zEHuT?%NWdP_Rl#Tw|wpnsoCF9;%gkfXY~fbV!~eAqcPUHj8IcEXl+8aoV9c%Q;|jn zft`}wgAQ|^_}sh93)?>Tc*W--lg#!Pl`?{R1H(q+ue*l?rpSH%kNoJ%;I;+p}i(q6{|J~|MY|c{7W)f#er_O!6KBx~yK`T%Z`TS+ee* zWo9H&kM4rVWr9c*GFngO^(|*f>qY5CAv!L8{vW&ZgU+w^47!tHJoimm^1PrTkrq(! zQ8P<+N44vA6QS^6X%z4+4SE9Fj7h#KXlc>D4wOI1C_=9agvnWYLSm^CmnL62 zKkyl7=T=8A<92!^aAW(OQ_jf6>$2AbdGF}AU9LR_N`}t|q^GICd*7UG519zg@bF6K zjvgCn(k0*xD=dWu{t9YFLf*-(h&qhtR|sbz4Hgfe49i;Xcp@5xM1n8Ml(Yx)Kuw0ezUL@6j$*!TLicvV=>}O(<36Ws+@MuX-zc(?5 zqy1>rU8T|^Qsr#aVHd0#lmSqVsjBM0Eq-w4koI}M7a`w#9)EP*JrzJk%0Ld{&ZNOn zkhaZHyT+A|Qm*-oztZTFTgNudaB;MNq_{C2^|`am+JuRHO04YfU|H$MCY$hyPsGRP zw-*?a9b}AyMKfQ%xUVgN-g$XLWl~fj5gNEv06#c9{80TWCnYfOV$mf91^1oNbC@%u zme0XOUyqeYMbe~==u&nMx5=v3$P(?qqJFROdWgS zrWdZFtz6frF*7?REgjx*X85>;G9}DTCRD=6XXIq8tPC_#vOwdy>~Deg zbH*${qe%3&okjw(E+tGsCzwqR?mDJa^rn4|n0YsLt+=Z6XE%#c?e9UOvAOSNe>EC` zeR-CZk`WTX%j>p3na9TB@3TQ6-z*EM-~3UgWtdRzpH*I7nQyG%q%!4r?O;qsUo ziQK8T`g;w_J=5FYWXqWu8PI|Y3@`~r&_2@T&>q?g9xXgh^yOAAVsdIzALo?}^eKRx z5tsaMl`)1D5JBd@y}zlP*OH5GW6b5SwbJO{r$c=`K z__$z_eiKU2PLlnL-z!&fF2Z;90n7Tq$Y-epA}!*ML7a-@4+IV$TZS$*DF-?GKl-QQ zwt8R^@j>Y61WptAb)?9ZFoW%X291)!w*)ACn3XuKh-Bb4#xvbecM45uv|GybD;)a% zJ)A)ay7^%-`tC4;`jieL3$3(kBfZe%pRs!sB~B--vf4f#6RkcuHD&iRApABYDbg*wxi?a

diff --git a/tutorials/nlp/images/spellmapper_inference_pipeline.png b/tutorials/nlp/images/spellmapper_inference_pipeline.png
new file mode 100644
index 0000000000000000000000000000000000000000..07d85d2e2295c4b4005ab33d26fc10cd56936a39
Binary files /dev/null and b/tutorials/nlp/images/spellmapper_inference_pipeline.png differ
zpG3*Xk=s?2tfHhuS+94Mh^#6ZRYV%r6E%=9qidmDVJJ4;-%k*xj4@kMdwph)Ia4+& z>KayDqEcmcaf)<$^0VUMK0|;)BO@c~qHA|l$~55Ve7BaVR={7XqUxhS^Ad-^_oLP) z9)t9~-*%BtLeeLdvIYC#;AfYPV`k4aJa232*$7W~hAJ(QlylQuKX7oPTRhYQ_rXzl z+vy(@MnH!Ci&oHJfJiV64b-i+T7HZHN^Jsvh*7rpZHd9Zf-(3|eO}4$q0!wtcDClz zY_3;AuQVB>Z%`hR*T*4huol{E0i$yrcRI z>RoucNn(yJc@&jT9)F@WO~HGQUEg8E*%oowGQ_MpgEY1}{7x4k z2z!B~ms{$C_~R7?i)N-)77>yHT3Rkm#17f(XS{m*m;KC_JU^!fs(rfx;ly9TC;Sfx zJ(4qjtDTd@_niCbds(K*uK24JIiPwjDk@~cV$DLQ)!sfyPM*CeNredxf&n(oDEs8Y za`|`yvKX#M$(SidNxSoaZ!CBbwFVq)Wg(|y`vfbrI9q+^hy_@H6~cp$=}rpcl44P* zFq{JDl5?nJs7TH=UMUq@W^`XmfuL+HS!g{{z_y}e;>w#HGoB&i?MbfqMK6xiWFK^> zseIaphP*CynrpPMyj|>+to!Ol3%({bF{1;$Sb1QrZmF8oQ?Eu?DWbbJnLw>S=eu=ORc%B6;(D6=$_{ zZZluZX=MnqPe~yusF8C4l)Lw>TTte%X%eGrs=ph2u~4O;pt7{BBKeKMTXEH<(oBli zIld+`XS%rL!pf}^-u2wotFq#h`r}N6oF!EzudDl38Vi~wVi^>a-VL?G*}SUy&2|=2t7ezPz_=6r5wLlPY_oqmu+X^0p==Y(pmwE-hybd~qW< zZ;+26qdKO-Q-+9ITThOyCO=R~2hGAV2_XcECc$6*sA%X{3wf)*d#=kFty*Gp9_OFk z?IfBSqiPxV$s!lAMThiAejQsRlX8=D{j2mieHetiK572cUCR*X)kb9Y*ULmfrK$X!*&1Q)d*cgch_vqEcaTur|#Rvme~ z1haum)#CX$URUelpVan4td3AwUYa0cRIY`D1&Ft_ZjorMVvb@3yrv#i007+Smp17@ z&ouKf@<`5um_lOHVadGBb3CfWOZX(XqIU3)>I#uaOVnu&^LiE%D$L%NMc?Xv?rey0#Q4I zhm~lS)pToWAX`@&vUQ7+I+eB~8EQ)xy))xIhYa5qkDSkTp)tE{0#R3AZ5$+Ac@4Z}~7&p88HY*JZ|G)071d z_b!E{FOHS1CYeRrEeVar)$9A}w`_cbg5A`*;%Ju93DR2i%rm>nRmvok2(iQ6W~@c& zk&K&=?$;5B-MNpbXA43&LnXzSa!|q(NuoBhTUs6FijUQ-E|)155013f#y|6H7v6f7 zhUoH}2Ti}e4bjHyZh@R{tiR@IV7yWF4Sy0;=VaG~T!-fjo@5(~ z%#RvPCjc7nsLsDBkFUA*mRB%L4QoY;5>pNl=SjgGdY(l>HDfqRINMG6j3kHQd}LW% zhc&?xr>!YG^jA2v6r6jn@ zWS&`|N5eDXp^q>d$h3C0syvFvo;}yf1N5BH1><2i(y9C98^wnleD3gbGMdUFDS(xUOIkf+I(~8 z(vaEwgu|GVS1hc$sm19yHTEsu2gv=pF{sl}gRqF{izE&T;0OJrzUpyCPAq%n7h$@F zKwp$u9*3|@Mu5_*wty-1r9PtOi%*7867g0vIc8|z{x&|_V#4xdU+H?z{%5R;wxRi` zR1^r%QcPlx+PMToJKE}E%Lk$NVChoNFk5;`ouZu^L39&Ni|Sm-Q{;eTsV2RlE2XuV zFB8wluShctk8bt!lkPX>i>=480UemGt26U@>_KQS-dxwzyWVfTw_rZaI14c(ybnFv zr!?ttm356Cs}rc?lwMTKU5?gJZGSb{Tv#a*R9oZ*DwWWvQ)muxQN2~xBP7i!2;#Hf ztG`u_ywZq zMLiK|OLf9C$VK*=4Hf}CvX_eqL6D6)u(J{R15e5_S7w`5hHvfOrQ}YxB@X6fh&Z8} zNZX#lg^(lE*L-Q=w3SiWEmzB?yR)kO`EVm949fn3Wuh!tH_ldmL!t4baT~gL-KL#J z`Ug9TSp!nhf^ml3*KAU;*q$Ae@CkTD%9BCmTOP0-&CrJRn@Ym{D#wfzr-O@fgzNAD zr9;w-_NnxHm5xhD{)nKQ2Eh(F4bG%~!O+x~uArbdsf(FIgAJhb3IYMeBkH)97wg59 zZ?zm9Z>>PatNE6TJB1AeQB4i;{EAr;BH1t_rjP-WeJZu)qC1!RL2@&CFw5p{)RlZ* z55m3_i>^FC@yYDTMk#|EJoG(pg6oF{=?k=e`APR0o4J}Tazz2p+X3(kBrBTf;>BC8 zI*i4Bjb}aY*Od99Zu=H5oSIT2Z>b5$6 zQFN>)Fxpwz=$TZKR#%w|yu_3SywAF-p+#BjRjrt{724Q`_SkwBc&Nv@2srUu_mgyS zhSq9x6rK%Cew72w4RjK6e-T?917TYf%DxB1RW5wzQs`efHUP1yIAs-6?`^LXCtKGJ zgqFxXylJoz(=$G6tU|tvjampWYg_7AUCm<4Mi3pIV@~Th@*Q&6OPkFOa?>d6(MmJaqFWaWU(4U z0g4u@Q}XI5JhoaGd(ey&ifDJ&uLgFsZS}^=8E#B@bxZt0)ZnhxlJ3+8U}DBNzNoo#oj-m|Ay{ zaX4OM*ss}&j^4k(llrvv`C7)e-9pvY%7oalJr=O=5&u$4#L8{kTAefzcNn@L99m%Q zp?cMFcUVSPaGs!x5iTv?mCDGnWiov5_9L=7WgzNKzMMUbPHaU_&=%ex8Tl~FyT(vX zdJ{&RUKUN;KunVx!=oeCMFlQuDdnN$GxiZIU!@%tyQ=*C?#T-27uMzLZE7~B zs-~zoWAPVJ&`~Pg9~EWWL|#ujK-g)JSjJ`9ixV8r<8prt9Y_AZ*n7*UD8KJ-SW!f! 
z6zNm}rMpw41Oz08PKhChj-dq+rEAEcbB3;=K|pHg?(U&m`X2p$f4}>Gul2lmUOvxS z_j$ou%sSU~ookd#+gmM>}gm1p4xF@1>Y(zgSb)FN)MktwNif1UFVL zPKva)fbil0=dE&Pj(ABvbm5V3XQV5V$^l7D~6qT3YV%mN}l5)vn2bDCaeS3R8H>m_rQ`fM4c_0DVMmwYcFJ zP4t-+!ha>QzKV&@=`V0WU0IjIBuz+as(&+GO(m&0N_xL`QJfov6(iyVOBf|_bT5sT z!pLP`_g86DZ4^D<4K{IQA8@kkzJS)?CsG|P;lW!qu7UfW7}Y8^N0O3e!$+Sys!K^$ z*`HRpdi~t#z<2$WmO+p8fhxWOCB8jawgS&=>W@| zqn6tP?QcBmjcX@neQ}ohF2kL*C3EGtciM-)?y)wIZfUNh)tU)G`w+8M3HGWxK>9i%rFZAz6!9I&1P zW#Q0|b^mPrQX?+X8=(V9tO)D9DAX5ZBAS87V&fO9pGHe@mgCNo?aG*ke@_Pa=B$!l z^~ce4e4DYdy|x%g5q^;HAvI~lrnxFnJvK6SRKV7IVWHdnD0=Rs{&-HTOAg<6)|@8n zOnb4wNoN|-;v#?)0=pOw4{KAy39~llCj%oI*=H}@Q-y=YuYTKNpMo}$YNe%1)W$lq zF~SZlN#!l4H4q+?MS}K=`--yuoaIl%p3(GDdV;NzYT6fytJ9!OfSsd5py$mfB+&X7 zw=pekyy8a43k{E{quM?nWu%iq6;mWG^HVmr?rD{=JLlQ=%;j98x^+^wPC;GN>8zM%_&6YqXbY8Fk1Z_Y{&sPM!!a_=H-{}omx~$9Ou{B`mf_*9@Ekl30{n+!@h~BLsMQ=e%L7vlZo-~KjEW+RmKjM^7tG^d{h&@*;&{$w9F}KW zK+`1>L^MuHg6o-e<33Vv+`T1ecLJwlTZvIFvyO;ullB=On^mZh8bx*_DA3TVp%p ztJ&C?ihIqUYSc&`mGTP-LPrUTl3$g#enjMijCsCN+{b%%T}{ktigB^$l;U7lQ28?I zdfwIex9S`iI}6rA)0|hYjgNMpc)Hlo4tT3@JP|JdIXacn&M@3Wzz=_212WWHN(%Ki z*W>Eb3>Fw-t3+!?~eI|0{A!d$IgJ7*5?XuO5)0sjn-%_e1Ngm8@=nW zr2T3H^M|=g&RmD-6b~KV>!4knmaiJu|rW_7s zhJ{B$7as_Eo>E@2nNfg^@)i^n@%bmlx&3B^Vtu^>c3!;)t2CsD>S0S>yQDGb$(B4j zb9FNQQkl~dC*+S_cY=%bTqDGK*22-8kS<@N0Hy5u%jCX;B+2)6{*+To<0o1aPiDI; z`=j_o2bdiRrnVn+9<_ZCji;jB0EGQ0UU6oA%{w(^;}|f;C9t}?ey&=pvA-)`Dl@iN zkjU1EaV+XeXxQYvadsi;fheGzc3>R;T+jAazcVM)#A3VpBgEBaYu-Dev^+&wEG;IO z-Kr#M$C~earDiaqXs#V|SomYQ2BTny&)MPZZe6)q-dxfHVG`}ln!N@E$Ja+s#~`%| zmOhr2wISImUSs<#Iyo5zkUOiKB>#_y7411lY)4u}AcT$q@M&7;Y?{IM=7|oc!n+8hkrjx zhMt9;fTWZ08BV@#q({4h81Y4z{~Z!JH6?6*~WQd~>R-pGE{q>I~S z%aRKl77II_K>pTzQ7YSpRbrRTYZ6&;o4D#t;-w)EJbqv3<+yPSTQRc#O__v3RX`_g zir<>Sh^J(!iAd)7R&C-;)(OB@%uv7LCi#Xm&Dhjfy4Vkbe8l%seF_s#_MkX<9{a0# zCUf4k&Wg7MSo%qA!=|s$1V|!1wf;cV9&?Wlt<Al3Gep%OB5@$g-p345T< zJ$J8Z>3G03oN1u?(+#v))u;K8-lxVj7`L$aMz*4!Z+A<4qAMk39M5jvziyghH8%)S z)G=N)ztUM$1Q?C^K7>B>6+oiC8ETyE>S(vGW#^beU%ip#t@4Jk;Rev86~c-pn(#|h z^D6WVJj0}PVj)wCZc$e9tmAG4Ui$sJGj2R@r^4XT&_?Uo$r;}x^a)eaO`$409KkE7 zqdbv--`nl+keT&&%0q`i@iUYqr=(ZDBY+L7}u@tCa|4+lAbU4WAv)@ z(sg3iHudL3;ui}g#*(;H)@M09#Rl@mn&zq}JF3*mE)iNZ#b5T0Wf=?1%dtnT9~bIO zZ+1wLtHplSfzko~Y>)u|oeak(R^Ij+1=owYJwQK!))suDjf_8gz>=I+{cuhoKc z;wb9}#>#ac%$HZG;u}MBGX?!2dOE%9^6uDqs6L4EHx0UzIdYuS^ zXrQcs5UV0XJ_yE0AnOyb+EWNxMFv^o}j3plj(&9$L1r8a<8TdA_bbJ3Pk&&ndE4 zxgSW4roTC_OM@iT*?J5f2-k0*t0cYtnDA)vcSN%q7WA?NXd4Ym@Fw$12|f z`aU3I33)1}w(u<2ER7oy&B%?j!o!gCaEn|sV0CqFkfJC~uv zxBi0C{Nj=&Y|wo~eb~$bYYec_!RiLJ5u0DarAae;9#*o@f{z`jH&(jiFd-Z83bk$G zS?6gxaz8NjaArS(JT{@u?(*hbd{R^yQ<2!XmFM2uHM=6xdV+iJDE_GIlrZ2n30h?@ zLEvKktC_ekR@{Rk{D_4^gKxjTXsmpgv~xl69X(jalPY&JSe@z#Ow^z1l@iyDf96W{ zw7RLeRQ>JPV5SM^Ca)(heX5B*(vkGAu5G50-{&Msb7`H@{(r!OX=YPfjJ%foQ$#7;S$ z75~JUw*ZtRG)6>KJ=hv8%`DY(kt<;^qZk8EYEw7}=y}F}GLNH;{?Og7G&URDc9_O# zX=DoH5h?ao4N!#~TA#ANm#O#is@e2|!`NIBOT?O7YCBh9{XH>k&O26i#71UHt$c%5 zL<-tvcHP$fJqXHu_~Fl3h+GI&N*rilFYbQl$sxdgd-wNrU@? 
ztkxXgJkH(jWXdg_3WPjusB=1NbN{Z?p$CZ3qzUV6Z6nvFxd^XUM0$pyeCE!!jby95A`+yma=p_mg z?*6h%{wWpwrSTv?S=Rvrx{K8zv1%`K#QNb^ZOL4{eu(7W{y_y6Y=6x`3a@;Lf$5+*~C-rpGQa|7P0f-QTp(lRsHV z=>-SCd{CIf>cq6zyC13d$ienUgDMP5zya2T|n_eZ0~Y1y|@Cj4xgJ7ix5X2*U>HF4=&?fJSPtq z{qrE8&BsiQf+qQnmi68t()WSFztqRbSEhI4_9dK&seLhiFo8y0Wjk_dXYw8JbzJLj zn0tIZ^+$zw4C3>Op9j~WWD-F{mJ~6YyRK6zURA4u>-X?=9cc!8$NTQu8U^DOmWg$o zQDNM3vACjx4iP!J_k=lwWt*-l{O1>(2GOMiFC^qVht(T~;Cik%CHk?k1x>MX8Z0OJ z`{8q8J4Ei+4sgR=U+=6OeFIH>iQ){AZHKVx4B606bKlvEKv(-Wo{LXsNxTZC$0+%C zS%@O;2!5bY^^6lm*yv(m!`-#?Yc9~bBLalDIJ@pxo~{W~LHWTpIf_Y79v&7{pMJQy zb^h^U6^KD8=YUO1j%;G13)9EcAl_A4Ce!xt@e%;i!#*WOtDc^w3V+ShiJ+*`SMPK7 zZI9N3(o0G{!SH&hADNat?}r1TcXu-;X}BLzg3{D582X&%KYCi05au4!Tg7Bszx98L zz7|k)*;oeI;z((Z>r)ghEK#m~+O6V9(S?*aoyuz=YN_OJ-^qXUDo)@LBO&$ zdha*bMiw#D8XOuoAcrWvVh)yCT|o|mpl4OBKi^eVGs9i> z*vu@=%@wE~g;VcJWQH#)yS~|Yr?b>9Y^X4tpk+PCcuRNmOzuouTN;*7#K+dZTeHY_ zi^wWbar5a~TK#@>s_(<>q$c;Rt=&l`nL^Eyv(g1%hO*`*NYp^U*?{8Ak%&88*_r0g zqN>%DOY`0HlMve$M20QHt+o9qB9eZXOk7xm?hMmv0v*l z*2%xe<~55XpMGSgimM8!<25&|##cQp5?FDU)>qDXhEjOp3Kyb=?z353rA)hWJD%yu z%@@DqjUUaJx6TeZ_u_+p|1kivKBk=4vUY@P28-GG)Xg)wi#uz}bUL~`3IJd!=N9m; z7VQ)zpO~6@Fc#lz!(U|EZ)=Fsrgw#R{#q@~u6kfVmNvU3Ua}Mi#SX>{> z;Q0t@uQG6>)HPidkoCTp^Z1Zyy=IpAJ8ac`!0>k*8AX+Dik^$QBJT(vg6$PtM)0!9 zrBWuj`7`esBfMf^QOHv71Vm6BBXmVSGDcak*SeP=Kv4bB&}C6bfS5={|nJ2zK zN$Ge=cZx8sqg-KUVLDydl{0$UCfM<5-JZGMZeJ&5*RV9U%K17I)i3Cl=X@&O#tSds z(||G1*E{VwC)Hp|#auNIIVh0Wu@8Fm)DvY|7+Q(b*_PDE1Pc{Dn6cn_`%=EDye~~x zGm#1%TP97a%=zdxL!_ujQ6vG^a2ttS)WD!|eITzUxj;HV!|O;WIwf#*$zqg;jxYAk z%BNgqJALxpy1}Hb@75QY>2lMrbFm>VUMxEQ5C)rkI}XmOawfA16YG)i* z-<=+zT6;$;`t}s~=0!^;8&Nl*1thFjsr*5$u2QmFPX4nlwziP6l_vQ&yJ9Pq=!0%f7e^&VyYufq=7Q?3zro?M z{`jM!!9fpTB> zOl@+5V*{{moIAT;P)e$Gdp}JD+Gri)+#e_^6Bp{imPN6y#7!nByPg;$CcsA@3i#vpO+fbrkGghalm?@0E1!PX z$ibc5P;&i@AGWfpvI1lU?w37vr=1L{483zzFSLqeQrW!3Ls(nV7n~0}ePvU0&Ao<8 zsrD)*$|b*!>rWCH1Db$jvHFmquV5t|KjswRJC6sLL3X8Ksr?ZQvRTO@`kp#zo$g125C`wBc z-+1~VGnNpmP~y$bf&HR#0#T|Q)tmWH7f%y_9F;z~ZZlx-ERmxxKoqGl^7-iuqJrEo zsNJdB`sl^8J-RRO0j)TQ@<@kq(XV7Ql<+xp_A!=uj5joYpig@%n@lzSijUjB34>p$ zX9cho5v||DOY+1ic}KS3{A`at&4bbs>Dc)MjvUjHU=S(!+6NcrjHq%HCOYfzEk zsUqX&79gS!-b{NA^WS{^gDeR6?jY>IrN{8(qnIOw2Qt9<=gsMA#H zgTI*e!N26I)16ELqRhyw!|F*Ci@$nG_u`X3F7nIsb$+?AP`z>LYZlO}E2eVQ%PkaV z37Fd-e{IKQM9{rHV;6aH=T+!)o-{mLImemIH>{>-1#)1tJZ&{}CYKL3*XHP4fv0o+dF894+J78@99z^J%V%&g=1q`P}2(NA)iDzuFb;d~HpC@*7HIO8$L ze=$IRJnvFjt3q+Pjvo2|MR>G;rIjprYf>sO@AJ4bq?PZltc8m`61>24Qsb zs-I4^?7hBJg~rsX4h}c1+XD+#bC?-==|jMXGKm=!<^<@Q)gX0;Fk+lemAl+^@M8k+ zu0u6$FR@Rdync+Ln8Jjmfda`$KV1n!S9-}$@|a5_nAo%K*f(+TxO}v^8)4-m6^+bs zsL`Zmo4;~p7MEls_OFGjuL0y_{NcHg!L3d|Zj~<=ijsCIiuK(uw-iC}>L@v<1Zxt} zU8O!1--+c!@-5*P)5k<1+ciozgjR+>5~>u1T8o(%*!MR3bBY%1^>HwxP##i^Er)a- zAp~4geM^Gj!*7M&SdG;lm!m=qg3x7Qa_bi_L zH7VL(Jbas0!gw?Ar3P}J8UuTbigEto7bBm8a7k$Z8o?exTei@iHJ zLyXR9m;Kop)bHzXA{kG-_mWf_lP;}KiR2|!kK>ulMLmq0-b?q3;b!=QJob31n?Xfu zTbgq#!WT`L_1)%PbNo&-)nPcrW>F~%>sqx?8pHs8KndWHr%qp|TZN*@2>xOU*>2y<*{-Z`2mFtvf8fa#!GkiDP7{+zaZZAIW$f zF@5u}D?F5`(EoP$@J*ePfK}LrmwPO{`$=-Z27fNW)&x{ycL1W|aOD(Svi?vf@-wSm z!8x9fN=+JEJB;MZ#rD~{9-ACqY395wN4&Num1iNZwG#JpDRbg9v3CG0fM_Pu^J8L? 
z@ZuK<_O-oN=-8=2b`G-)wvNh8Z3pZAKxcE+4^|gUHsMBJ+0FM7D{Ehh*0DmE`)MBA z!Uh;=J6&W$Lh!tvEy|=SP%)x@Xq(YvX)5KU0*yqX8Tm(!_jVhEsv3ZWBH}t-&@-=i zi+FRnSmMfq@*)A^h(^K!dqbU?fTrT&W9wN<(EVh*I6|OwpVcf@-+?~gS}fI^1OV5O zm#@O*c)@E^kF%Dr;_ZV0{{k0njjR!(cC7J>zLx{J;%?(R_zCl^OJjl7`&7H{$D@JI zMZ8syh1#4Bj?{eX1ldRV^Xb;gG~I7!Je6tY3X2^^Ngi0mm@Eu+c~<2*;;zJO_v2*o z=`V)LMtYyRBV2{3tt^&EI-&DPkdO;U0!XBqMl{jZWNykp)upErz+v0~UdO9JyuNy2 zWT#}=opHXo4P}$jK!bpog7G*P%DdL0WB^X6`pzJlx0E9KI!9$r*vup0C0ZwH!3q_d zwr}4)5FCINAj2<+J64@}`78!D#KMYY?;5>$*tERmEAl!smJ8?|uvtTEBop{aGp{n| zkticmlDg(qu2W3jN#zxcb{}BY0)ybBlCpK4hK+PR{@+8LpTU0$V{T3~6&D&%Q1LJ2 za7G#X(o2ixI-fH{usSays-2tT`_EK4i~w)?lG<@T?KfntM$SL7I$G(md7n7MJ3nPe zTte=QiROWpTaf|Vtxt-VKpV~zT8ywS<>UosfcWIq44%YT@#J)YO< zwLUs0V#VpszOIw%mC?}f;<5ivCqFcM4VtRN&%hK4xJRLGLjYooWu78aln1w*(f7QV z{IaNzF>kT|^6ozu+!OuFqwi<%tINSSo0S4>BZ*Ae)CDR3?#TX1XFg3@_Oq?U>u6cv zNlf-KnBL~hni4?$pNrEY{$g9x`ImHxKKA@L`V^)BP-7B|Cakce<)$|yj7Ls7a#!cC zw0kaqjDP>3Q2E>Z=JEa*xO+fTo&YkXuD_b=-n*C|w)RIC@a4irnuj%lPg%%2Qa=KT@CbdIAh#zF}dhCVziPANk|Qc<_h4X|!2<9#Hx({|)QaR`D{dC7?5u zS`J;k6a7vVe>(=24LN-q5IbI+_;wX+_~H5AclM(`-@CiE4j2v<5AOX_5a1ojpM29t z&qkiN<@@^n-l7)%I|KL92k_nY)$YQ{!q7g(Ki&Ss8y`Ijx$DfkZFIkQ`k!tQu@@0- zHSrz)jAoDs9ftC||LdX(HrkA?4}kJ12L4N_lazJa^X{f|iR?iDnVRiC!*RZk280>i zD=Q-h@?V&t#G!`r%>NGb9rvg&S^k|suYD<*IoDUhKQH$0Scwr5V{8^yizSR6tm%D#_vy&2tJ15`QcGPj~Y0G^RLj&21xV zQ7X~Dgka8`e+Kds?3RIbYNu_+dzz;F&u#Acx$J$u@a>N!r4X&=>WcVh9PxiP$|K5x zAM6+GPz6kMXzv<%H7j%Z-F{xlx*xQ^ruT`&FJ%pcB{2k`?9+MH3Z&@}jWkl3a> zhvD0kmufaO8Qm`H(f3j17LS;3x|_Khn4kM84OqK-Kr>8`#z{jyjO1hu;%Tp> zRPA|X-CjsJtkLC4G{aXmjWNSIdIOw9JVNLi)zeSE_{|(3{{RtMHu_GDjMan%QZ}04 z_1Np7+LG_y&AjJ&Zwl5UDez^3pe-*@w4dl%VzK~eJcSLsoiz*plxjU+ zO#1vZ=zgPC=04TH0&l{4cQM7ql`ziE9rL4)o<^e(9_3-{-`<=Z%v#H+E?=o&=67>1 zH_)pO^(H?zS4IxwUZs4!ysyVy*1$Lk(Ma2uo06@;4DU|77OEL6EUfCR#J}%6_!$Z> zj9!lAnXA2$<;!!*3$KnK7qdlvWlIqr;_B197=m*H;SenQr7G)*Tx+1^z~ScMNS<;U z`ud`A`uK;ZH4TkN_wK#qAVq^*BPm6$x2I2&CqR?s=Xnp1EPtP2u~odk;I_OE3aJ5q z2s&#)7MkO=sgsa>gWZ!tU*+fVlY{7ze|9*lbj04A zhbw+m-kgxL#j7ke%5N|>E*!P{-#I>F-aGX({*Q+DBgXhYZd)UDLGyP2|NdEg-HLwD z|7biI=)3%n|KHL6Y(xV;|7kkkQC5^&wfX-3*~y2c4;`uB5oBA_PM{N|ei|qIJstk1 zC;5H0%fGa2|M?;8UK`rW_rKfV<3A()AMO8k_(vH3R}TL_lY_m))JR{3M6X{WSjmjo zkEB1da04ilbSalZ&%o0w;WcR4yd~BgO5#LkNjOR8ZfdaeM-0M2g6!sPs%cTGx zPT7km7o2*803tkah|um^a{JygdJ8eZl~OTr?xX&$P3wM@d0G&)sDDy@Y6i6#XLBr+ntp{R5@NW$O)m!e+S0 zKW56`#v_8(D|$Qt>4Bi3oDG6w=2&nkC%ah|bg)RR& zCd!5BtzoC|r+N<*mnH#)p#>j9Do%1@SxTK?OlL&Q;JW6E4NC~GXW2|jFqg)t&r%ya z3_DAvw_gRT+WAIt-NF1=AVvbbQ`tmmjpi+X>vR$@c~s1Cs=3bHUj-Y=on#4B$J{Fo zKmDNQ!>1f?*C6Agqp{xgqqck^aR9 zt;~}iD)^pWr)j`j>l|N&scLt0N{dPcNP)GjZ^E7diUv~a!=mGQi7YuK+qdZ+ZOeA# zXzjL_XoX%z*5Teii z0aRL>y)T?EDDYADBPpo?`M&AR!khJXz!Yg_;(m1v99-^!k9r`NR6y!UG1BsigZ6YQ zG0IKmB<7BMWPniccq_CaMXbcs+*8hg&)FQ0O5aUK4LFDzrcqpY(=bv&bXKLkbqthU zAcOg&rVqA6eeubBDm#DlEAhH9@SAl1hu*Gg%^ERv>G_qDx{NYrh_)an$}n0{Ywt8P z@u{qj%yMYg_xs4tC$OR`?|+#!(e0EsLiyubUv~ai*XxeM%i_G{w-Ydy1K( z`drY=6&3utBvI?6pev2LH*Wf~^Y46 z{#$wb)Os8jU*rHA(cm9)E!xx4G6@)hA(U-1p|g79OUxcr~JD77i-r3L4ePe$Y+Ej+&K^ z^ebo{z@qX3^+YJn*Z-)CXoBpF|zx#FK?r4F3XDM;-22J3!hMylz=Pd$4HMIRpo_VxBadxG{%BkIn4l6B7=9 zHUmGI&YRtOn@dQOXV%Ejci8CRtB_QrY5-?(D(4y?xxsEfie`hHxnfzy_~-NJzFkir_?oMlT*H9iHs5ZK+_UY&Jt3d&VFgisXPW9 z(k@48Jy%NJsnSYZ)&#RE1|JPXd&Lt=&XyBQ&!4K+Ig2zZ>7BhY(^$e-i`deqP%1ca z5sM^D+pbUdSYe7&O?B5y$YxmvlKJW-|M33GoinBE(I@Uby8OAika-|XGj+JVRZ{leYIWC}%VJaVix1 zZk}0uqpeDW9x*FUaHKL@X?6ps_jcsH&Ire_&1NC<)i|myaIDD+pefl@d_~E|Gp5VI3L{VyoELSSyC`cloy_ft$iVdoF1Saytgwqf~ukrUp}Q z_7UDLmkEWfy|7~1yZ)-JMBVHcVwBb|Mvij?L0|$(>Z^qPSFg-Vc)fEJkt258Bu%uw zaT%O)oNapG=}t=VF#yvjdU+7D@~-!)D>Y?Qqd{mXjNl4h%NoQ>uI2S{d@f~D8nYd; 
zPbxTFd!bJrn6H=pI@vigBVNPso>2`7d@G-(n5Mq?fzQ)hzbRCmjte`!om9&k=X=3{ zZaK0%&!fj?A|xnpGWf+eB=>e}?Q0#ux!tnL9HMUSg(&Xev9*>A5N{w+#JO!^%!Oxu zxtR6$>!60xgjaid>|*G&b_1vmVpPV&(R``0^{eMFjJiybm>6zynr=~jt@T-+t^7^c zNw0=%e`uG*(nHxCt29>TjHmBUI1{B4Ij`S`ytUCT(eBY|HCFUeoQ@XKSHFH;QShst zWb1N0RKZ*KN1cYRCPiv3yKruVi1Jcb(y+3n_-RMJ9C5`J#esJ5-(Ko#p=dAS;_s(swcOVoGCW~LW7`cnCjQW-@ zJL0&zMx93(8{a^kRZNk#T?Y3!3pG&24*Q5)^F4Rht}XWh@0mPW8C~to97b~Ubmp+I z%ZX}Hq@qco>LyixXXv$^sj4%X!1B+4%*2~3Ef&N_Q{2EPy z(tTWw8$aIM_eSX}WUaoIlJOae0dj>{E(HT&c?2!z;K zYcw|$vOzU+V7DMt3U{zaHGxaKc`Ie#+oz@p@N)YvT%V0~7DZ#2#T7n<<9Xsx zKRkl(Nj5EP9B3gQ4QMKi;61yQXhin|g!2&it7H<9|hl;qZjY93y|Oqk-_k-$h7& zcD7T|pyVAf7t^aZzSh#xAay}G9Qr7EAPH&uCJC0N00 z)bkBiM2KQG?e7b)wCCDa5;1X?u_s$dQ(O~W^I*sBUk9-y1g*j<)A}DqXXzc}J;3Jk zKJ-%-=-tg6-6t9=3-wWL(0Nv|lcYD7k}N^vWbrxEe(rsgu_~qahYj(<${Up5M4=$D zBJPtk(vn$ED{$&5M~=uzn0$|0{Y;$6+N_pZzOlWHRa2XC`=aCL_W=)g>(N#c{dyNk zXAG#7&S1LuR_jaecZz)0p%z+`Wk`!T@Q|%6E-=|7RwWXOU$i%-ysVeTYRILWmrT7@ z%lpGp!Z%rr2U+DYRb$E%=qY9 z&QyT|3W^7gSBb}}#?0#BX4_e`qfTPv9mLj5^w(yN7Mu6vS7U#Zo*o{;kU?NMj_QL=E^=oIdp{lv! z53*HqQkT6h`A`&)=dJl}zGfr2g`D~pP}Nt6+TYIBDf53#RJZG*qp zBFhxWnua2%)^F%&2|nX!vJY6y%-C-@jw#Qa$+uRQzW@d8r_{$jk2=t=rJnJZ1^wXq zgsv|m3SE^37|)Aa#4`2oto|EDH%p6?4T9n*C{V^g*pGAgE7o z%MNh!vI0yQ*j=^W4?Tg5bL|6$Es5YiJWZz>iXCA`%`|-HHWRp2`5Pi@YlUvUcUMJI zNK3CLaSmeNGOxKv$AmxHGBa=ZB9O+0l8lXl4RHiTcurH48(@E8(Zpp^b$;}8UcKoqxFljFi6(vZwOQZ0*bN3vsS~govnD1d`&>?M0pOx| zmT-y7Zy2)1XKgUwxgn}%03|93U)nq|*e88E`?EXd_o2RSr<>xTTC#jRqoi{Z1xu}g zYdA0$b8wv_IcAJlTB~pz@twIxqo3|*cGjhlA28?C!7I)&&1G1P+GiJMwq;ZSu~xi7 z4OQyZ56tAFX_}7Jqxm#Idsm%nmnfP09A<%u*vq}NFl;z~c$h*zBW&eXxdb{d-5a1wJ`x66eV-Mf)*6|i)8XpM;DoPgfFy2&D#XJ-V9%COQ%J8b|TIirD=ua?wCxGG|aQucqn zEjeQ#5It$?Nn!l8&5A^|bEwp=#IuR_px_zT!M^isF{CdWU%i(b3lZb_D8~y6}8d0^P}CbntBmBcPUM8p0YrMdH-HIBP?Hw+3lBGye1R^yyu=jUBkg8 z^4itsn+vUrV9Hq)nGgZ zWD!gc09)*)7IzkS^x!VY}ZIIDZ2y(%M&W{QSIP1ru|;`{M(IH4mM1~nB1P&0aY zxXM`lADzF@MtJ8TF_jUmw**y}^|2}(YOtrp%Rs+uB?02(?S>Al0LI_ZD<`KNODH~+ zKjDI1YzHiAcUW)W;P%cLt9xUzC#vnTHT=hGRr!=O*I#I4e&EjEp-@RVuQ4S<4l}tT zCN~M^6>e>oS{0vgU`M`lT#huR0!;`PYWp`95=n8kO514AxYk$gQs1T`in*<*7Q@xd z%3N&*N2kJ>x3ly<*>UCds;(%F6IKGm70`4HFD$V7CS^bKHl&AC)PKP`pxkykiZwMt zL9+%FDuxyiz1_$9)as*0$Fyb|h6`naYC*M8A^ zYKsp?f}<$SH?y*xZT0Ev;!tj_XlDNcLl)d|d8py3+ejPfsg}-OF%oSfdXW9htc}QB zd4_gBRto%a%mqH;ydgMd|0Vr6DK89+G=PY`smP~uQhDDBU9AMrn?V>=PHRIL^mrHs z&;lA%N4tqIviWf|R1x}SBsuuol|_K?rDR}xBKuG3aBq`uj*)IkUWG!0&mT+rp0X6Y z_H4t`a~`39jCyoRljFn&;)KcRswHGYU80OxpPFKCdukzA$ejOmJEriqWX3SH2&%r?tEHw@{VHqG=yWbquSL%|>Z)_D5`GXqR0E0C+~FLC@R0$Q0!vnV^#y;x?w5 z2{}D&3nlSRfVb%W@BsLh_@A1@IEIYsdNC%W<&#biv-jFQhJ%XYOf<`eosx)w&(GoZ zY>*H!FOfOW)U&Ft!m|MiDkL{rFg!%)mGh1)h$)YE)1aqpd}ZUzMKi}0A}Lg*sY(uv zz1z-16?)LsIeC`Ue}ae=H<$3sFw_&f2;-qK5y$j&BJTxTw|ko=W!;4n-{fc6nZ3oB zR^Y^o0H`u^J9#h6_9Ui7DKXyI9NWwDk~SJAMAsR$Dh?qR+l9e_xLQZmYuO{4`nx10 z*qqAhpyAbPS%Z`g7!#%IX(+LY#iAx{ENLc<#%(?bEaPf2*T5>!Dw0yZ5Lergi%$LP zj;Fy5*~M*_&fPWudY zA`c0jzn1W2Qr25FWXH^(BnXQZsOOfRTbd`}Cutrq?!!bMTVIOW{KqOZYzevg(X!>2z?mDwc2$qf@f?&VX?nXl<%!Vkp}Tq*T^v+7n6$U&@avI60Dh|kK{~4%KWmU zaSN|ZSWyfPR>aM(J8ug&?+?M8HJwV72y@txpN4lzMh|0uMILf`UFG^tYE9T-^C_#a ziCgrPn->Al>izc9Zu5KAB)h}|e0Ja?!Ff3)bs4u-lg?oF7nGa^H@_~NHZE%vVoapp zW6V)10FOLn;;U9M!glo|H8T6h7OJ^U4V;&LKjB4x)e44jO3)P|&`l8oZN?gjvK)p$ zOv(`(tS{C#1lu2+1=(sVOOs=D=Jr=#kSD`vy?;+K$WIaHneGtYX$5+>@8l0Tg~SG( zno^SGQAQi2MwfT&S|FTuTc4oa(rKM{Z7amTn74~V!=O>dFO6vSHE|9ZN^3b7yxq;tbmHVMVmx}H}Sr*N%mFFb?7=Yyp6wm@3`-c zcm8DT!QN}Fx#s-N&zx&-jkdMS);VRoP2k0m{9gL6jC#yGdG$${V~T@c^H7%IFs?zb zr6Skn6?)LA$Yb-9(AN8dgF?M};MGMN`IMxYQKn`c%4F((2mOYek&zo+{f5-$aq<5B1Sl7-p1pe2g%KGoxR2jx zTR0;$Xi1et5gQ$VW)FMSa^4DPxSvrU85!+YW$z|5 
z-pDx=&PLGch>5Gzig~rpfqX}^dl#JF`X3*E**o9rkU9?M8qK1>&{fPWTv1Nom%ALd zJpW^-7E0hP5vQ0|w=rCt-P@h`VX(1ml(zSroq!pmTe>1j9G6s!h3$JI2R|MZqi+AW z_{rsX2P6Eu!QCtSd5SBetq!f9HCArv(+|HQ=A(@joF66(y(6P=)?}^i&pG0;jZl95 zt&XC$CjaBffSB44k|R3h_Gwm*$qQ9*(SZWVzRO~03%3RvtMc0p1>RFm(&e1o%6yak z?Z`^M_J9*r_?1e1ZgzvI8e8heD2dPf%ld)tklZNrhg7hU%u1WME&oPuzfZJLUi6#{ zmhzF@no0a>$H5QfwT@4pe$?=9#_brVU`26R&t!!6AZ8i(%h$$>zQ9zilD#yc;TMV; zN1;b1>+n~^;c=EUi9@0$@CoVh%rCyqYj)1Z>;VPQJmj&t!KPGV&4mktA|MK%A3A&g4?lX{X}UijW5?@W%y0e6Pa zay}`$&MB2LboJv&`Stq^!OFr{Mgm1_cO%@1NAg2avFco7)3FIhjzdj*9v-i8HnQvb z@K!I~$H)i9@MAWkqZ&k10Dg5VIM|SDq}%X(BimJG2F!KHJetEcy>%)#u>z_9*BIpp znJYDW&pe5_{21?de*fI~v4Su~EU?=n8>Z01be0h0hjF|40jt{%GRTo475sZXkU7!C92}Q=0~~GdG(JE*lzZ8pecEoN=9B zw5qv%lAZ%HbMk4MilS6k$Qa$JCo=2Z^iRv0k-$@OsJ@Qojq(7E!QeYjvmhOfDSN(G z!!6;GC|?X3FGhgqi;kUlxO9`gJ~|J)+M}iFT5FWoeaEeVT3hX>`(8oR=G#jzGryf^ zJR>tzq}2NIOBCDMkCN0?wY3K~7#n^eEFyi+=9D8r=#2-uy-yaDz20*+uI~&-J*R}~ z_l7`@FF1^@FJWRQB|>0f9%(-M@Y_l_0wq3my0juYT+d&VC zJ_zSGm+-<^<~)-YlNm+2hIpSK`Fz?c0k0ji?jUp&oA?KSS2K%nE_OLv7#&Awx+y}t z>>9nSav^@BOGPZwlU}=dOW{VYRUS0(|0-!5U3m6hEA^KCE?8R+*)%Z;O{FBQu65P@ zK6UWHCpDq*c)rWSvnVm5&~d-&YVom(G3-@WG3axWg_AJ&0}^Jr&B+#PQHu>0%bG`;SQ?Y*IIHS{c*S!Iw3?Q`Bn7jO>G7f>I0b zUzRlY2nlQ4Eg0Db-!5W?c-*fSd4LZ_`ER=%;6uDc_g)lg#&T=kVIpA{N=c822zSjA zm$WF8VIO@qOCQ@ALa+r%wCW7h@bQ>v$_8g5Rd@14@O^V{a770sJwLm3=#caaCE&4) zP|U(Lqo}ugM9F2f4iU~zFPWqLPPZG$@oq7BD4BY6Hl3~sv=+I;JV&D{3+GqGxD<wB(S0VALvkeh6Pe|3~Nz+tg;x8OrgB3;VUy=!YZSK*+N9s%-M zu4pMbcftB0aO^$7&1_5s zZ9v}xRfI8%PR)N!V1wA}|FH<~nGy?R&pvE;D3_`e@ZEgo=_wmCw~aHGmp(*Qa-ZMB zs-VkwG}S?1=4HRRF^hR^cz&C6%BbX##;~(L?{c5{8u5Debkj41K=(Fyt~R}zXxl*F z;{m%cMvqA=VNM1__kt1@X9kB;wW$~Fu7_1fnZxWRuZQCVEw zRXw)CRkA%cVfjFd9pc?YR9WdAUH|nr`J{P7yZunzc^0UBOwSs9gC0AY{bWAu2S(-- z&bxNQsmr$blhzKUTgCbsnP;(Czqmj2h;p8i@G`KwCgUj7COgTFBa=6#5Rt{NUk|h& zm)jX#oy>P(cINfOf6R`iBBo;LTU#?oi z)OU)-34PbO#FgL=EK5ll#_=Lv(Fg<7v%IUI%@pEE5tXE?teMrraeEvIk?k%zqlSn6 z?D?1-vXPPhR2imh#?#;XHG4hOX7jF;$Pm-WrsIcZou(21I~#btP(MLxwzg7u-hA}_ zb=Co~vEqxZi(UHQ*boNxfiHIChWV%OqQ4j7P^()`13U0BdKy->&xtWqluqGl{Nu5 zUZ2lIiW?L|{?!WcIe!jHw?3dx%9#|@lov5n}MBXmmg{N{X(|*R<`kJ zNf#yG)2s1ft(d`rIStU_i$oh(c)^>Pt$J&cy?s>4EiPHr6#SgRegPZZ5^`7ATYGum z3$0+cm68v2SfCw)nAs`}6(ZUK`n;em8{C!q%r}u3!d$&HN$DG`%(s}JI)8fy==$rS z*K1zirKm5UC)u0|tgO}-zv+mpg-&n!0OGIN_UQby+0wm$oJMR+1waBu4dWcG?9;&Q2ETvNI*B3A!V^Xk0$U23udL&UWTZ?ZH)@0~BcfyF=>Uq}rS{ zxfId~J|-)_8o|xU;{12s=_jLO_-j&bM5L1A%<=K}));VwqvaRb*o7nG-~T2_xfTnE zGe5%PaiGHyzy?vO>y4Rv^}DG-!AJG>kG)UGrG!SzU3v70Z`>4_zQv5Ai1^!wwvC^D z=fNSsP)>@a4Lys4v7D@^|J&e_cr4P|<6zFx4}hH1o05@my7@T81>@1t00Iz4<4t4ezC1CV2)% zn|vn3Y@vlcr>?Y|>YIwsu7fx~MqYkJP3tvd*G!B(&)+>CJ84W^+L+eK&7$)ok5*~> z_I^(q{y`6V^kv~qh2j9ijmTk6?d8pQn?b~m+NZ(7_|DSp*pt0;il@1Dd*#Z~qE2yB z9G^c1a%eZkrC8atCHGZL6|JIesWrR}F zE_Du$xfs0j7c15hbDd}CE}1;-w$x>(SJd+O&I8&@sy^=+eQ-0F zuZTe^M#0D9R9hrGSYpjWKswG!Q$o>vv8v!xn*UuxHppiCp>3sU(J}C z-mRiem%jRovxFunqVaD=n9Tcq=FQFo39KImAUO4d>yUC&Ex~cNb;c=ki>7E1i7PhVs)ds(dsl-8zpBYZ#L|qtc%2?r@(=KszoTJxN z0bqJQL_j45cyPlV*1q*r!$6=9RZ_$P+^k=U8ypLHg zY#p_$!T$tzEA~g#f{&x^kZ)uyQd~$|e~F)?{+8TocZ_0}qNXA4(Rdg-(PJ7PtD~u* z78gyy^?B#wy|W>07;k5>>Cp=MKg8S3<`R@YfKvBBmbWD{ryG`h#H6#UXyf&sb?QJD zRhwQgCDkom0A4486E(@DwP6NWF$y6u(nCyjH4NumeKD|5f@7$|-6qVlwH>N!= zTGLP3$1miQlk2^$E}W0a(|tZ1PgwDezO+FVr668ay^k`7sc!OR8JbmusW<`^DlzKHTMXeW@9`K?NVQd&|U|E5S9(8-)PWmot;*$O}|BOU1_})(h`z3p^68t8Z}Qhiql0P60L8dB##F7LOfqIMVdVk<=FZl z_LQn~rA*q~$hi10(VhR4Ym_lN)rv|!&wsd!Bt5iXhI8tfgYJMIn^My=@_;;zL^IZt zKQ-EwIt zY^t>TH~(twN(@936H?(+4>uVPO%0skR7|Iji!l!%Mq6plH&=v*U9-$ihO{E7Q;J)f z#}>W(2zP$`O#QajpIgxmch726hxTk;1Vt@vCg*ePqbfTU?F$9OA}%FjF7We{CdV~M 
zoi@I!g%!2mAR|PS%ElNsZlc+vf;rC*{hZLZ>up*xGb5r{`u`uoC>v!?va(5HR6C4t zu#1_(&mR;hVjmcsP8X^lrtQrUf#x1Z*|B*rj&Wt?I~lhp=vUk(M{I^Ha*G5j4|-=A zyI~Whl$r<;;NFCV$66QIPaqGKZB>L7UZIJ=eG)^oQu@ZVlKoDx9@Rz>a0CEYQXDex zjPH+=!QvEqUzz73bzCE;W1vFd;R;-X1P!3|D&y?^>b}US=am8y;`E%3IakTD{bmXG zNGvIf7Z?o)R}E{P`@hj)bRv8_*48k|F4W6ax|gixJJZ&zAO0;C`OhxaK=7mvDLMa} zHae=+lK;gGt(rN*Ym134mEGv5)$;5FbP57K3`ncl&74Np;mf;weY&1?!?bvBPX*^& z*$K_Ko7F=x@sjSflkSX{`>un_K21^cSy*c`dAgf1Ppu}l$gWuK7gdh&TKD>RK(tb} zKc0xN#sVgP*_yGjB|cxq?bNUIKMFZpo-&8azOf8Tm?Po&huq%Adutj__wqw((`x;^ z^G~ZaH{1@*Vmfdv(@AB{H4V4L)M(K`?H3nW69G=Kcb~It#S;F zl>fnbY!E%~H?)58{bXboYM(*?>q?s_KnAL48TIv>^K}UoCKiK9c~|>23HkX6y}h@$ zRn7|!+x=blgQKHKV>#I-Gn|%Rs)hcY->`$dbImHJXBZ>zN*Gw{-xTjnx9+LU&h8zR z)hp+mil|eF&#qEv91Gpz?83m)W-sKEm;0;{6~ReAI1WkX8U3(E6;sG*M5wy*#pwrG35dj7oA>9z`YCj6VjCa)B}w~HFTw%N@t z$iDa*W;KSI=!HbXdExRagpeYtu!xXufvj@(gH8-8IabG4oVUVLzCHG!iFgafE4 zI|jt=C*C3|gM{J_4?7Z(q9cT*e(0&cca)fQzpr1nHRr9jspYJw%?wm6HGmYe4~?qV186|m~GkB^5~ z#JI!_K{fJ$F=~h*Fq)zLp9+K5Z{rI4qR+!4KMW2zI$0p zRL_~VUtxhJ##LrxJL|%#8#rVkn)=|$bD=`&?d7g?->kA$8VBG?F$expWD&xSHqq|Y`F=pD{(R?HkN26m5FE&_CxTf~rhTK#J z?AxCXllZS%=62F6G)77aTl}&eQN<^b;OBkJ)~UIRv~XBJp85i#Mn35%D4wd1gm@rFtS~QxEXpt8%I^~f!?F(P3E|*HxFlbjwP?;YdSmRkZdq?kW{2#27 zGBvtnn39sDp|V*@m+1ZsweN6X`Ht$ne{Kf~LZ?n zGfJ;qL(o|E)2NO%0DX*(F_G+X4bk#jR%8Ii5k6~vfy@+~<=t3U^`Rc?81*4xE)OK7 zlgO*AZ5MTPY@rrkkbK0BoPJ0-o?kS08=fL4zcF=*F(9Rp{>Lx1G-wk{ zOqdKM&8$ohrI3m&CnP@CZo^BIK@5Y6JKl3Vmi82F#`Drr{BIlLslBhS%1nq^?-ih0 z^gmG`BuJWQWvd`TMV&^y%>A7$NY9;RV?of%SY)JJLK_Z-^@D(e`8tEHu!S$3EwbT@bIdG^%(f=s?m1tDB)O?sH51l$`bNffRU z=ot!!KNWpw-J}$_Ef~eY6{PVXL^7py=>U`e6b|#W^sf$DmU)8o%(se!U_7*YO<5{D z`Rfpc;XsISD6jwtIR9>k3JdRmnlD*&QJHz$wEI}7Uw2v*8}7&=(sWrmt7OZHiB$Bg@*PYrc+Z3bq7++m0Z;W9gLrj z4KkZWMMv_{Jl^yrds*em@1`w~2%ip$%waCjPJXEwSMmgD0QsNXdbgN|*PvnN0rToR zc-kBnI)%MwkHU(_@9-+I{3=KNq2Iuj@$sf)&m39De|#V$9Oc-CC%_GEEzv06J~6d& zkuyPC#O{{*#k`)_#n7hWkrnJ$i;g5#q^>2-ipfK_HApFF8=P{rv5X5ol?X}KLpD|W zH{!};?3kB?UGg36 zEG51&6j9xpt60estl*he?teQ8F!9*0pvjgr#c*-w7uH=Ve~RzB!o^SJUxl1%X1RwwUK{m?f*FT7wtxcH6~J)0ssE)S5!`q0h|Cps*QyfblSMqY0?ARy zuNj`I2>JTb{h}IFyT0rzaXu7YhG!h>l0Aw+pZW~8XQ}0B*qwFu7JM?zMpQ1*DRMOj z^O?e*KLD1m$yDa~m8J%VWs=al%Q(DFss7i2T7wjR%TuJkIAwKTX&}c#)fVjTbhonw z#aR32W9(80ew?~0)=DBIZv21G|3v4=e>K{x;lS;lBbgC&$y*jW%dRegw?Qk z)S%^`R8TDM&0dq>^yF$c@X_rl(vN#Dd!kKt{dOmVc-MpsG&O}cS#)7HAfowqdfO7H zULaLI*Ds2l``+7|znUC|y+8YlirYsR0;^D&6G1|{q^ZUmH_f%`?xYkQA0I#4o8^Z= zQjhMzWn0ZN=b2ikt-h11ASBb)f7aZ7q#H$IXJRq6c*kh7QA&@mPp9jwn#;*O0sVys z|ExOSUUql0y&vYu4!if3rVjJf4L*8gZ(=^V>@5=e;5}!m#ipAs?0q)!>8;?AA0-AM zmN$zla7V>&`eaC&m~s=wI1`k=v8p4+Oa@6x&>Ep8H3jaD&QX-}?#sj*8s^kMHtB&o zhrZ~b0-p0yl+QNL99#IN${^!ip+yB9t$cTe4tSY=5s-1EkpX?NE5BIebS@r6hI1w| zT>XH;x$osvo(juswc{@p6copUX+6YUb3>t(R<%~`TZ^g_^fU9@=&Pm2Ky{?B$FC^m z>o?yrt-my|Nm_!9wewPFwcY}F`4-N}JHkURUqd< z^h03C2;2j({FX!vB0E%r9D@hqxxKykBQJ1S{M7kD$0)>li7{D`mQ5D;@OZlU{ukUlj{$2YaeQCCuXPNB+)av$Q6mb?9 z+We0TFfZDt{cga|DUg3CF(prArTkHxN9y2cdcl_5G;RD~UE}sU@IpWxNvyAvSrEf@ zYOO(7w1|~WZ;0eZyM+oHK9`vNP-Al$hBWwVR-=0NMY&c*U2$Edyo z6Crgbo+K4iAD3VMTmRvw3L}WiS|fg!LantL%RSn42r%79O-IE~4n-AJz}{yYQ|`$D z#Rn4$b{6jw$I2FroR>I%vR;nV4~R5SaHIGTNfwT?u;qWYuKbXgmEIz=s zY)Jl+VVO0-e6%FRflcX?&ikE@zz(D0?q<}NRd9OGT}rwU+wQQNT(Aab@p7TqcVLMD z3)e=EB;kn|zW<}v=%hm=xg2?^I-bkez-r9*JNp`(ybgvy*&XXj3^g1~UL&GJ+#Vf7 z!#h;L^>@{Je5LJo-d{vKvLrm@YMMVj?#bn7=+G(RwB|7_seS^K*Q5y>P){v!8TIGXc<-=#RAV zSHMN2OKK3xuDeo_pmTls$i2VcJ zFQ`Ex&4@3gVXk5+9;BF6*a}{6 z=SJeD{pTq$3vE6uypb+_>8`XjSjI40zt+-Dd_i?-^mhX6bu=kt&&b1pvY_s4VaVO@6_j74>jz`whO$&7?i zOM;xA7GqO#FDDG~SuNErcwL{x>*?J^)4$O2YZh6%LSNK#KF#tPl-_?+%1DO9Lw%|znN^IRV<0UZ}Tia4P- 
za!S5(GqN#Xxjorm4~<-G^SVu^jovkT)LkX&xZ^k+y!6;06pjKLgET`#*M%ThNNJ7| z?k|e44slAK`MD)+?}v*%RF=9NRe|Z{8*h{+arD$t(tkHFOzC(U&6E%}AOxGQ^bV7u zda*iAc^1e-Kk;s9khs}lMlZ=wgT0MmhwqR2+Lic(UA7o@G;}W*+3MO&w;RvhyAJLK z7ewzYn~YuzsOZLI^8=<<^vr_WrM1zoN7L%ATeCgo%6CpD+jC> zOmBAnvg-&s!ZO?g&nO!4!*V*}0WcX{<3BD}gn`5j8Anb#K*p#lY1PjxTAwIX-Rtr5 zPMOWLpf}@KBwrDZOiR-Wz`TXeQDajGlvf!}{Tz8?3axyX`Q<18X;@pa?=QXt^Koad z$1Zfg?bqD{@4l{?$b{zwg66WkuNZB=ZgbbhOXSRArUhbyg%hb89AhYMCNKL`tnbr4 zn!<~Pa31pN`>i`|ME}wn-I7D2xTb#sf!Eg$R(b-@YRFBuPn$EWnr}6gyw2KLf0Td| zaIZ9I|Jj}j7N6|L{TDf|-HP~Y{M1Nk9cx&xPXE8xO!h1Xqjofd%FuDhKM5*CeK5YaC!f0v3k#1GV3mGH$uu2P>YU!$Xk#2P<@ z3s+pSd%v)Dj#sJDI6{b<(iYbWWob9&<#eLDJC>~4 z^P=tap+|q{cH_c1ym9Bb8R;ZT>5<#lw+y*bpJTlm&mptYPIoTL*G-M?7w}@C4)D4e z;MqEfR3zoZ8iNW>E2rYug&ru7#40k)@OILZ-a8}qoNf;GX&Y(|rmJppo?0SwaN7>* z_BSouAuF08^@`)WClr~hcTD%?aNUg2>@~A1cIm!%Cl;Gl)7QtuiC(u?#Jp?OxRTFt z_V_G^9l>z-dfiC+7eHL8#7l5e@Yt@h`XGolKzS%X9CeLj?z|fNBjDkvCU%-DeDR^d zy({#0ioSS93b9v0-mFUb)?vv5wT$f5LLBKuxbwN;^vU?I^jCo3m&2d1&fg^PfUM2e zvGdK|A9uR%T~#*U=RHI>M$roI@+9WD?YHqzdtdR$u(<4{CkBvYcYZ(IIYNBbZ8>#B zN;6uvdQ!ziATbwV!Rp3yYJj-afgaI&i;8RLj(vEy3YS$J&Og_R@py%m^kB95v?j5& z3RRiA;Pe?1cf=_Pbd}j0F9q}(ij1WBZfG-XNG~WMq5OxG< zxr4(DD$lo^u(-m9NkZx+N#ag_+i|6`&W{QJRlG#OA^$(3yYWb-Y`HdKx$NTNIxqMq z3zw4WZ=WJ=k4o=*yi=t6q}9LYyjq2qM^FADrS=}gGq9mLnG(GNKFE=|)BiFB$%js& zXb`U~?38YGz7D+9+oMB|^pC}-S*-ObG16#sxwzPC7!dX(U5Al;XJY(;PhvyZgiI_IfAa`Czm}S7N!FE zF^YQvBry!9?av)8ZwLdtSVbMyx6>iafW&GEfW{@ZKF8!jklQ7vds8fG$ z7>jo?W17G7afG=v?LOY*2&FbHZ7o_=Yu`Os24TrMVH3Vd%m)C0iV#>84BDj z|7zN6M)lr-}c;EmD};ac&^IbKZW(D7LQZoe)Vvu+SDzHV1X- zF*UuT%^f`bNj5Pt0kL78F=S?306R&LB}6>W9lU`10@KqCShF=2A9iE$5@5N~tn_bq zBVYyXZt?^kKEb~1e`B`_`u0l+#`)JMd8oJ}W7fB`eSNF%mM?)R;`O|_1H{NWJ&+hq z(bVdgxWc5D3T}$pP;saoJOPvv?G%UlhVF%e*cgr19kL{8YYu_L(8`7pwa4`Y5n}z$ z`9nYK0((b;X0wUyZolP`KEVRx0ujOx=O}S%dEQ5e3j?CX_rFQE9y5))c+o zMVE)}sUFAYe0%YzT}ucc0|M4jLTT-aV3Mzx_kB&+bYYykH_b)mGy>hSj`F1qK zzcuvLJM8^M90@*O>$*f54tBuV8I=qkw4BvRL2S=zxcSy(yuSXFgN=WlB=kf0V@vGT zrNzlIB@c%j4&$X}=utwB{6lBGtrSyO#m-7*7fx)&SsCcoK^@Q;2YYq!HF^aAYr*4c zQnh~7+AbfpJPlYJen=2Wmrg81^M~p`A@l3nv|oFeAyU{wd))d~>RPqunL`-O&U+1& zR&%0D4-IHF*q|80j|t8)+|5Q%$DM?>gEy+pw{8x$lkoipM0usreD3zrubsul!~z4! 
z$N!o^c=9l#Wk1L*rDAcL>-fF1b%}&_I?7iS4u^B)c;A=~_yfuu;a2 zEbB38hRSt3Q@u<`*k9VeJ{ogOte(3nIN!w=U@3eU`0?CfXSm5e2XHgh%~4ga03r5s zmZbiOKScp|ku<{#@6Zm&Vw8?lxwp?}R4^B1GM%(!OBo?DKbo7is86nQfS{9=mWH2` zAC?@!hO_+s+c%k>;vzvO)f6@VeN;q)K(A%JegA-R+cs(Fd%uuRr;VFeJX_}>CFd_i zMP$Y#rMr`V76~>zxD0?H-W8Dp2i~gNZIK5QoXccJ9BOf{TR*AaW;|hs`A+@G2jzHc zC7t*9ThX#C6u5srOU^u`C!xs ziHX#ro9oP>W~K#jX9@r}j*piVwr9(JoDnepCK!JoDEG{p(wr1VDRqCJQx0D0TVQRr zY%Y*EY*wUoIh5ndVQhANrSksc_kLuEi!P7@xT=6t1>Kz-#aGNN zM!rL^E9e6P&b`0gT0I4{SB#;$r(Vk(QHk(NBG^rf7K#4=4*K=}UL7s-=>u53-}qTg zgXOCF*uozuvzt?_+Ez*1!%KQ-3r-1Xc6?C(v*srx&4#Hx>U$fV1LD#B zJvSXHJIM!&=jYA-JhWi#MO97zl5W7G1HwjEOY}Oz7{rkq@Ii--V}ST;%hb8FrPoZ9 z-@HC^BX`QOndT?$k8MK$WHN1OpHwh)pSeG%PR_1Al(9kxH+Wyi50IMgc^}p)K@*>w zC0bCy>%X-GE$IvG$G=K}xEB4SlblxhmE3^vDzDzQzj`&OdYyvb6`FYVap1jj}b6u>PcD{ZfY=2$MWuV%bc!xq^n|%T;%f5$?`4$3Ex!L>w_o= zRL%wlCm_~rY%?9kp=2VLi?*51$#0nZOv%2r;twx#FF{g3B|&Pfr>7@Ld3lVYZnxw& zDf@_pxxEJO7#sPS-IJGQKIUgfh*%xCW^A-BJhgO~!haJbkyX|clj!yxcVe&iXII9$ zPwF*$>Wy9%jSkd_BYBcHEHIP|;4+El?zC3N!q18qd%-nw$F_DpnXN=FG9UtHun zKH#Ujimu)i=FME3vM=o!%C%EXexLxV6$8A82J_Xp#DO-{ktd@Og_lZ-eXn?NxgnlB zj4pj*Xy|T~bPTyCHC(Bd)Aeo(n?IV>N=X4RMB$OF2TZP(zrW7NsolmzJ7bRPbnRv0 z$XlTtAS4j82PFI~2?&jQ{>Nl6unr8Wa6i0fUD~^KyBUlS7L~SlsAqQ9^*XewnRlOi zVMf{{P(V38b2J8fSBY+;+`j3D;w|nPT~d8`dl@3P)J^=_Si)Ub#4v5VHO_w_4Mx|< z;%z%M1va~r^Ia1U}%OYJZ&L2Oj*9;S_I>7DN1PjjRL+G+2TO`>2CAL8tXfo z-y4_uePIfdo{f@=Q+Eq-iEB{V?fLA;>Mz7ozW&))gZdy*w3%akdZ7H`x_8-Au{AhC zajy~rw%z;CF@jInHFNepQ2^&oT2z$!HYzz#?i;SM|7=nhv_1R9wb-`oqEO<_DkgXD_@^=45i0Mz*WM!; z(6_+EDT(2e20ycr(kN)1Z8V@)AM3))7QOSxT8%rYl<&Hx`f&rXKa9ieT`>w%MK@Q^ zRXqkwz_OPNp9AJ3K~h(ISJJI^wvRLC6q6!i?{5s5t=?bW#Vka3j&7h<&alssr<@zB zSYBOJFZ0-)$S z;j@NS>Gy@*h@ScZ(Y=MGhlIt7967T0X(blKnmV7*n_ef0Hkn-#$Czw8h#ok5H5LK& zzs<9w{YoMmbeFy~{xDgY6EHUVtn*R#)YAU!?Lx7YV0MTf@0;A_fv3>|DPKVx zG+BURv%W*zNJ=pQsn;O%@nyGniJUOx^I@7=%3XI%J_PmrLqZBovW|vauCk(x1Pw3y z8G$MhFClN_Xhfh@$TZrDO#W_4p+IAYZv8|P@X?kt#VaK8ew*vogU~y zLz$D!cG@l*E+0OM0?7C1S*5;+f1XQrcA6$DeN59u59$>}cd_a;a_oQ{cM_uEQ__0H zWX0fwG%RhJAXuhqIQCakyTHrkra#eQDD9IjX4Bghv`?~6kt!`y2GUG?YewyE?OhSG zB{dKyvpM?vmz4bGwdD{wcT=HD&2J*6_(yqk`2{szH=f5eOEz~00gX4wqny6jE&J=K zE6$d4P{$E(Zf5z>c^qW%vJy4{IvqoRb+JevMH6yxUHcXQ6eMm z{wJYrm^(FQ{U`C2@l^Pz>1t1}fu-AO6b1cIvOV>@IZ9S)Qr{YBufYW2Lc;s~r z_h^CQyt*rYnCQ=%}hh6*0EzJ89ZnQv|+}9Py&|tYMAQyiAU4jCtFOD%B;i2PtJvU=7 zh$yAYIGO%pPLYN9UMxACm3Gfd4=Wu}!E_xg*^sL7sIZ@NZm^_n=nbceHO`-{v6!l= zPyzX}H>kv;h*UQ0jj~Z{=sE&54yrfmwR4*a~*tW>c6sVJS!}m+MwaXY4Ylq^a0gW~(Iai>Yq>ane|zqIcqZrms;)L|WBzU}6I6D};MJ zi&en#a-ucKJLAhVAWpOWT65a_Hj{0YO^c@B^Er|=VGl3oLm`R^54UJo`f}e<70JE1 z5I0r`i3cfeW7t+ypxMjgBsBE=;$nWclW~qKlV{}3Gj#yeuy*UkgdyN)25&llk?iz>eN1VK3<;G~{qx+}L{T>KfL_Yv^Qy;p=c~y+t9XotKwwNgUDtt__2MQWg zi<8Hl!xp|dS3oxKV$mApE7$RJOZGnt7*V1oKlV15W-z^S)}P%~)O#W}QT?PEmrsOi zS3I?05^Juz22ZHQOgjCbZrYjO?!LzBF2-g$5^#?=)YCTGf4wsONgABs%bFp9JCi+J zs4ive;uR)ZNLAyo+F0<{fO=S&piaVYiX|w*uDhucVK!GVVodm}^QH`gV`fi>YT?SJ$ z(#iQ@!G*Uq(6R2z6Q)rA(AjgxcS7y-w-%pjHO*DIcHt}uW;I~?2Vm7Y&*%Q(!%`;t z+ET+<)R8lMU5f)!O2}uJQ0EnPwq8QsYWtG67k``?Aj;vGQI52N66iOrezZJf2bA*D zj812-m3$RC0(9}Q!fIBcUKl>|E~Lt@u#U0HU?J{?T7tI>+nMZ5_d}>xEUt3{k||gA zp+gwa+O?BSA2TR6K}|!eUe^*`h!%XC23J6)HaPFUWfeni3jFv-nht|0*P}Ot4OkIE z{$6vDjk(XVN8E`g%NF0C59y_>h>N?DS8v)(m+$7J&V-J%*;L+IhyFPT$e9sixeOkD z<)X2X9%FJWG^p2k?;K8wYrA&d%jwZ!96(G}_CBZxIa*+Vf;XG?Dr&8+kvk^{c1_wNpOP(WVJdm8l{~OP2uH$4ASU<>r9w&A^t{ zFVrHGt#D!`^mG*ySZo950|vryx(cj7!KJ@<)Kc7`9_V3(!JEqZ1(3CC85e(Zgj_2A z;(JKb6mJA^{#0-ke6^LOCE=l_ot=kv%X-WL$W0b~TYVaWx(()2fW7vmj9k#`X(*FIQ>`ZUxxi%2J^y$?mv}2t%nz51&{*EwJSus1Bz3<)b;RAp#oIvUi zeib>4L!7YrBf2*5pNwbO0D_?znp8+yWA+?TEEo*J;ZflD=dXko&j3KtvD{EAbO{2A 
z2(cZs59;^GGEH&s?6jOvI9JNe;kgv(lNAG-KO1c&H19L;_GU(Yqm*fD`mM(OCKrhUmag< zzCiE2$-iu7K9zrP+B_(O&UlwH3}CoR&wr~#A5W}hRX@^a>1vKCx?Q>+pq%|bOucnf zl->J1jDd&7~bIuie?~6d%zR}L_O+(eg(0A3(Q)VegS{QMT-62-h&nN{LYFZd2 zth!pv+65BTB!eeM57Ndq&JBIm~t?<&~XDu=^L!t3Mt`0G#6zhF#U%1?P70a3*s(-qwBK#G3uw(e&!-1 zi{zJ;6}1zAXJBZQah~mQQ$`dV?x`0aDXt!5Io)#md7V3$XJjEOItWE0%Jkbr-Di|e z*BJL2&9{tm<1hRu-VT}~AELib%3v$ZNX{P&@i2gspM1$G`(2RoSEyUy+#n5Q8rdTa zOQu_iAEt~&EV#8`5sKmcF(g8#ki$)BcJA-fh+&po_VGJNKgt$D0H@@y@;q_Xm(cjIZy$x zH^L5i!A%eSi!Z`1efbWs=xm*}%mEUFaoQoKohe(A7h*f>9}H`qb@0yEUD5enS4gJf zaVSAf#?^J37pgyO=12Ps{@8}&o|9e|Wf<>U!dTSvaizhxDtmo%H9Ma#Ry?IHJ*F0c zXcV_Z26;NYblp+FB@1MzYiM@tVvj* zh7vf_ZoiKp>2@lhvVI?~a6XwG0?Wcb*ZNTNWCgvhFPr_|^=ML5!~GI=AOw~nsY#&Ha0Y0Nw4*PHm zr!WN>Jc#n&Kglqb*k)Vi>|*|!?C6%^^nJ0+DkVx2nb~^vlF<2m2bnsj3beAw zDrgwmvi1}I=DMu|$h}^SfA8~*Zk#DwFb5e_G0f4Y6lBp(@(5^|ja#Pdesr2|TD{3{ zU4R=mw@DveWlx(M!XZqxsYLZygEbPm350= zfLm}TJgE+F@7^&;a^33!@kpMV?_ZRI_+)nSa;8S|vq#4a9c~ z?q&HHDGzS%oAc?r@-Bf-cQL%L-)MHNF>LOEZkcJ`HW2ep{-#98Y+Z0U-`K|E<9~IQ z?`ow=BQ##f!KIdg}JW0eQZzRtlv9i~DLd3@eH# znir~)R!{hhrE8+Rg4Bda|JDEOi3J;%X-Z!l2y)BM@vvFX^x*G9n)Xn11Uz0YVI{H4 z+JSXUD%m;`zWN6&g-w^|m1Lj{J#`SsHFM?|z0n+xe}A1D`I!&xebC+Y2J{S0GbH{A zlrj9F`%UZNLvphCQz2%vrmw>-+DMt})?_ahP#uU{%G0suEdMJnE?g4x+c>vCO^`G4 zamJhkx?AOUP~tMEFo7jk0`o(SUDifrMdDCpF!HX75GkSBD>GW`BiEQwGs7OavE$gR zEcd#fREcopPZ$DXFVj|HyvqRdJTe<4HxP7B{+41xX&(cu^L#BG*#9gyBKyyUK*2kt z=C6h13d0@ zMMpcTcUGBvbxV1?w&gcHz>&D49=a=VE|-k=J6U}W)$Whh94rPDhakJE4m(7|CZ%t99-n4y_8TP9Fl(l zO(j8kBJP}=y~u6J|6oa@>by7qYbhZ0s0)*zUEoAdE9`{&WO1usPk8+#iFI1*AyC4* z3!KoLD))Sb)c6+E3*ULMNT3DGXl3#mBb?Dqn1~)L%}L0XQez{%*d%hsrjXvOqth}yvmRH2a;!7b4$C6bLejjn~{d`Ncy}QPhcVqg`87 z!gqS9bDa2#_{5Zimk$=-f`&o%{R0n}|1Bw=frJX9-VUMfHbs5&@u=&BP`CKm&h3R6d6 zLc7_$nCs+h$+}*wUbm>V&5q`qhu@s_?(^=?YnW&u-q*Ra`PItRJj~RL*o4xgc)nvy zEZ}{52s@~OiK=k7z~!aqQb1#cZNy>46?&1#bVFSG%>&`t@(qTcxO(YY)0y6k{1%J3 zh)})IBHUf_U*pPKlCl00qGV8 zCS=->lJs+7q{>tGF`?|%07)Ngrx2JuTsfq?f&BNOgsH5zw(<2y3NcXoUho*p(m%UV z?c)JXzYI*)1$hlMH&S<*)X?D(Qj{tRok@wVnSBJP6?G5CyHX0qsmvTznJhF(Joe>P z{#3^E+WjbA^J|waetd*>~vgRCqunIc}43z2@ccQp!F%k18$ zG6aHVv!Ka9uQ2e2CSPz_^BH7xa5{g{p->4y8P{eUC{$D9^G=v)pa~7$3j2L*JIoEc zImWc7sZDX7Lq+$Yn~mS z$Ndi|<3(NMBxU@npVt)BJVe?KRlZaiHgiEWPaaX=Mah9kB!hW7BL#Vv-5SD!P{X&Y z3Jvp(uhN9*0_OMKYK6Iga6Sb6b^hpJ+HuLaVq_B7k5`V(Gqn$wDC2Kd!alZ(P%g2> zACL|2Sl4A)9awa|kd?)M%OzYB`0%72=Y>#@uKXeJc_@rq_$tFNoK}%oNJ-$L?qL2A zd5x$wpVBKoC=Ac~P|#vgKyctdth*!`+BCu;wvZs%^e=D&fFJ%Xmcf9xbH{*Z*&g6& z?bBbK;;;k2RsuK(Wl}IoDh@sgfZOtKP%nz0j4djeRFM5+srOFJ$I6(SYkk4L5B&bl zyg+LnMl-^vu_{QQKy#<-&7kS*2*sHfJCfbnG~=GN$G@azO9;pY{X7u@ggYfd}j2(!}hNdFNr_b510?3@^BuP|{98=TG5ZtPO|dmE`T19Q}nU zBEzYH14wz0O>`TPE_zj28zITGupW|K_FLpGtlLb%Qg$F>;EqgL*JZ~TxmLA=-BU?J zkbv`X1d!%L*lr^vrETXVfjnzfgYmIbv$0r$ZF0-p4u_JTM6nTvrUcG-QltwO!Y)DuJR_gJ2#v$JO3lQOi>_YwK&o z?MG?4CK-T*+;t!VXbr|+6C&_oW?WsgiqE&lpP%pXG3l+W2EA~|RZO5YU~^AaT3ieT zAh9~eAG<=KZl#70HnXZ9*L>vNUk8`rD zG$7kiE@5a*cXx$uzIsp*HDBj7sk#P0KUFDJ;VR5IEM`4j(;pxI@Km5<`0872YMq1S zm69#D4y_FEMuQ5^pv6U^$9CPSJ-Do_tb^@KC8T;^)?fTk{QuZyKrGZnB0w~|`NXqz z4gYN~tCj=s6C2Q*e)ZzF+yc`AygFV|VBX7NPWXtFTZl$kR$$_(4isSj=pl-x?TYV& z4ho)&2`?B_m0a$K=35>by!E<$AIT_Mj7;aO+jpFUM!8ATy&e24rHR0#1ySBPP~v*7 z^|A1^#_&@g%9Hx?E_+j1`mbAC?x}4T>+ibHBTP-hq(-YH=DKfj@L_<^9B=qpmu+{D zu1Mu+kpQp57;0~1lu#RNKU+R-Ry^`hO(t@+X=CY+!)TF~{nGwuokK#9T>Ahq>3@cM zpfKhQ{VV>_WnOCMUfo|Pz4SX{XXmL%AZiF@Q_};;CrLY&0%4=tj9plA`5{9Knn22! 
zByT>!1UU~^y&E7r)_E#@aZ)et+|a|A!77+vSiT5%MZy$gNa?0|^FRc;Qq}B?St~TE z?Zf21sZ0#IvVyuIs z4GR`?JHGKAYW9~|p)SOrU7)<>E3q+KQp+6ngdAWN{~G{_j&~@?XG&~gc>5ASW}?pf z`TxK*|$=SJ_x7@EzIp7Ju`p`M?nR`q5TE14fb^AFb17%?wIv7p3PO9L3lFhW9 z3ZI!_%8PX45l0#(PNe)*yY)L%Ui-VQNU=|fZQ+>v?l>RzG5`qWIC*B)b#mFMQ{;$N zXrUa^K53GBi?!7NdLv}&zYl?T(}fU7J)(alCCyf7*K@klN5;yTZ!s>_7!M6EBSR7} zY!zvZ-ZFbT_~F5G9T*Dr*l^43?3{KZDp7H%bt$m`LTYY4PPQ`3aostk zX(pSzwB?zBV03lbL_hg60Er%v&8}P6=H5{qs-kbwH&c7kY;6DM-Nn(@CmHqe$ii+s zDjJxX)~1(AUe+&LG-R73#*`xr3LJf5MI;G~<}vxE92~@2QeG+l-~Kp=Im?%9ngmz_ zAIDY;I4X13L!06{Ryb4LoLw%t8{fqJGNa9MUkS~e!hcXq&#w~<022=eVjuJb6zH|( zp{*YPw;8Q8!}v>2f2DuDhgSxE&6}fF2|Gz(&gmN8i2CZH{}wo_ zTrl01vO`J_sLRg0=n=94Y1GJJL6H~F6la#fGxnQ!d`zgk26<(#+gTJV6JPLQ&stJ8 z-0obRT8>r8D++3VFi3(vL*26=u?y0RK4gh_T%9xun9-*|pPUzO+KH%nEeqSe{Jr=s zpcrl5qe?z^R^u>c`9=@IEY0pE1^Nf2Ci=ET3k&2D)Ye=CdY{RFdjLqQL~|bAl_}{i!P6!{4sSNOCx-puj80!mAo9!M`nfv&6=WYaewWWIaRpCAki7D~ zH}Uuq9-)vfaHi~^?O7oo?m>vTUmU)sEx(Ir^r2<+>%8C2%0HK9srz-e~%Z)n_?nJI(yZ$_EVKU&y-^`2Li9DYvsLcS;;6! zW0Up?VqOR<5(Ef$SrbpT@rlHS<;SwJ9Mi=s6*!`mDf+cX*aBB+ax4cbLkNM2UBr87+=EVS(2pJskRYo9!&!93%uV7@+Z-{Z_n<>DsDhTHTP@}*L;Y%fAW8D zUrRpLo&vVvF~tVXm`zdz^xw)Ak}ak~|N8G_J#$YkR{C{^_Q+)8SG84%U9or_>E5*L zwP^Z}aG;%)DoA|%$2|kLrre`O*~MCvymSgum2{>CirR2y@5yT>{C2@E{02B{5Cpus zdtWXM30#KOzpfs=bG4F|_Q(c!fpQe;r7`5ANxZ}$?S?HOCTKRkMw5sh7 z(lr06_38EhU7a$4g7h|92GtpLPX)L(M@p8&&)1G5RR3_j1{?IRMy5f@G(P9;6p$uS<|GmMcoN}R1 z=+sbTTX#348@67985zgWG~p?0IBuQBkH@W6rc#iQ8#NI(9473ezB;toUSo9e(k0f z$#nh4RN?-qY;08Ff9^8g-RR5d@foPeS)=PlfN#4?a1il9P5l3=DbWwBJD(u+g^!F=Ixmk)ztVf^2$0Ey>l4M6^&I8TeD==^Kw4}4CWmh;1$ zck4fFhSq;rkN~IL*G7P@5;RrypnmQ~Owwm3g3kCNZ2pe5SfPJzShTLmKjEnWN!lxW zC7`A_bCXh1)>4Y7Vh`@0)9mHu$9KwNpBAS4pF_K)xTA}~)xq{WITb}^^krvWpn%66 zxQ&NBF!0v{#4eQ9h}Iuk#3noSN;X&*ws_xD`j~zf|71HC^0`BAlSCxIh4dHU@J1o+ z5iN%z0BF~{PA8{%E*X-@QwS~YKWTnw6J|gzxCA*S|L^sT6Q0y~5AXZ2t8~Fd_P>l_ zCxcyrUBar~;U4Gnhc7q28G%Oq)BqR|h42HPH?p-~Gt9bt-VmC^7`Qr^UC1y0a*B7C zSn862%ihWP$wDbt`Sgj&sX9a~^!`$&k5jiEM@y;!%Oa2gKOf57b>x z8-Cul-V6$w-I#snDFnlJuPd4wlJ9C>A{Q*EeJoI5AX)Z}fnUN^P`rfe?gll$#A+3h z(|%j3Xr^se$Y28%Y!ku^ibyf{iZtAp7DCV?MG{BWqTnLX! zOMMQZ^lb`k5{gU*jfPBt8EPlM)*eB&h>V1~rZ7uobKg5MAonb@{UQO@1Ohy&fpvNx zkNs(7Z#)LufUA7eF|z+oAm#2)k|_eg58diXsi*L+@DvkIyS;!&{jQ+hB+Z~)tm(4v z)N$9Qxkb+@J1L$)%`lIyyX};M_p=-W`HAo#Te6`wits6>2Prcthg>UP{DLE6s6-jq z!f&mJ{t2?6#n0@-e6yejzIBAB^h*j^#Q?Kp$>QUm5jWMO9tCP^rfAkqjDy2CcvbF{ zv<>eR(&^1Nt;mN#|Ff{q8D1i(a$}@cvI8|P@f0;wTS8ux&tZ5H8m(DAZ?cPI$yaXs zl}&y}-yzM=HG!S?1i|o!)x8>5+6OYIfnRYys#X}}seDlVi0V!tODG3a&s1NX0(wz{ z2izNbhPn^`PlrYw5O-A}gvlc749rN>!z%uY7p?@bgCF9xtW1Q+l`2Y{SNnPYZIpeE zN=SzO9k?UNWzxjvHA5CGJ&;6@K?=OgUvJwp*NN`P!g}ujEtp z=cfT^!M}mXgjNp)BkkAO2+rg8qctN)fk@*o1&qbadW)RsKq*ey1QP|tqq@y&ZkC0+ z7gQB`6uzE3^>+Y}e|@fF!jRHG5G0My9o>`k^nV_*_2(3mdg|ZmQV}ybKv@CrFA-2! 
z^^^#fCQ4(mYPly#^jY@sCyMjD`uEqF4pJ*$$=^dkyrg(44`6v_=?d2l_~ZZQfZ(Ov zhTrs!0oC7Uk5ayM1|#nR(d!f7Tf_aX7p(U(2{YIX&$V2wQwhNL@KQ)jIK8#=Z&{ke zZn8^-CgT4iZUbZZczCbS3Qs28+V%(a-*6UK7hZ$4poQAQgHV3OK~4bO?dU^^m-5U+ zjeVKsn7rm+EBgQb5cm>E`GpZNnMX4OFh_&@CP3p$l*Iox?SE4T4L^2J0oYpEMJ-%e z3jZ`h{(nO-h~?FXT5(P>!FPPj14V=Qc-F73bO)Qtr=ESA5e-jhgxmWY8@O2RJzsZj zOMC>&W^k&u=n9hQQ-Sr9-*`Kf=cYUyJ5nSo2yQ( zfH?|pUBG#SlQy?Sq+{y%O9JQuTzZ{PvW`_S;CF96qW+sBb9G0(If^KMbWDQ8graYM zLZxffn`C{)vaCZ)`X{537Wc!_$mtLMn4s}g6(eUE?}tN4nBX@yOKX`+Y%$k%{=V`j zlR?dVO`VgCmNCNn7m1T7p)aPzIKCMRp%2TqE$-ELXiqj~Uu)L0#o!^pnMKM0uNzjw zgMX=U2=KIc)Jrr5Uw6}kuhAqHBr&Yl1qkRKt!ijTCsZ?8X9rr}E2H<0xiS_PR;t{vPfG+XT;WFXMP>NRNS$G+CNa)Qfs4vtiVhcFX(LfNNixz0twbM0f>x3Np$ zJW&zLe8jhC^)UDKloL&z*y@}O#@OuaSR#>P0>6@{qBviC+LSM?WOd`d*t+|g=2frk za$`n*UwdyN4e3ntD%_7xOD6Nr7%oXpZBo8j){Bhi6~9;B{PTJ$kJo7K2x1r^TK-cvoEUi~d6Ve{l>WC54+UH7_tx5iKxij4l(vk$ea zjr!Wb${V!}%e$53eK3XFcv?cn6m`uq_-;tgPM0=oL1A*eNFevpUEqz?&ZKB!k5$ns z^Qn+4W(yM7t$!SSgBq{_r^eyVLjwNvc1@TdC~OpVNl&5eykq9hP$?yY?GSE%r8aet z{feEaLVrcMQFTUi)csw`!_tY(8iaAqUryY&h`XyY0yp~C^7Q3uv| zZyx`g74)x@{@}<$VBt7P#M1KHY9j>Hv-kvyY)3Vl_%59f-<#8A4>kAxljQij2bF#= z?O`I8U2t^%g8Hb?t!k=`#iU5$USL=*HezSyasMid!<)2Y<@)+R^;ibByT5-jgFgqd z52#SnmJYguV$ZzRLXzyRgepM=4A%GgRH8-LH=DN2zxi@y)u!8lSU(u+tvLf=*N^j~ z##>hXRcx-QU*4Xl?Acr-jnloS&XKs+&~V!t;zn^%&9wi%to4>AsTNHSv*N_fwu(~< zr5-PQg(0f*EkX5$-lHyfhbtJvz-)R}i1 zwHSb=0-p{&9&t>+5>*jXujK`az7-b*Ei61vuuz2-2tCw*y|cqGwUkTBXzH8%Oft*H zPzWPxTRwL0f4*f<%3;_4-7WuCO4?65UR}Cp+4P^O`1#p&Z3WJ6Sxh;*#=?AWQL|h+ z!8rzY8`N90*2A7XxDTtQ zP46Jn>EjMESdWXe#-BJG?XVy`4hI_AAxuo>&S~&u{tidk{QPGDz!=6(Px9m~-RLj! z^k$G*>||rLUNwaC*{XW9&n?Cf8OyXk|8qzWX^jSgf`2ZMJ89ivQp%pgWAHlofIzT^+#f%0Y?T$nZ8mzd!CYW%KaTx zp;+2jie}8HmCBDL?>0g;s~Jh&;F7}6YXul2wb*NZm$znHM^EPh42#*_JOUin-@|uc zEzq-VRbfKbr*+uUs=^0M3+B61l;BnsFVdHu_;&PoVgG$IbWo6wN-IuD= zGLYG=V|RQ#9nK`ywk9s1B;wiDFDRMT&Uljqhc z_I*AcVW(W`7>n06Tf%jb#3PZUl7SPRL@f>EIdm!RBbw_}TXP}x9)0-oyY@VPdC~)>; zuYvm-W}jf#Gw9>E*>ugi&a9eP9^jbRIJLW4z&$wsSubOKA-k**Ci(d6H}^4>6Qk3w zo0V@W;ePXUeb@U{YEA5z&9=hr#gpG;FxcYOzEgUVm`L@hTdntZljjpAXoE?Q#>w;x zKkftr&E;NS_qtiyaTgc@qj`iYT>!Kv@lBz}Lkdwj<`BcIRjx?H0?Vb#d?5pSZ_@#ru4e>qWSLh{1|MQ{8 zhL?=K$eFCZ;%`{0t1&Pii+|zRP^8;cv+X%Na_0;7s+Oo(%AQ-5p`xr0Ty@ZWwSS#^ z#1Mu%jMk@NOgy(eeyTiPuSm0EbcQpBd@j2>kj6v?WHa*yRx3`zuZ}unYi#}yH~F>` zG5Rqm0M+Y-l>!WXN zmsL6rR#9XoVeP@fxKs24zuJ!lF}t(4nQtOszZa?tDe?8Ge)cL?{OcuYp-`~rY~t2+ zb@kcl`q&nzXl^2!U_edBen&}hdun$8=Z7qFAziLc$BF%N-;9d41oTMm4IyHE=);8@ z#A>D%o}|Vb3{1B4rd{pCS9}okwh1)a{jFeGAyHc6w>cN@Ih|~PTogRv4mBKBL@Eu< zpjOR=ds;WI^+)<6KbIS?8jRbCU~^o>^muii2${yC@vDMVNV`)Ljuj|?cEtf{X|p=k zOT4VKofjvko;*5YaV}U4okiVVgZa=^jO3$*)S;)B=V_!*7q^n8-|MV|4)wd<`f=06 z$Pjy$8g{~P(Mw-#VuCH7@c4*c>2;1N8}WnCBOyvNU$!JW`)M`Yr4f>0@Kw9au-zbS z?7>j@K3u?RlBwS@ejK9ex6_vDPoL*v!L^Hf>h5=r#4mSe^m6V?x${LZ>Ew6-Wr3k& zGxo3R5#coNw)76ym=}-q%Y-X)=+%)pm^4r6Rwyl~O3)PvYrIiJo|YeWCY>~GW2;L$u4H$QI&^cd zRQhcgXJG;EG@`&LRGyaBz;wiuv4~5ItBW(mdR0- z`iqW#KXjnkh=`~P$}12tPfuCx4&-e@NlrgF%Y9awe$6{IIkh{aEJ(*>!WrphTh#JN zWu_j#k++LRZH+maIu}p4D3UaVn98aSk*Ssj2e_@fbl-74GHU(}P0o zm9SnXQ&+izoL3l(f{F0pVp><>v>Uo2!~&mL=_@z4pZJVX0u8;CfO$a=tz^#VbY?C{ zXkeFylsMHvfaMc==t8B`kY>VT(SW+C)NoD?`%Tb`m8oz&%5}=({g$=rnsa#F`Qunu zA#+slO1^;nx?N0Si-7BN+R?P1qKdwAO3^Dsd@{O}%_pG4zFBm2E`{GP#d~W0fJW2= zzIBQkVxXsO)}%U-bNuCl3+M~)0|o=IbW>pyWx+@$7fiEF% z6>+lW^$mdhPBO*gFKWe&tB!eBZ(5*Rr;3Xe0ncIZt*Gu1$+}jTr7}q0yvKY`#r-k! 
z70oFy)^tvKUkf$0<<5iF!-;!H>g(Z#1ji?3ufuL_77-9zjxr zn#yf@2*fS{n<4GLzkpud+o-x3QXGO9Ia_Z#XEJ@dl7cvIftMtyDbM!5W9g5*rTuxY z4`a0(tA{YUdoBanc2<^E_={JZbLn%KcCxcJJ-+2#fSPU)Cpaj4^94OgFV=KjYeVE2 zWAFoV5Q(L7oM}D4g=jCnjftKWu=*oaUx`VFfR7egE+fHfZJEb^wNKH1x?8dNNuBS@ z+e*ghFOL!kSWsIG$(TQ4-V2$!`I7Tj&7qZ~H$8j4c9XjPDf;Fu;}ZQ=DVf~WQgn|G zqj@|uObE;D2EOitB|MKq#vP>Eg7fA=qE&0oVvu^ukc!gRoTA@q4FFf4l&071Vx8# zUriY#rn#wKI1b+-V%>B@QXRB${@E(awp;fj2ZKyZeF6TM^lfmB37Y&10vX`S}m^G?gj_G~DtSAfUpMV-P<5wGU{$Y!*?kHc&(_}F(RH`7y3 zfOTUrnalm&={n1N(t@`KN|!*|-~7^obN>coH`mg-EOz$xvKv)x!Jql5&ox|_=ez&M z{Jo5}8&+^YoV4BzyKcFjA2-bEI9`~xD2 zlk(Yskc=B^*nxAOW}FbesBIzk6Ft7?9yE{ExMU{P!!P;ubCy>7H*zSeEyQto#~?nc zxIVYj@{`wvsJZKW>K<8~3Pe#Sd@gmu6S!W-=z#0>XDq7r^J(TI>4mJZ)HsH+YZR9@ z(21r(sv8iP&A&{}DR}SyExB*8{Hb2^>8^ad7L!=pBJOv=IAX0?iKTi!X$Z3y?LFW~YAMZAIt5|J(!|on3ur z6&Y-lv_znoEEI+4afnr_NOADtRzRAseJaX46Xk_l9mzG}Vj~jn!g8+%^IEq#4J9&@ zhVHBG`zN7sDIxrc9uSUz^9_LH&qb1Y>UgUxwRt(ex7hQ>?<|d&xHX2u*{>q&#&}=* z`>)f!w->Iv28Zq7a?d<$0Qr?GIuAd-%;;4pd{>SIk+hUhf2}mNnAX;c_y|hV{I)jz zBX7_tXA3Uk1{d$9qH5w(0dEg8Bq>4YBa|@5+X8Hb&$^o>5jY@1Ed3y|*zjIC6`$$&w1mOr8h& zMt^Wz9E+YVD*6&Vt~0H zjl}r7+ff=Z-0N1%BEIruN*?P_?uhE|f?LLn{9Ui5*hQpA9+Rausr*7`A7Tp3l(+7V z$t?(U<43N3&?1o^bOkFDp3EovR-5%$kQ%Odm^HtA{nF1?sol2PedR8%2n>`zl>CuC z9q77YHqMzDN8yf1+X3`{q5yEG+eGOIxT(LTx}2? z+F^r2eH#AL2JK-sMMb$|Rxu2TS;`3GN6gpPb@v9aU4;8xR`1%B$ST@i3zhr5egED> zXpV;2xVZ4d@j6Tud$Qa`c3$@G`lUFrI5=Gv@=>Y{vCueZ1)xDEcz*U`LJE@XRP*UlVfNUB(#nxaKU zo-z8B^2*$=dV9amlXix!^q>2DuOmySTGIugtXFosK58FQ$Z)6*f<2==WrL$npGY_9 zjof?UVxsh~OMAK*RUIL_XMH^kxEMpekF#~#0$VXQ7cm5FUXW3aB}GCJ`0+tx6Dx+Q zYjM1psBEFm7%5AJJ4FQ9jr~mWmLzeP+FBY^dXO2NQliXbLJ6yJ`M@e^*Wi*w4PsEK z^)4BWP7q)MZzQ%ziJP{JlsRX_85@x63Uj{jcwfn1Pbk*pdU%JA$X%e`?`ZA&YZmfb zar<7}(kEviTyX3@jmf~}->({f_WBKp_Z+D+5BkI2EOhu%ema+A#F&X+3=t(#apJJW z=Z?${yaK=$mzDLqT1yU>J{Fn&JQ5P4lEWmWfMM#$u`DR>t==swbh&0>dhMqwtFHny z*{~29Bt>t~;M9<5O1OlV)xlc+zhmyTF_*QRtX3cqejS4HMH{+7FI>?XA_8u~lDotZ2pK z<+~7MSIsJ&)+Cw5TakNeF}4TJxf)Xx)r{`jZ^QJ?xZX@&4QJ1);uB3DA~h|`NYXKj z;xTG#;M%Qbf2LJ6cu$e|QX+tpggi0lHrKo9seLh*!iz;mwd+@B8crRc<9UZ>um4t! 
z+x+1vJunIw$nA^SSGyWGznWUeX#9Be%;r_yz_7H`#fB4afq%K=<}TKUsSeHHuYY!~ zc#}bk@b#hht%x3@i@dZL^e)N!`Vi1$H&?k=~NSI{_KE2FOO zotoHM3H$mY>pe4@eRi);KP3u(kek5nb(!+!z40n|>lkRzw^(DYV80gLIjZyiDr}1( z^JRmg*WgOfyz?aCnb*|>tk`=;Wy>9gXa~!d*tErCy5sM zLKgUa=63W#E-0>y0kPW%PqXU0G{LSG?h|%?)~OOT4Q}h@kg+^P56S zkvGnnqon&{FE-a%PuiY3Hu$HkGQ!WjCKJ>gym$kOtCn68!aH8!VlOgV=sj!e9=rdl>FgsDCAQ#oI zvwqws`faA`^P=HV(_b)^Uu26SRs zI}=$|k~K-gv}OT|n_=2sb=PTBpPplXfZ&Yo((EfvfmPF7bU5Gl$S5yXNzeKtb@kY@ z8fH%=pHVi()Rs7Q8wHn(_%}|hx5Elr5(dTgWc~K`-!x_aQS?mgySP= zH2fTW?aV7Fb>w0Z*6cB*YWPFAOmMj|5cZfnSzg+nb7VpIh-LY8r1Jh8RCU)Igy!j& zkM*C)G})^9xYtz@-(#Sw;u3?FlgTBP6CH+hY2QA@rU(slm<~hJrkD9(4c$rWxNJIf=dCNY%Sm(U zy!G$wbiv|9v#(`;P|sV$bXtb?cp$45J@RI1C0!uXr++QqjP{T8$>`du-F0-Ku~82O z&Ag_MJ1wpEa?qNmzbnXNq~I@d9YtR__-5)viNPL z?Z|cAwDYyYmD*8rFcsF{KpF-fZ5nX$K01M%Vy%w&VbTwrP77Ks@)*;{xwB}Hy$J7ncxsFYT0~NPzpX zY4*bG*m^tKYR%sDep~P%59`}aP*=GYDy_aaCka)27W0)FWu7V`!1*&wZN|p?W&+*Q zlJXC<6MW$L_gcZ3vRz5kW5SBYqLL<4^WiLZ_^V#+nwEaGYPjHm3JE7_VYND&l&B1 zuu8xlgIL-m)<^o=cpoq<^%RSfeDyCiRa+LT*vf>tdA}Ajl6c>1%s9ZfO1me(&&&wF zn33coyKc2?K41qq8J?iF?zQggCYfCOYzf|x<1u~_2k+1HDLd+2*IWEXrFP?dWGp2n zQsq(qrpM&)@(GfW{A_y$^uFU2<&}Of}pMh)cFQdmsHN0_`84NOr(a98+jT zwBL(-2Twr`kn*Hbul~%%C}@pREoCPy&1r4xCbP2(OiX>&4c;7K(yU){{@wFVxosND zi(}Y?q5m2iPX11lqMud;(;F>}379)I{HYe-+iUSFT>-79S$T8#l<|Mhw^S8f$GutD7AnhG>F zFnzyF?xnpP`smcONb4qoHM>uuW&i?Y^`X8SHL#)gkN`_JueMWN<3P@kE0?kpH|CXA(3>phXT7>6QEb=p&OdnT^u>K4~vHf>Dl zxjoSb8LYedb`_`Z(qryjSzfg?E6K@g#-upV-t9Sod~gip447>&n4l%9s790 z>P+3auc=AA{M!>et+*&W?uz{rwVWvKR82KUcSOG9k<0L2Sqygs2H)8Cd@*0#r(WJ{ z*o}O;V7y$rU;#ZNuBU0^nx1@C076wy*Nx|YdGocTO zykwuBc#h@yi^z8kCY6nY`6WR=_-Zvs*#Hd2vFhBnPEV4vC7R8r!W;)z;K{w?`Z=Ag zJrbBjMVaw=VY9oFW6ADH%E;N(MWb5qrlj{so1+(GB$88jSJVruH};yh(6qLA@m+7j zT9E%b^WA~#Bwfq1xort{BL^WQmd4YhmByUiz&$+5+PGE8v~5(}kc9TQXf7TpxG z-5Ys|Co_G|7%dr6lIj~xN%+Co)=eL~;e~_9{5U>GeVcXo%8mu0^Xi9LNl1av#1oAH zhY3WdQJiKvb5b3tkm@_F0M61FZ=lil8a}OR%La5&q;K1Tj#h7%ri(Mvmt_ zr{k2{79nA2(Mb^D-l=!wKMJx3bZV(xPvc}7(s-n$lw|wn^I4V+j`}i7LOKPHH0!2O zDlK9S<702kinqQiP8%5k>996Qm<()wq3%UL98Ab~GM|s#KsC~#3va;D*ygrK z#EX@P4|U<-}5_(H1ZLysJT_nD0tO9=mHMYV~5J^Jt9iIyfvc=5oi=l$DnJ$tK%voSDMPQ9pYY=P%lqI*1`;>%vvOlh&4$MY!mWBvQ``0M)<_e z9BJK_`@0x>M`zI99yegNvSoh!B%{{!w9S<=33MXs*1-wL>)r^*;!S)uTvHk%06wt|Us+%flA-FMG1U9+6e@%y#>7@1cXn+exrw*4%f%NL`uatozm)6s{VXuOq%8f zzl2#{fw__6>6QGt($l>H7Pot~lc|*QgY^lc+KyJLV!O*8&OtPu`licIy|K@$#Jf`( zqXU9W^@7aA%$L|MB;*D{oU0Q7WaNg(R&Du_Omhjr7Ts1qqn&CKkb!aDf_PYhr#yDZ zDpGGMF8-r47owN6^w2yaLg@tjSaE-u(|{eGzL|OB;2^yd{qBQK@sbOPM{n*e)WbgZ z`cg=%1+{xr!-6USq~87-;&GXnqMURyaVq)g|Imbo%sgDAJ8={g)gUlYy${K}STk6`s1_m3&>3r0J zxCr7g7_ivRZMuNHl(#X?!@!C*Kd$7{8|p=*pbBz>Iw$eh3uPB1_AE())yNyVv)+~@ z^(_=mX>2}o=1(9BYu9 zvUD_L#T}KII^1v-jP+8jEXi5VpA-2ov8JGcdQ-DD7B`erJ#OaM6VQyZMH$;e>VWg` z{YM0^bNC{{LXu+>fb0e(H~=%#$`jF#B@M#yLxO~tR>>SLVjhR}EV=5f%lMgpO7geKZ zn4H1|dJ|GP;WiV259JfzCDg^cjdv;<6CYu-CZW1zv1TOu;f5T_9(4(y=esm*bzZp^ zPF@?!?&k0xz4r-HkzMVie16h5Q;;ayP9@?pOWW)6wF2y` zQ@+(1%E&sE%Kd-fG=MKY`U5S zS{J+ENvk(*$iT*{YL=vq)a}bX0IMxR6drAips$)UoV%^=H3H06vFm$e-UesSmUe~O z4Q86iom7S1`)*cpAWvVPUUj<{mG!4x5zSj=RZQGA#cTX9Uwvw_80ttaw}_{=o+9BV zPD6azeBu=(z?7nPWADPSX)4DQoI z`Hw1t>&i{$tNBrR_k9P}Gu+u*_2L-oCn8rQoi0dQTrY_Msj&*+-j5>Df;G{FlW-Dz z_LFOR!Rr|q*NyAx!PykbSt_ndc^X>5%v_J0&)06-Z&%!SW`DF5KB4V@G9y!b?=o;i z;vJOo6$%y-S6U^w41Rhwt(SHNp^|s-Y;PnupbZyIOZ2|8$BD<_M@V% z8n30VPO}?em1aMp@GOY_)g+Ox13he^FRrYcYM_p;)${U!q@*xav$l6p z&(4#_iIKin0-l@L_zE8MQ>Oe|H}n><2V92(!~y#^EP3@ls%sVvYytsUZKWSIrx;&A z>rg|#;aS+Ow`cjA$-j3W%^xas<#j#TY2bl(t;-F!hj?26X5m>nE#-WrI$pvz`bxps zv$m5!o?Tc};`-h!)D-VO}B&%&BaEA{_Z7BMI6d&tRU9YQj z-yopZn<+!v>yHZGh|X*7Y3a$oru+bphCG_(7SJr 
zcYB?6n7np2OV-RZ)4zGx>;)L$w?)TqRGyK! zJX4XTvC(lUad(Y)YUxQ~l6!ivbx;2uZdlD`lx4fV+}zF+mBJF&Kmr2#%Yw~vd}x8r z(Fn}pQICk)FymEXevGZLb#0v%{;FSyFQVeW%3iq6UiJcDq)8)Pd)Vf`)=IROy1TK! zL(OEDqv}4dC?7ZDix4-&`*6>Nw~GSL#(TK#<0iWdKgcu^+WrSC^FI%d&?&Aw{XRuy?(%APJKiId2ml?+yV=m?ff* zHyN0eSpbKfSpF$QD;q2b2@ZAu1 z-CPi#Zq@rg?dGdk8T`^}Z5oJS)>9D+#+2uA`rD_=kb3ltAxjamjP|0rGY5>$K#e0~ z^u+GKDiN*S!hMTTd8<1BL_bH-lD06Y>|&q>9f>82G17 zBWz2Q;px(b7xf=duy5Q(;WGeU&#|;hjXA~LWoJYvIi-RJGM`}{&k>A$T$Ex)_c5yA zg^eaBMc~YIv0QRPeDi6YaoVea*@<;rCb&QrDTnTw-DBPjH+P<+A4?G^Z5ZoD7sR?n zrd#+LV`YGQdh&c2C9Cn##=OKzf)R$9s*T$%G}sRmednudw>2GN+Y7fX6V+fMibM>j zZ{)(c^=$cMb_gUR3Hw8F5%u?~E9Y2x>jp5ZzHygx4fkifD6)taaX`Am+&>J*OA!%+ zeFO5EAcqE3B}vVv(fh;;X`VOfk#fnmMMfM_9Z`sLXjl%HDkrpYRPCadYU_3$VW)QI8Dx@@#VRpn znfbu=Mjt=wn{f$xN5jr9s=Q*SPnWu6VupHJ9290VVdCS!JFJ_T7Eoi_qy(K0ZybA$ zwK?Dvo4=FQVa}sX)Nk>DzQU1prADduiwiR1)VpincRuJ0Ziva-2d}fE>fA0|Pn=`A z+%;&l&g*eVc84MMP#a#PyKysC(r&xPPnD0NSt)lJDy{hKdLW+97G#gs?Ciy#Vu*;6 z9o}YQ-C1#EZvkmWR3n>e)BBvyvl*4CJS5*XT-#$jTVH8=@-w96!nSkwRyg)#iG}I* z`~eQ37e(DUjANWUExaUgrE%AqIhGB9l0C!A#|BWIhK(2MM6DZjN7>JyZzJp?BLLNf zJe!VVPCZT2-mf$N^ldc8LBvNsGF(lo?p55L>`oo=d%)l#oiWwZhJXiUE?w$ z^7DG9U`TsJN)h}m&xZ!IP|$X*dUScXQx_I_)1=WPf4H7lf3ce^9-+F?cE0T#P(`gj z3|c3!=yuz&uCCl6kZW}sq?Q1v8-0U{Oj7+8)X-2hY-ipq6XL7iFG!$%h zY=>)n8C zumShcnQ4INkJrpG1s?)&ZOMyJ4So`xgfKp&ev=Q>#>^`WSu|+A5Wnl8btysZwA91a zYx$ge_T|Q}vSEi+JVh#dYqi;RJVkxraUi8rD&V)G=sQnUDES`iZ`0-v<}RNWkuCbi ze~$ZUH)+m<6E&mY=L_FljWTGQA%aGYyWLfuMyIGzFJfp4TN-=m#$nbUt>M5)FeAc@ z;ZX2o;d*-CqNwF|0~8rAZ!#Qc#V$9$kjuLi?0-gHfF!cKZuMBZ&R>j)8+kZ77R3Hh zL6TW=Y(Co$EWh=wJ^OhuNy*gHx01BsGsKoIyn-ZUU?Pj{Y95WdHTaD%u?Ux_## zxC~3mTbT*Ka5;vhc~^H<+sH?b2XQEFH-zO%dEO;-Pi+Q${{6$FXN@d;AWRn9g?0@; zKvDOaO}k3QEK5MQl-V$@aSe};4=X$Kz@#F$w2{l;wDc;Vy~?AG0aoVUBj9b1u&lIjFC^`t}Sv>tGC5!;C}<#3<3x?WT| z;93^vZ|ry)-!R8Tssk{qiji|DR_}06-9r6mWK95mbklEWZJ*0xeb8duPwuV1vklhG zpw{qDEJ{28xxaNo4v!Bhh?U++bR|px^=Cgm^er;w3!Ax*5{B9ZEV|lCA2TGl^ z=1m~(xA=aLkP_=IO9;BWR)9`d2aTO(7S_Y-KDzNDvvZDxUs&6Vgr=@`bLd@iH&Alm7j0Inhh2w4d}r~18ZD? 
z?&U9fEhZFD*PW7868g?Qi0uBu!!ycxF8kS`W`ir>NCWvBUcL-b&-0^>cy^J-{iCad zW)YFFp|kL%p625FOLDGeI~Vdhq`?*3QZUC!uj{=_lRO{u z(r%VFe;;3Ce|wtBR{yf5aqP15;1Dj9I3!QOu~df(7kLchYUnj(bSf8cv=XxKfk9hi zDAyD+E@k&ST#J^jxS8gOxj9QUhO7?V5i;M(a2l9JjvA9>nzfP)dimrX8m$LpKVg;G}QWczKeHotDXgJ z$%&nlUTtU-nen+6(hq*@*KsE5xpFwmXm3-s$D!A7%DisfrqN0jG(F#>Tb{Ij)Y}Yi z_x9=RWjFAUe#fJJAvv7fGnKv=KbW{>%{0hc0N~q6hpcN<3a`|JihCjE7uHNNaF8y$(42^<3$EvO#nq`__ zd4vT3_*2cx?Ug!po=~i|2Mb6?>1gBv=T6yPP3%TIg*YpuYHk%sy?fixMBZOAGGCpG zqPUVx9Y{ayxLOOKBo)%25q+>%})Ft{3_ewlfI|j<4HJHogo-5?WNK zBS&hZFzrg9NS6oaZ)ImZ4>M4H125eKJN^u`?w(Vf#A9Onf2%TW7zif_N_>&hS)Bm6}dIL>C8VrImt%~Eama{l<)eM1(nt9Z4Iy|qGEmhG1w{G@+zl#{qs_}X=Mp)5RV!dr` zG;6atbgdHD;8?d|M$T(xw{Y)JiQ(i?olV6T<(G)>y+K?J8dL5D!?~ECX==7yQ`@=Pf~Q2c-flMI^KK})^^bcx`7kOAgN%SZ#axSM1zi@`hn>Ph%}7XsgXg~DiRz)& zoI9U*Z3|D>8i^@|I|8w?N;pgJ4SG3jqc^#PuUuVOvX#n}PVB7Iw6Im1?SB0Ws@FHv z_FC9|TO6irO1O>na&r4?GnCo;Xx)Aqkm&bO;!Lk*FLwu=c+IpEAE{DZQ8rbVm0kC= z3S8YnZ-Ja$$hy)AeM0LBFx7dUd%GifwdlDpeTc);+v1 zHP4Y~BG|w>oohJz+$s|^BVeEZov?o6&xmH>>AEd6U4-5I^EnD?wo{tIZ8Dr->niH5 zpW!B`UM=JuXQ(CEo*bDp#xb+fXfCCuC%gl; zoZ24VqF$HN+Wt9n!{h6t2~1!oc3tpQn>Mhy`h_0tjVNyVUc?X!eDEfk5#As{CD!rVook;j8#FRC*L_Ga5 zov&Ll{8ysKm>j~9qvZ?@<<1uo z($H_U`WM83X%>X>1k?IWZWfeAyBU(^^;;?lh!Meh=^161?&9IF%VF+tr7{h(jVL^o zjw;$w3eZqJcxVet&KWL_waZ|bRc@D)V-sw>v~7fXx{)jAmYiC6yCh8?Obl^gcFjCP zt+tP>PM3lA-?Ciu`9Gog=O>}^( zaBJw&_=vt9p!N__ZCN9SvYBAe3Tsl+EbIefJi)p9W8zM)_$3xk@?TKv_(s9D*UeAw zFGh-z?q>GjGcZ1}L9M;(QOl0oL*BYlK>p72l{J#&4z8v%KWHv=h;Yr>X zp*i>i14FP+_Jf#)2MoJAJs=MgLp1R6aTa)~jUM4H#F>5T{Z6b}{EM`kIC~NE!u45_ zdtvv(Snh}QH%_lQgJVs5k8cI>P1Ntb|J3rZ)5xpNIe z2)LyAqt}^{ikKqbtBODU*}wRWzSU8Ro&r*t3SBUNvst${q<5`}&SX^H75f}y!}P@E zXX@X%RlPDDuZp~>?SXtE*Yj5?e-cl@1iyU#4M2b95JstB1^(_FFb%VT$SF^2z~=kFs$ zuP!mr2V;OgsU6acm&$aI&7ZZ2v~DQV-ZLO)_b|UEv!NC`Y_V51ReXcB(E&L-VJPsi zq-V4rVzv5BjYEY6w$P;{k2ZdTW$&<9S<-z9GfH}z5$u7|i`nnNn8opRe(ZVA_s{xm zTHD0=G7ed<(ergZ%rHXd(A?^Ajllr# z4}#fl&SCjL+O_bez1uzSJxDn1FUp5-DAwi9yBLi3=o`CRj>!^rNufMT*}3oDgq_3t zgzS6&_+ntRtWDphW%;&0dw|hGZMokm=C{B7`6m`W>fa1be-Fj@0{CLb^zY-p^~QsZ z|1(nxde{HgA8bHf4=bnbe9#2+`(>{8=mgj3#Q{4TZB7pG9KFm8em3qtHg0MT#p0SM zcZdnkPITd6=W7zbj-2&=p5C(6^xcnJ=Eg#QR-Ac_~n|U7dnP)<*t5m5@6F?++n?r);VaDF#^Gx3^bB-6x;}$OGYPE%z z24d_fgYISNx1!Eca6->Hk!YG6!{l4v#wCWs0-^fQ0Qj7d_eFeFE;)&P>brG+G?NP? 
z+TXhq$|Ah#8@Vp`o2xMWn&+M;=YG4n&5aa4DHjCF0o$%T95`X#VLVe_f3BNNq$uZbiyxC z()u~N-q#(rit7Q`VbOe6^fN_G72h&mdtnxz*XZLS99b7tJoS&DPR2|UMHLETiV8%4 zcqu$!s(nzM40$n_;>!xjcoF;8T!G>g&q1&4kQAeb;p!E$O}ImLIIjn=76!@KowUyI ztC2&-k&uM^%0;1DqV<|SZ%}=O@wg1Gj2Y$N%MdUkvp>y3$`IRYqv0PQ?vawC&`#PX zb3I3Z8>5;PQS0XDKDL_4VRIqQMuw0YqfUMG&a+$2&~>A`#vh&Ulo>~rW84m-fGr0F z4A<>ksqr`cl7PS9tK(e@F0c81#7asPTn|x6+t*SK+T!2|a*v#v^ZYn>sfPQ8&35T2 zf1w^2Vz=4@y^(Q+XikoYXf|(clQ!E(CyN0dT~0LW#xOgeh=%cvd6LQ%;al$2|t3%IIR}B5;!9l-}Y8m zwymbpZEYLg_%R$uYD&Md+cz4o7`gZ@+qPd4v$wH{z!J*p4nmW1Ur;i>8qYApo961| z*(Ovkgq|?a+P>%ZTEq+rAR|FhaaepXxTxb51^;}L3tffc-0p3wg?M>Og(<#^hcom9 zBCH(E!xvX2m&25ImkNr`Zo=U*p3aHFbR!*SDJdJ`A5W!8DeUPry0T4+yNWCmv-IU>B>Rc6zTJso+9WVWx14L2G%2D=Vw+8Slg zh6gx!iH0l=){OhGIr={9|B_Bj7G~1_Vb2xnzLj(cqS%W^ONdV!r~;PYYVhu-P{LjF zng}C9)a)*8Pk?65(3IKV+t|-=xf&Y=zh>XLcrqh7KXR+tEs)usTZtcXPNr{X5?uI^VDYltQ<4~?$b$&>1M>NPQ#jb8^GXA){g^Fc zO#c3^X|FrxI>pUzQHh64y`lzGR&(h@DX20QE!G6$c9O|a83!P>Xw z=XdHmHBi;Rh&sCrt3K@_*AQGyd0XvdvuW2Cp1TG*OK^|8l>6-VU#9Q(7e$e`)W(VE z&CKCLten3Jys9a4=4WGbPfWTQ36xem*^KAQTV60HX~s%}^s{d`Jo{+IRD12+!@4XxlVLk&O+-Xso?g3i)hNkF_EV?VM% zc#>SvyWqCMF5gdX48AORKmL~FxTY6HORw`gl{!$+kY6l-6*PJ$5?y<_yXY?^i{f9l zZYh%Wp6Vw-4eL{6U(x{Yd43-_fC9399u5WGYS~w~p1rzXlgOj|0JGSEbpLYwkIKLG zV;(*S?|ZWkyeNEnZ42|YF=(3~fxT(_7Wq(xu38lf$XJbOF)y9KLT`!RNdEQ?< zP*58d5o(B5CzDfBZI3ybu{+_j`|x`phn4q#wW4)~WEFQ~1uO)5lJwvIzbx-{R~4 z^6q#Kg9LnYxqOM6WO}p?y9RNvzH{QODIhEj(|kXuKzZcfzi}qZt{<*ySP=~U@=NZj!5%0o`9Du0~h*iAvQGMBM49wf&u`ukR_FHPpC zaU6HfInONRy*ceOSGvA!)v9ERig*MgrwH9f0;k?lYn2b^S?5~z+}dG`}OGmNw6 zCw^=IBtBj)J;;zMR>!{{woASB(Gw literal 0 HcmV?d00001 From a420f9083ea9993152a5221b8875c34470561f13 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Fri, 2 Jun 2023 14:24:43 -0700 Subject: [PATCH 005/123] [TTS] Implement new vocoder dataset (#6670) * [TTS] Implement new vocoder dataset Signed-off-by: Ryan * [TTS] Redo config structure, minor fixes Signed-off-by: Ryan * [TTS] Fix alignment logging Signed-off-by: Ryan * [TTS] Fix script usage example Signed-off-by: Ryan * [TTS] Fixed epoch LR scheduling Signed-off-by: Ryan * [TTS] Support .nemo checkpoint in FP callback Signed-off-by: Ryan * [TTS] Remove align interpolator Signed-off-by: Ryan * [TTS] Remove HiFi-GAN defaults list interpolation Signed-off-by: Ryan * [TTS] Rename weighted_sample_steps to weighted_sampling_steps_per_epoch Signed-off-by: Ryan --------- Signed-off-by: Ryan --- .../{fastpitch_22050.yaml => fastpitch.yaml} | 28 +-- examples/tts/conf/hifigan/hifigan_data.yaml | 133 ++++++++++++ .../tts/conf/hifigan/sample/sample_22050.yaml | 3 + .../tts/conf/hifigan/sample/sample_44100.yaml | 3 + .../asr/parts/preprocessing/segment.py | 1 + .../tts/data/text_to_speech_dataset.py | 64 +++--- nemo/collections/tts/data/vocoder_dataset.py | 202 ++++++++++++++++++ nemo/collections/tts/models/hifigan.py | 195 +++++++++++------ nemo/collections/tts/parts/utils/callbacks.py | 47 +++- nemo/core/optim/lr_scheduler.py | 1 + .../tts/preprocess_audio.py | 2 +- .../dataset_processing/tts/preprocess_text.py | 4 +- 12 files changed, 554 insertions(+), 129 deletions(-) rename examples/tts/conf/fastpitch/{fastpitch_22050.yaml => fastpitch.yaml} (90%) create mode 100644 examples/tts/conf/hifigan/hifigan_data.yaml create mode 100644 examples/tts/conf/hifigan/sample/sample_22050.yaml create mode 100644 examples/tts/conf/hifigan/sample/sample_44100.yaml create mode 100644 nemo/collections/tts/data/vocoder_dataset.py diff --git a/examples/tts/conf/fastpitch/fastpitch_22050.yaml b/examples/tts/conf/fastpitch/fastpitch.yaml similarity index 90% rename 
from examples/tts/conf/fastpitch/fastpitch_22050.yaml rename to examples/tts/conf/fastpitch/fastpitch.yaml index 4022e8e91c97..1d552d058d76 100644 --- a/examples/tts/conf/fastpitch/fastpitch_22050.yaml +++ b/examples/tts/conf/fastpitch/fastpitch.yaml @@ -1,12 +1,15 @@ -# This config contains the default values for training a FastPitch model with aligner. +# This config contains the default values for training an English FastPitch model. # If you want to train a model on other dataset, you can change config values according to your dataset. # Most dataset-specific arguments are in the head of the config file, see below. name: FastPitch +defaults: + - feature: ??? + max_epochs: ??? batch_size: 32 -weighted_sample_steps: null +weighted_sampling_steps_per_epoch: null n_speakers: ??? speaker_path: null @@ -24,9 +27,6 @@ vocoder_type: ??? vocoder_name: null vocoder_checkpoint_path: null -defaults: - - feature: feature_22050 - model: learn_alignment: true bin_loss_warmup_epochs: 100 @@ -44,6 +44,7 @@ model: dur_loss_scale: 0.1 pitch_loss_scale: 0.1 energy_loss_scale: 0.1 + aligner_loss_scale: 0.1 preprocessor: _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor @@ -79,7 +80,6 @@ model: phoneme_dict: ${phoneme_dict_path} heteronyms: ${heteronyms_path} phoneme_probability: 0.8 - # Relies on the heteronyms list for anything that needs to be disambiguated ignore_ambiguous_words: false use_chars: true use_stresses: true @@ -94,30 +94,24 @@ model: field: energy stats_path: ${feature_stats_path} - align_prior_config: - _target_: nemo.collections.tts.data.text_to_speech_dataset.AlignPriorConfig - hop_length: ${feature.hop_length} - use_beta_binomial_interpolator: false - train_ds: dataset: _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset dataset_meta: ${train_ds_meta} - weighted_sample_steps: ${weighted_sample_steps} + weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} sample_rate: ${feature.sample_rate} speaker_path: ${speaker_path} + align_prior_hop_length: ${feature.hop_length} featurizers: ${feature.featurizers} feature_processors: pitch: ${model.pitch_processor} energy: ${model.energy_processor} - align_prior_config: ${model.align_prior_config} min_duration: 0.1 max_duration: 10.0 dataloader_params: batch_size: ${batch_size} - drop_last: true - num_workers: 8 + num_workers: 4 validation_ds: dataset: @@ -125,11 +119,11 @@ model: dataset_meta: ${val_ds_meta} sample_rate: ${feature.sample_rate} speaker_path: ${speaker_path} + align_prior_hop_length: ${feature.hop_length} featurizers: ${feature.featurizers} feature_processors: pitch: ${model.pitch_processor} energy: ${model.energy_processor} - align_prior_config: ${model.align_prior_config} dataloader_params: batch_size: ${batch_size} @@ -158,7 +152,7 @@ model: text_tokenizer: ${model.text_tokenizer} sample_rate: ${feature.sample_rate} speaker_path: ${speaker_path} - align_prior_config: ${model.align_prior_config} + align_prior_hop_length: ${feature.hop_length} featurizers: ${feature.featurizers} feature_processors: diff --git a/examples/tts/conf/hifigan/hifigan_data.yaml b/examples/tts/conf/hifigan/hifigan_data.yaml new file mode 100644 index 000000000000..fde2f169aa8d --- /dev/null +++ b/examples/tts/conf/hifigan/hifigan_data.yaml @@ -0,0 +1,133 @@ +# This config contains the default values for training a HiFi-GAN model. +# If you want to train model on other dataset, you can change config values according to your dataset. 
+# Most dataset-specific arguments are in the head of the config file, see below. + +name: "HifiGan" + +defaults: + - feature: ??? + - sample: ??? + - model/generator: ??? + +max_epochs: ??? +batch_size: 16 +weighted_sampling_steps_per_epoch: null + +train_ds_meta: ??? +val_ds_meta: ??? +log_ds_meta: ??? + +log_dir: ??? + +model: + + max_epochs: ${max_epochs} + steps_per_epoch: ${weighted_sampling_steps_per_epoch} + l1_loss_factor: 60 + + preprocessor: + _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures + nfilt: ${feature.mel_feature.mel_dim} + lowfreq: ${feature.mel_feature.lowfreq} + highfreq: ${feature.mel_feature.highfreq} + n_fft: ${feature.win_length} + n_window_size: ${feature.win_length} + n_window_stride: ${feature.hop_length} + pad_to: 0 + pad_value: 0 + exact_pad: true + sample_rate: ${feature.sample_rate} + window: hann + normalize: null + preemph: null + dither: 0.0 + frame_splicing: 1 + log: true + log_zero_guard_type: add + log_zero_guard_value: 1.0 + mag_power: 1.0 + mel_norm: null + use_grads: false + + train_ds: + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch} + sample_rate: ${feature.sample_rate} + n_samples: ${sample.train_n_samples} + min_duration: 0.4 + max_duration: null + dataset_meta: ${train_ds_meta} + + dataloader_params: + batch_size: ${batch_size} + num_workers: 4 + + validation_ds: + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + sample_rate: ${feature.sample_rate} + n_samples: ${sample.val_n_samples} + min_duration: 3.0 + max_duration: null + dataset_meta: ${val_ds_meta} + + dataloader_params: + batch_size: ${batch_size} + num_workers: 2 + + log_config: + log_dir: ${log_dir} + log_epochs: [10, 50] + epoch_frequency: 100 + log_tensorboard: false + log_wandb: false + + generators: + - _target_: nemo.collections.tts.parts.utils.callbacks.VocoderArtifactGenerator + + dataset: + _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset + sample_rate: ${feature.sample_rate} + n_samples: null + min_duration: null + max_duration: null + dataset_meta: ${log_ds_meta} + + dataloader_params: + batch_size: 4 + num_workers: 2 + + optim: + _target_: torch.optim.AdamW + lr: 2e-4 + betas: [0.8, 0.99] + weight_decay: 1e-6 + sched: + name: ExponentialLR + gamma: 0.999 + +trainer: + num_nodes: 1 + devices: 1 + accelerator: gpu + strategy: ddp + precision: 16 + max_epochs: ${max_epochs} + accumulate_grad_batches: 1 + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + log_every_n_steps: 100 + check_val_every_n_epoch: 10 + benchmark: false + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + create_wandb_logger: false + checkpoint_callback_params: + monitor: val_loss + resume_if_exists: false + resume_ignore_no_checkpoint: false diff --git a/examples/tts/conf/hifigan/sample/sample_22050.yaml b/examples/tts/conf/hifigan/sample/sample_22050.yaml new file mode 100644 index 000000000000..18bc206e2566 --- /dev/null +++ b/examples/tts/conf/hifigan/sample/sample_22050.yaml @@ -0,0 +1,3 @@ +# Audio dataset sampling config for 22.05khz sampling rate +train_n_samples: 8192 +val_n_samples: 66048 diff --git a/examples/tts/conf/hifigan/sample/sample_44100.yaml b/examples/tts/conf/hifigan/sample/sample_44100.yaml new file mode 100644 index 000000000000..d8315623bbbe --- /dev/null +++ 
b/examples/tts/conf/hifigan/sample/sample_44100.yaml @@ -0,0 +1,3 @@ +# Audio dataset sampling config for 44.1khz sampling rate +train_n_samples: 16384 +val_n_samples: 131072 diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py index 89458ff4c4f6..d586137d5ff2 100644 --- a/nemo/collections/asr/parts/preprocessing/segment.py +++ b/nemo/collections/asr/parts/preprocessing/segment.py @@ -423,6 +423,7 @@ def segment_from_file( samples = f.read(dtype='float32') except RuntimeError as e: logging.error(f"Loading {audio_file} via SoundFile raised RuntimeError: `{e}`.") + raise e features = cls( samples, sample_rate, target_sr=target_sr, trim=trim, orig_sr=orig_sr, channel_selector=channel_selector diff --git a/nemo/collections/tts/data/text_to_speech_dataset.py b/nemo/collections/tts/data/text_to_speech_dataset.py index 47868d41d1ec..23ddb50346a2 100644 --- a/nemo/collections/tts/data/text_to_speech_dataset.py +++ b/nemo/collections/tts/data/text_to_speech_dataset.py @@ -25,7 +25,6 @@ from nemo.collections.tts.parts.preprocessing.feature_processors import FeatureProcessor from nemo.collections.tts.parts.preprocessing.features import Featurizer from nemo.collections.tts.parts.utils.tts_dataset_utils import ( - BetaBinomialInterpolator, beta_binomial_prior_distribution, filter_dataset_by_duration, get_abs_rel_paths, @@ -55,12 +54,6 @@ class DatasetSample: speaker_index: int = None -@dataclass -class AlignPriorConfig: - hop_length: int - use_beta_binomial_interpolator: bool = False - - @experimental class TextToSpeechDataset(Dataset): """ @@ -71,15 +64,16 @@ class TextToSpeechDataset(Dataset): sample_rate: Sample rate to load audio as. If the audio is stored at a different sample rate, then it will be resampled. text_tokenizer: Tokenizer to apply to the text field. - weighted_sample_steps: Optional int, If provided, then data will be sampled (with replacement) based on + weighted_sampling_steps_per_epoch: Optional int, If provided, then data will be sampled (with replacement) based on the sample weights provided in the dataset metadata. If None, then sample weights will be ignored. speaker_path: Optional, path to JSON file with speaker indices, for multi-speaker training. Can be created with scripts.dataset_processing.tts.create_speaker_map.py featurizers: Optional, list of featurizers to load feature data from. Should be the same config provided when running scripts.dataset_processing.tts.compute_features.py before training. feature_processors: Optional, list of feature processors to run on training examples. - align_prior_config: Optional, if provided alignment prior will be calculated and included in - batch output. + align_prior_hop_length: Optional int, hop length of audio features. + If provided alignment prior will be calculated and included in batch output. Must match hop length + of audio features used for training. min_duration: Optional float, if provided audio files in the training manifest shorter than 'min_duration' will be ignored. 
max_duration: Optional float, if provided audio files in the training manifest longer than 'max_duration' @@ -88,14 +82,14 @@ class TextToSpeechDataset(Dataset): def __init__( self, - dataset_meta: Dict[str, DatasetMeta], + dataset_meta: Dict, sample_rate: int, text_tokenizer: BaseTokenizer, - weighted_sample_steps: Optional[int] = None, + weighted_sampling_steps_per_epoch: Optional[int] = None, speaker_path: Optional[Path] = None, featurizers: Optional[Dict[str, Featurizer]] = None, feature_processors: Optional[Dict[str, FeatureProcessor]] = None, - align_prior_config: Optional[AlignPriorConfig] = None, + align_prior_hop_length: Optional[int] = None, min_duration: Optional[float] = None, max_duration: Optional[float] = None, ): @@ -103,7 +97,9 @@ def __init__( self.sample_rate = sample_rate self.text_tokenizer = text_tokenizer - self.weighted_sample_steps = weighted_sample_steps + self.weighted_sampling_steps_per_epoch = weighted_sampling_steps_per_epoch + self.align_prior_hop_length = align_prior_hop_length + self.include_align_prior = self.align_prior_hop_length is not None if speaker_path: self.include_speaker = True @@ -115,26 +111,21 @@ def __init__( if featurizers: logging.info(f"Found featurizers {featurizers.keys()}") - self.featurizers = featurizers.values() + self.featurizers = list(featurizers.values()) else: self.featurizers = [] if feature_processors: logging.info(f"Found featurize processors {feature_processors.keys()}") - self.feature_processors = feature_processors.values() + self.feature_processors = list(feature_processors.values()) else: self.feature_processors = [] - self.align_prior_config = align_prior_config - if self.align_prior_config.use_beta_binomial_interpolator: - self.beta_binomial_interpolator = BetaBinomialInterpolator() - else: - self.beta_binomial_interpolator = None - self.data_samples = [] self.sample_weights = [] - for dataset_name, dataset in dataset_meta.items(): - samples, weights = self._process_dataset( + for dataset_name, dataset_info in dataset_meta.items(): + dataset = DatasetMeta(**dataset_info) + samples, weights = self._preprocess_manifest( dataset_name=dataset_name, dataset=dataset, min_duration=min_duration, @@ -145,15 +136,15 @@ def __init__( self.sample_weights += weights def get_sampler(self, batch_size: int) -> Optional[torch.utils.data.Sampler]: - if not self.weighted_sample_steps: + if not self.weighted_sampling_steps_per_epoch: return None sampler = get_weighted_sampler( - sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sample_steps + sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sampling_steps_per_epoch ) return sampler - def _process_dataset( + def _preprocess_manifest( self, dataset_name: str, dataset: DatasetMeta, @@ -169,8 +160,8 @@ def _process_dataset( logging.info(dataset_name) logging.info(f"Original # of files: {len(entries)}") logging.info(f"Filtered # of files: {len(filtered_entries)}") - logging.info(f"Original duration: {total_hours} hours") - logging.info(f"Filtered duration: {filtered_hours} hours") + logging.info(f"Original duration: {total_hours:.2f} hours") + logging.info(f"Filtered duration: {filtered_hours:.2f} hours") samples = [] sample_weights = [] @@ -219,15 +210,10 @@ def __getitem__(self, index): example["speaker"] = data.speaker example["speaker_index"] = data.speaker_index - if self.align_prior_config: + if self.include_align_prior: text_len = len(tokens) - spec_len = 1 + librosa.core.samples_to_frames( - audio.shape[0], 
hop_length=self.align_prior_config.hop_length - ) - if self.beta_binomial_interpolator: - align_prior = self.beta_binomial_interpolator(w=spec_len, h=text_len) - else: - align_prior = beta_binomial_prior_distribution(phoneme_count=text_len, mel_count=spec_len) + spec_len = 1 + librosa.core.samples_to_frames(audio.shape[0], hop_length=self.align_prior_hop_length) + align_prior = beta_binomial_prior_distribution(phoneme_count=text_len, mel_count=spec_len) align_prior = torch.tensor(align_prior, dtype=torch.float32) example["align_prior"] = align_prior @@ -265,7 +251,7 @@ def collate_fn(self, batch: List[dict]): if self.include_speaker: speaker_list.append(example["speaker_index"]) - if self.align_prior_config: + if self.include_align_prior: prior_list.append(example["align_prior"]) batch_audio_len = torch.IntTensor(audio_len_list) @@ -288,7 +274,7 @@ def collate_fn(self, batch: List[dict]): if self.include_speaker: batch_dict["speaker_id"] = torch.IntTensor(speaker_list) - if self.align_prior_config: + if self.include_align_prior: spec_max_len = max([prior.shape[0] for prior in prior_list]) text_max_len = max([prior.shape[1] for prior in prior_list]) batch_dict["align_prior_matrix"] = stack_tensors(prior_list, max_lens=[text_max_len, spec_max_len],) diff --git a/nemo/collections/tts/data/vocoder_dataset.py b/nemo/collections/tts/data/vocoder_dataset.py new file mode 100644 index 000000000000..9bb115ba2448 --- /dev/null +++ b/nemo/collections/tts/data/vocoder_dataset.py @@ -0,0 +1,202 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import traceback +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import librosa +import torch.utils.data + +from nemo.collections.asr.parts.preprocessing.segment import AudioSegment +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest +from nemo.collections.tts.parts.preprocessing.feature_processors import FeatureProcessor +from nemo.collections.tts.parts.utils.tts_dataset_utils import ( + filter_dataset_by_duration, + get_abs_rel_paths, + get_weighted_sampler, + stack_tensors, +) +from nemo.core.classes import Dataset +from nemo.utils import logging +from nemo.utils.decorators import experimental + + +@dataclass +class DatasetMeta: + manifest_path: Path + audio_dir: Path + sample_weight: float = 1.0 + + +@dataclass +class DatasetSample: + manifest_entry: dict + audio_dir: Path + + +@experimental +class VocoderDataset(Dataset): + """ + Class for processing and loading Vocoder training examples. + + Args: + dataset_meta: Dict of dataset names (string) to dataset metadata. + sample_rate: Sample rate to load audio as. If the audio is stored at a different sample rate, then it will + be resampled. + n_samples: Optional int, if provided then n_samples samples will be randomly sampled from the full + audio file. 
+ weighted_sampling_steps_per_epoch: Optional int, If provided, then data will be sampled (with replacement) based on + the sample weights provided in the dataset metadata. If None, then sample weights will be ignored. + feature_processors: Optional, list of feature processors to run on training examples. + min_duration: Optional float, if provided audio files in the training manifest shorter than 'min_duration' + will be ignored. + max_duration: Optional float, if provided audio files in the training manifest longer than 'max_duration' + will be ignored. + num_audio_retries: Number of read attempts to make when sampling audio file, to avoid training failing + from sporadic IO errors. + """ + + def __init__( + self, + dataset_meta: Dict, + sample_rate: int, + n_samples: Optional[int] = None, + weighted_sampling_steps_per_epoch: Optional[int] = None, + feature_processors: Optional[Dict[str, FeatureProcessor]] = None, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + num_audio_retries: int = 5, + ): + super().__init__() + + self.sample_rate = sample_rate + self.n_samples = n_samples + self.weighted_sampling_steps_per_epoch = weighted_sampling_steps_per_epoch + self.num_audio_retries = num_audio_retries + self.load_precomputed_mel = False + + if feature_processors: + logging.info(f"Found feature processors {feature_processors.keys()}") + self.feature_processors = list(feature_processors.values()) + else: + self.feature_processors = [] + + self.data_samples = [] + self.sample_weights = [] + for dataset_name, dataset_info in dataset_meta.items(): + dataset = DatasetMeta(**dataset_info) + samples, weights = self._preprocess_manifest( + dataset_name=dataset_name, dataset=dataset, min_duration=min_duration, max_duration=max_duration, + ) + self.data_samples += samples + self.sample_weights += weights + + def get_sampler(self, batch_size: int) -> Optional[torch.utils.data.Sampler]: + if not self.weighted_sampling_steps_per_epoch: + return None + + sampler = get_weighted_sampler( + sample_weights=self.sample_weights, batch_size=batch_size, num_steps=self.weighted_sampling_steps_per_epoch + ) + return sampler + + def _segment_audio(self, audio_filepath: Path) -> AudioSegment: + # Retry file read multiple times as file seeking can produce random IO errors. 
+ for _ in range(self.num_audio_retries): + try: + audio_segment = AudioSegment.segment_from_file( + audio_filepath, target_sr=self.sample_rate, n_segments=self.n_samples, + ) + return audio_segment + except Exception: + traceback.print_exc() + + raise ValueError(f"Failed to read audio {audio_filepath}") + + def _sample_audio(self, audio_filepath: Path) -> Tuple[torch.Tensor, torch.Tensor]: + if not self.n_samples: + audio_array, _ = librosa.load(audio_filepath, sr=self.sample_rate) + else: + audio_segment = self._segment_audio(audio_filepath) + audio_array = audio_segment.samples + audio = torch.tensor(audio_array) + audio_len = torch.tensor(audio.shape[0]) + return audio, audio_len + + @staticmethod + def _preprocess_manifest( + dataset_name: str, dataset: DatasetMeta, min_duration: float, max_duration: float, + ): + entries = read_manifest(dataset.manifest_path) + filtered_entries, total_hours, filtered_hours = filter_dataset_by_duration( + entries=entries, min_duration=min_duration, max_duration=max_duration + ) + + logging.info(dataset_name) + logging.info(f"Original # of files: {len(entries)}") + logging.info(f"Filtered # of files: {len(filtered_entries)}") + logging.info(f"Original duration: {total_hours:.2f} hours") + logging.info(f"Filtered duration: {filtered_hours:.2f} hours") + + samples = [] + sample_weights = [] + for entry in filtered_entries: + sample = DatasetSample(manifest_entry=entry, audio_dir=Path(dataset.audio_dir),) + samples.append(sample) + sample_weights.append(dataset.sample_weight) + + return samples, sample_weights + + def __len__(self): + return len(self.data_samples) + + def __getitem__(self, index): + data = self.data_samples[index] + + audio_filepath = Path(data.manifest_entry["audio_filepath"]) + audio_filepath_abs, audio_filepath_rel = get_abs_rel_paths(input_path=audio_filepath, base_path=data.audio_dir) + + audio, audio_len = self._sample_audio(audio_filepath_abs) + + example = {"audio_filepath": audio_filepath_rel, "audio": audio, "audio_len": audio_len} + + for processor in self.feature_processors: + processor.process(example) + + return example + + def collate_fn(self, batch: List[dict]): + audio_filepath_list = [] + audio_list = [] + audio_len_list = [] + + for example in batch: + audio_filepath_list.append(example["audio_filepath"]) + audio_list.append(example["audio"]) + audio_len_list.append(example["audio_len"]) + + batch_audio_len = torch.IntTensor(audio_len_list) + audio_max_len = int(batch_audio_len.max().item()) + + batch_audio = stack_tensors(audio_list, max_lens=[audio_max_len]) + + batch_dict = { + "audio_filepaths": audio_filepath_list, + "audio": batch_audio, + "audio_lens": batch_audio_len, + } + + return batch_dict diff --git a/nemo/collections/tts/models/hifigan.py b/nemo/collections/tts/models/hifigan.py index b7ab37e6589e..bf2eef33cdcf 100644 --- a/nemo/collections/tts/models/hifigan.py +++ b/nemo/collections/tts/models/hifigan.py @@ -13,6 +13,7 @@ # limitations under the License. 
import itertools +from pathlib import Path import torch import torch.nn.functional as F @@ -23,12 +24,13 @@ from nemo.collections.tts.losses.hifigan_losses import DiscriminatorLoss, FeatureMatchingLoss, GeneratorLoss from nemo.collections.tts.models.base import Vocoder from nemo.collections.tts.modules.hifigan_modules import MultiPeriodDiscriminator, MultiScaleDiscriminator +from nemo.collections.tts.parts.utils.callbacks import LoggingCallback from nemo.collections.tts.parts.utils.helpers import get_batch_size, get_num_workers, plot_spectrogram_to_numpy from nemo.core.classes import Exportable from nemo.core.classes.common import PretrainedModelInfo, typecheck from nemo.core.neural_types.elements import AudioSignal, MelSpectrogramType from nemo.core.neural_types.neural_type import NeuralType -from nemo.core.optim.lr_scheduler import CosineAnnealing, compute_max_steps +from nemo.core.optim.lr_scheduler import compute_max_steps, prepare_lr_scheduler from nemo.utils import logging, model_utils HAVE_WANDB = True @@ -47,6 +49,7 @@ def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None): # Convert to Hydra 1.0 compatible DictConfig cfg = model_utils.convert_model_config_to_dict_config(cfg) cfg = model_utils.maybe_update_config_version(cfg) + self.ds_class = cfg.train_ds.dataset._target_ super().__init__(cfg=cfg, trainer=trainer) @@ -69,9 +72,22 @@ def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None): if self._train_dl: self.input_as_mel = self._train_dl.dataset.load_precomputed_mel + self.log_audio = cfg.get("log_audio", False) + self.log_config = cfg.get("log_config", None) + self.lr_schedule_interval = None self.automatic_optimization = False - def _get_max_steps(self): + @property + def max_steps(self): + if "max_steps" in self._cfg: + return self._cfg.get("max_steps") + + if "max_epochs" not in self._cfg: + raise ValueError("Must specify 'max_steps' or 'max_epochs'.") + + if "steps_per_epoch" in self._cfg: + return self._cfg.max_epochs * self._cfg.steps_per_epoch + return compute_max_steps( max_epochs=self._cfg.max_epochs, accumulate_grad_batches=self.trainer.accumulate_grad_batches, @@ -84,16 +100,13 @@ def _get_max_steps(self): @staticmethod def get_warmup_steps(max_steps, warmup_steps, warmup_ratio): - if warmup_steps is not None and warmup_ratio is not None: - raise ValueError(f'Either use warmup_steps or warmup_ratio for scheduler') - if warmup_steps is not None: return warmup_steps if warmup_ratio is not None: return warmup_ratio * max_steps - raise ValueError(f'Specify warmup_steps or warmup_ratio for scheduler') + return None def configure_optimizers(self): optim_config = self._cfg.optim.copy() @@ -102,42 +115,47 @@ def configure_optimizers(self): sched_config = optim_config.pop("sched", None) OmegaConf.set_struct(optim_config, True) - optim_g = instantiate(optim_config, params=self.generator.parameters(),) - optim_d = instantiate(optim_config, params=itertools.chain(self.msd.parameters(), self.mpd.parameters()),) - - # Backward compatibility - if sched_config is None and 'sched' in self._cfg: - sched_config = self._cfg.sched - - if sched_config is not None: - max_steps = self._cfg.get("max_steps", None) - if max_steps is None or max_steps < 0: - max_steps = self._get_max_steps() - - warmup_steps = HifiGanModel.get_warmup_steps( - max_steps=max_steps, - warmup_steps=sched_config.get("warmup_steps", None), - warmup_ratio=sched_config.get("warmup_ratio", None), - ) - - scheduler_g = CosineAnnealing( - optimizer=optim_g, max_steps=max_steps, min_lr=sched_config.min_lr, 
warmup_steps=warmup_steps, - ) # Use warmup to delay start - sch1_dict = { - 'scheduler': scheduler_g, - 'interval': 'step', - } - - scheduler_d = CosineAnnealing(optimizer=optim_d, max_steps=max_steps, min_lr=sched_config.min_lr,) - sch2_dict = { - 'scheduler': scheduler_d, - 'interval': 'step', - } - - return [optim_g, optim_d], [sch1_dict, sch2_dict] - else: + gen_params = self.generator.parameters() + disc_params = itertools.chain(self.msd.parameters(), self.mpd.parameters()) + optim_g = instantiate(optim_config, params=gen_params) + optim_d = instantiate(optim_config, params=disc_params) + + if sched_config is None: return [optim_g, optim_d] + max_steps = self.max_steps + warmup_steps = self.get_warmup_steps( + max_steps=max_steps, + warmup_steps=sched_config.get("warmup_steps", None), + warmup_ratio=sched_config.get("warmup_ratio", None), + ) + + OmegaConf.set_struct(sched_config, False) + sched_config["max_steps"] = max_steps + if warmup_steps: + sched_config["warmup_steps"] = warmup_steps + sched_config.pop("warmup_ratio", None) + OmegaConf.set_struct(sched_config, True) + + scheduler_g = prepare_lr_scheduler( + optimizer=optim_g, scheduler_config=sched_config, train_dataloader=self._train_dl + ) + + scheduler_d = prepare_lr_scheduler( + optimizer=optim_d, scheduler_config=sched_config, train_dataloader=self._train_dl + ) + + self.lr_schedule_interval = scheduler_g["interval"] + + return [optim_g, optim_d], [scheduler_g, scheduler_d] + + def update_lr(self, interval="step"): + schedulers = self.lr_schedulers() + if schedulers is not None and self.lr_schedule_interval == interval: + sch1, sch2 = schedulers + sch1.step() + sch2.step() + @typecheck() def forward(self, *, spec): """ @@ -153,12 +171,7 @@ def convert_spectrogram_to_audio(self, spec: 'torch.tensor') -> 'torch.tensor': return self(spec=spec).squeeze(1) def training_step(self, batch, batch_idx): - if self.input_as_mel: - # Pre-computed spectrograms will be used as input - audio, audio_len, audio_mel = batch - else: - audio, audio_len = batch - audio_mel, _ = self.audio_to_melspec_precessor(audio, audio_len) + audio, audio_len, audio_mel, _ = self._process_batch(batch) # Mel as input for L1 mel loss audio_trg_mel, _ = self.trg_melspec_fn(audio, audio_len) @@ -196,12 +209,7 @@ def training_step(self, batch, batch_idx): self.manual_backward(loss_g) optim_g.step() - # Run schedulers - schedulers = self.lr_schedulers() - if schedulers is not None: - sch1, sch2 = schedulers - sch1.step() - sch2.step() + self.update_lr() metrics = { "g_loss_fm_mpd": loss_fm_mpd, @@ -218,18 +226,13 @@ def training_step(self, batch, batch_idx): self.log_dict(metrics, on_step=True, sync_dist=True) self.log("g_l1_loss", loss_mel, prog_bar=True, logger=False, sync_dist=True) + def training_epoch_end(self, outputs) -> None: + self.update_lr("epoch") + def validation_step(self, batch, batch_idx): - if self.input_as_mel: - audio, audio_len, audio_mel = batch - audio_mel_len = [audio_mel.shape[1]] * audio_mel.shape[0] - else: - audio, audio_len = batch - audio_mel, audio_mel_len = self.audio_to_melspec_precessor(audio, audio_len) - audio_pred = self(spec=audio_mel) + audio, audio_len, audio_mel, audio_mel_len = self._process_batch(batch) - # Perform bias denoising - pred_denoised = self._bias_denoise(audio_pred, audio_mel).squeeze(1) - pred_denoised_mel, _ = self.audio_to_melspec_precessor(pred_denoised, audio_len) + audio_pred = self(spec=audio_mel) if self.input_as_mel: gt_mel, gt_mel_len = self.audio_to_melspec_precessor(audio, audio_len) @@ -239,7 
+242,11 @@ def validation_step(self, batch, batch_idx): self.log_dict({"val_loss": loss_mel}, on_epoch=True, sync_dist=True) # Plot audio once per epoch - if batch_idx == 0 and isinstance(self.logger, WandbLogger) and HAVE_WANDB: + if self.log_audio and batch_idx == 0 and isinstance(self.logger, WandbLogger) and HAVE_WANDB: + # Perform bias denoising + pred_denoised = self._bias_denoise(audio_pred, audio_mel).squeeze(1) + pred_denoised_mel, _ = self.audio_to_melspec_precessor(pred_denoised, audio_len) + clips = [] specs = [] for i in range(min(5, audio.shape[0])): @@ -284,6 +291,21 @@ def validation_step(self, batch, batch_idx): self.logger.experiment.log({"audio": clips, "specs": specs}) + def _process_batch(self, batch): + if self.input_as_mel: + audio, audio_len, audio_mel = batch + audio_mel_len = [audio_mel.shape[1]] * audio_mel.shape[0] + return audio, audio_len, audio_mel, audio_mel_len + + if self.ds_class == "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + audio = batch.get("audio") + audio_len = batch.get("audio_lens") + else: + audio, audio_len = batch + + audio_mel, audio_mel_len = self.audio_to_melspec_precessor(audio, audio_len) + return audio, audio_len, audio_mel, audio_mel_len + def _bias_denoise(self, audio, mel): def stft(x): comp = torch.stft(x.squeeze(1), n_fft=1024, hop_length=256, win_length=1024, return_complex=True) @@ -311,6 +333,19 @@ def istft(mags, phase): return audio_denoised + def _setup_train_dataloader(self, cfg): + dataset = instantiate(cfg.dataset) + sampler = dataset.get_sampler(cfg.dataloader_params.batch_size) + data_loader = torch.utils.data.DataLoader( + dataset, collate_fn=dataset.collate_fn, sampler=sampler, **cfg.dataloader_params + ) + return data_loader + + def _setup_test_dataloader(self, cfg): + dataset = instantiate(cfg.dataset) + data_loader = torch.utils.data.DataLoader(dataset, collate_fn=dataset.collate_fn, **cfg.dataloader_params) + return data_loader + def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, name: str = "train"): if "dataset" not in cfg or not isinstance(cfg.dataset, DictConfig): raise ValueError(f"No dataset for {name}") @@ -333,14 +368,44 @@ def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, na return torch.utils.data.DataLoader(dataset, collate_fn=dataset.collate_fn, **cfg.dataloader_params) def setup_training_data(self, cfg): - self._train_dl = self.__setup_dataloader_from_config(cfg) + if self.ds_class == "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + self._train_dl = self._setup_train_dataloader(cfg) + else: + self._train_dl = self.__setup_dataloader_from_config(cfg) def setup_validation_data(self, cfg): - self._validation_dl = self.__setup_dataloader_from_config(cfg, shuffle_should_be=False, name="validation") + if self.ds_class == "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + self._validation_dl = self._setup_test_dataloader(cfg) + else: + self._validation_dl = self.__setup_dataloader_from_config(cfg, shuffle_should_be=False, name="validation") def setup_test_data(self, cfg): pass + def configure_callbacks(self): + if not self.log_config: + return [] + + sample_ds_class = self.log_config.dataset._target_ + if sample_ds_class != "nemo.collections.tts.data.vocoder_dataset.VocoderDataset": + raise ValueError(f"Sample logging only supported for VocoderDataset, got {sample_ds_class}") + + data_loader = self._setup_test_dataloader(self.log_config) + generators = instantiate(self.log_config.generators) + log_dir = 
Path(self.log_config.log_dir) if self.log_config.log_dir else None + log_callback = LoggingCallback( + generators=generators, + data_loader=data_loader, + log_epochs=self.log_config.log_epochs, + epoch_frequency=self.log_config.epoch_frequency, + output_dir=log_dir, + loggers=self.trainer.loggers, + log_tensorboard=self.log_config.log_tensorboard, + log_wandb=self.log_config.log_wandb, + ) + + return [log_callback] + @classmethod def list_available_models(cls) -> 'Optional[Dict[str, str]]': list_of_models = [] diff --git a/nemo/collections/tts/parts/utils/callbacks.py b/nemo/collections/tts/parts/utils/callbacks.py index 0f8bd0fa4177..2320e5b21a7c 100644 --- a/nemo/collections/tts/parts/utils/callbacks.py +++ b/nemo/collections/tts/parts/utils/callbacks.py @@ -22,6 +22,7 @@ import numpy as np import soundfile as sf import torch +from einops import rearrange from pytorch_lightning import Callback, LightningModule, Trainer from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.loggers.logger import Logger @@ -65,11 +66,13 @@ def _load_vocoder(model_name: Optional[str], checkpoint_path: Optional[str], typ raise ValueError(f"Unknown vocoder type '{type}'") if model_name is not None: - vocoder = model_type.from_pretrained(model_name).eval() + vocoder = model_type.from_pretrained(model_name) + elif checkpoint_path.endswith(".nemo"): + vocoder = model_type.restore_from(checkpoint_path) else: - vocoder = model_type.load_from_checkpoint(checkpoint_path).eval() + vocoder = model_type.load_from_checkpoint(checkpoint_path) - return vocoder + return vocoder.eval() @dataclass @@ -229,6 +232,39 @@ def on_train_epoch_end(self, trainer: Trainer, model: LightningModule): self._log_image(image=image, log_dir=log_dir, step=model.global_step) +class VocoderArtifactGenerator(ArtifactGenerator): + """ + Generator for logging Vocoder model outputs. + """ + + def generate_artifacts( + self, model: LightningModule, batch_dict: Dict + ) -> Tuple[List[AudioArtifact], List[ImageArtifact]]: + + audio_artifacts = [] + + audio_filepaths = batch_dict.get("audio_filepaths") + audio_ids = [create_id(p) for p in audio_filepaths] + + audio = batch_dict.get("audio") + audio_len = batch_dict.get("audio_lens") + + spec, spec_len = model.audio_to_melspec_precessor(audio, audio_len) + + with torch.no_grad(): + audio_pred = model.forward(spec=spec) + audio_pred = rearrange(audio_pred, "B 1 T -> B T") + + for i, audio_id in enumerate(audio_ids): + audio_pred_i = audio_pred[i][: audio_len[i]].cpu().numpy() + audio_artifact = AudioArtifact( + id=f"audio_{audio_id}", data=audio_pred_i, filename=f"{audio_id}.wav", sample_rate=model.sample_rate, + ) + audio_artifacts.append(audio_artifact) + + return audio_artifacts, [] + + class FastPitchArtifactGenerator(ArtifactGenerator): """ Generator for logging FastPitch model outputs. 
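Aside: the einops rearrange() calls added in this file are shorthand for squeeze/transpose operations; a minimal sketch of the equivalences relied on by VocoderArtifactGenerator above and by the alignment-logging fix in the next hunk (assumes only torch and einops are installed, tensor sizes are made up for illustration):

import torch
from einops import rearrange

audio_pred = torch.randn(4, 1, 16000)  # [B, 1, T] as produced by the vocoder generator
# dropping the singleton channel axis is the same as squeeze(1)
assert torch.equal(rearrange(audio_pred, "B 1 T -> B T"), audio_pred.squeeze(1))

attn = torch.randn(4, 1, 80, 20)  # [B, 1, T_spec, T_text] soft alignment
attn_t = rearrange(attn, "B 1 T_spec T_text -> B T_text T_spec")
# equivalent to dropping the channel axis and swapping the spec/text axes
assert torch.equal(attn_t, attn.squeeze(1).transpose(1, 2))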
@@ -339,10 +375,9 @@ def _generate_gta_predictions(self, model: LightningModule, audio_ids: List[str] ) if self.log_alignment: - # [B, T_spec, T_text] - attn = attn.squeeze(1) + attn = rearrange(attn, "B 1 T_spec T_text -> B T_text T_spec") for i, audio_id in enumerate(audio_ids): - attn_i = attn[i][: mels_pred_len[i], : text_lens[i]].cpu().numpy() + attn_i = attn[i][: text_lens[i], : mels_pred_len[i]].cpu().numpy() alignment_artifact = ImageArtifact( id=f"align_{audio_id}", data=attn_i, diff --git a/nemo/core/optim/lr_scheduler.py b/nemo/core/optim/lr_scheduler.py index c454e6290477..73ad1e18a94b 100644 --- a/nemo/core/optim/lr_scheduler.py +++ b/nemo/core/optim/lr_scheduler.py @@ -975,5 +975,6 @@ def compute_max_steps( } EPOCH_SCHEDULERS = { + 'ExponentialLR': pt_scheduler.ExponentialLR, 'ReduceLROnPlateau': pt_scheduler.ReduceLROnPlateau, } diff --git a/scripts/dataset_processing/tts/preprocess_audio.py b/scripts/dataset_processing/tts/preprocess_audio.py index 9d2783ebff18..1912d45d4bed 100644 --- a/scripts/dataset_processing/tts/preprocess_audio.py +++ b/scripts/dataset_processing/tts/preprocess_audio.py @@ -24,7 +24,7 @@ Most of these can also be done by the TTS data loader at training time, but doing them ahead of time lets us implement more complex processing, validate the correctness of the output, and save on compute time. -$ python /scripts/dataset_processing/tts/audio_processing/preprocess_audio.py \ +$ python /scripts/dataset_processing/tts/preprocess_audio.py \ --input_manifest="/manifest.json" \ --output_manifest="/manifest_processed.json" \ --input_audio_dir="/audio" \ diff --git a/scripts/dataset_processing/tts/preprocess_text.py b/scripts/dataset_processing/tts/preprocess_text.py index 30893156ebfb..8b9bdebe940d 100644 --- a/scripts/dataset_processing/tts/preprocess_text.py +++ b/scripts/dataset_processing/tts/preprocess_text.py @@ -49,7 +49,9 @@ def get_args(): "--output_manifest", required=True, type=Path, help="Path to output training manifest with processed text.", ) parser.add_argument( - "--overwrite", default=False, type=bool, help="Whether to overwrite the output manifest file if it exists.", + "--overwrite", + action=argparse.BooleanOptionalAction, + help="Whether to overwrite the output manifest file if it exists.", ) parser.add_argument( "--lower_case", default=False, type=bool, help="Whether to convert the final text to lower case.", From 9827c9b11ffc6ef10d5eb22c6b5bbc1927d7e2ec Mon Sep 17 00:00:00 2001 From: Evelina <10428420+ekmb@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:37:05 -0700 Subject: [PATCH 006/123] GPT inference long context (#6687) * deb infer Signed-off-by: Evelina * deb infer Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * dont do maxlen trunc for non abs pos emb Signed-off-by: Evelina * dont do maxlen trunc for non abs pos emb Signed-off-by: Evelina * convert for training only Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add eval test, add save .nemo for sft model Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * jenkins format fix Signed-off-by: Evelina * update jenkins Signed-off-by: Evelina * update jenkins Signed-off-by: Evelina * fix jenkins Signed-off-by: 
Evelina * remove test, ci timeout Signed-off-by: Evelina * fix for m_gpt_eval.py Signed-off-by: Evelina * jenkins test Signed-off-by: Evelina * fix gpt_eval with sft model Signed-off-by: Evelina * revert jenkins Signed-off-by: Evelina * keep float conversion for model.generate() Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix inference dtype Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Jenkinsfile | 25 ++++++++++++++++++- .../language_modeling/megatron_gpt_eval.py | 2 +- .../conf/megatron_gpt_peft_eval_config.yaml | 2 +- .../tuning/conf/megatron_gpt_sft.yaml | 2 +- .../tuning/megatron_gpt_sft.py | 7 ++++++ .../megatron/gpt_sft_dataset.py | 4 ++- .../megatron_gpt_sft_model.py | 8 +++++- .../nlp/modules/common/megatron/module.py | 2 +- .../common/text_generation_strategy.py | 7 ++++-- .../modules/common/text_generation_utils.py | 18 ++++++++----- 10 files changed, 62 insertions(+), 15 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index aacdd9575764..79c696a48600 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3407,7 +3407,30 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.num_nodes=1" } } - + stage('L2: Megatron GPT SFT Eval (inference seq len > training seq len)') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps{ + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt_sft/megatron_gpt_rope_sft.nemo \ + model.peft.restore_from_path=null \ + model.data.test_ds.file_names=['/home/TestData/nlp/megatron_gpt_sft/sample.jsonl'] \ + model.data.test_ds.names=['test'] \ + model.data.test_ds.global_batch_size=1 \ + model.data.test_ds.micro_batch_size=1 \ + model.data.test_ds.tokens_to_generate=30 \ + model.data.test_ds.max_seq_length=6000 \ + inference.greedy=True \ + inference.repetition_penalty=1.0 \ + inference.outfile_path='examples/nlp/language_modeling/out.jsonl' && \ + rm -rf examples/nlp/language_modeling/out.jsonl" + } + } stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { when { anyOf { diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 14cdbf8a760c..d7319fb72a01 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -263,7 +263,7 @@ def main(cfg) -> None: print(response) print("***************************") - # Second method of running text generation, call trainer.predict + # Second method of running text generation, call trainer.predict [recommended] ds = RequestDataSet(OmegaConf.to_container(cfg.prompts)) request_dl = DataLoader(dataset=ds, batch_size=2) config = OmegaConf.to_container(cfg.inference) diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml index c430bd7fab5f..69dc17f244f5 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml @@ -129,4 +129,4 @@ inference: repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. 
min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - outfile_path: /home/adithyare/exp/foo.txt \ No newline at end of file + outfile_path: output.txt \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml index 678851db3b01..f8a8e6b9dbc0 100644 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml @@ -29,7 +29,7 @@ exp_manager: monitor: validation_${model.data.validation_ds.metric.name} save_top_k: 2 mode: max - save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below, + save_nemo_on_train_end: False filename: 'megatron_gpt_sft--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}' model_parallel_size: ${model.tensor_model_parallel_size} save_best_model: True diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index b2b8786df8c1..0737d55cc514 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -61,6 +61,8 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): gpt_cfg.hidden_dropout = cfg.model.get('hidden_dropout', 0.0) gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0) gpt_cfg.ffn_dropout = cfg.model.ffn_dropout + sft_cls = MegatronGPTSFTModel + gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}" # This is needed when modifying a hparam file directly to load `.ckpt` files. # This is not needed to modify the cfg in `.nemo` files. 
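Aside: the new gpt_cfg.target value is the fully qualified class path of MegatronGPTSFTModel, which appears intended to let a checkpoint restored from a bare hparams.yaml plus .ckpt re-import the correct model class. A minimal sketch of how such a target string resolves back to a class; class_from_target is an illustrative helper, not a NeMo API:

import importlib

def class_from_target(target: str):
    """Resolve a fully qualified class path such as 'package.module.ClassName'."""
    module_name, class_name = target.rsplit(".", 1)
    return getattr(importlib.import_module(module_name), class_name)

# Hypothetical usage:
# cls = class_from_target(
#     "nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model.MegatronGPTSFTModel"
# )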
@@ -167,6 +169,10 @@ def main(cfg) -> None: trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + if cfg.model.restore_from_path: save_restore_connector = NLPSaveRestoreConnector() if os.path.isdir(cfg.model.restore_from_path): @@ -177,6 +183,7 @@ def main(cfg) -> None: return_config=True, save_restore_connector=save_restore_connector, ) + gpt_cfg = _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False) model = load_from_nemo(MegatronGPTSFTModel, cfg, trainer, gpt_cfg, modify_confg_fn=_modify_config) else: validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 24b7fe8d3d6d..f9ef6c8470c2 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -169,7 +169,9 @@ def _process_example(self, example): tokenized_text = pre_pad + self.tokenizer.text_to_ids(text) context_ids = pre_pad + self.tokenizer.text_to_ids(context) answer_ids = tokenized_text[len(context_ids) :] - total_ids = len(context_ids) + len(answer_ids) + + # for the long context cases, collate_fn includes self.tokens_to_generate for padding + total_ids = len(context_ids) + max(len(answer_ids), self.tokens_to_generate) if self.add_bos: total_ids += 1 if self.add_sep: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 7819d28e8150..1dc335b86609 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -550,7 +550,13 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] return compute_prob_response else: del inference_config['compute_logprob'] - inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda()) + + # for megatron_gpt_eval.py + if isinstance(batch, list): + inference_config['inputs'] = batch + else: + # peft_eval.py + inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda()) return generate(self, **inference_config) def write_predictions_to_file(self, outputs, output_file_path_prefix): diff --git a/nemo/collections/nlp/modules/common/megatron/module.py b/nemo/collections/nlp/modules/common/megatron/module.py index 0a340985eec2..22a223013fd2 100644 --- a/nemo/collections/nlp/modules/common/megatron/module.py +++ b/nemo/collections/nlp/modules/common/megatron/module.py @@ -290,7 +290,7 @@ def forward(self, *inputs, **kwargs): if getattr(self.module, 'pre_process', True): inputs = fp32_to_float16(inputs, self.float16_converter) outputs = self.module(*inputs, **kwargs) - if parallel_state.is_pipeline_last_stage(): + if parallel_state.is_pipeline_last_stage() and self.training: outputs = float16_to_fp32(outputs) return outputs diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 16935be1cc2d..27ae3b2606d3 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ 
b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -181,8 +181,11 @@ def __init__(self, model): def clip_max_len(self, maxlen: int) -> int: """ clip the max len based on the LM model max sequence length""" - if maxlen > self.model.cfg.encoder_seq_length + 1: - maxlen = self.model.cfg.encoder_seq_length + 1 + + # for positional embedding types that allow length extrapolation, don't clip the max length + if self.model.cfg.get("position_embedding_type", "learned_absolute") == "learned_absolute": + if maxlen > self.model.cfg.encoder_seq_length + 1: + maxlen = self.model.cfg.encoder_seq_length + 1 return maxlen def init_batch(self, context_tokens: torch.Tensor, context_length: int): diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 8cfb02c5e321..3a07a807b11a 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -421,9 +421,15 @@ def synced_generate( if parallel_state.is_pipeline_first_stage(): src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() - output_logits = torch.empty( - tokens.size(0), context_length - 1, dtype=torch.float32, device=torch.device("cuda") - ) + + precision = model._trainer.precision + if precision in [16, "16"]: + dtype = torch.float16 + elif precision == "bf16": + dtype = torch.bfloat16 + else: + dtype = torch.float32 + output_logits = torch.empty(tokens.size(0), context_length - 1, dtype=dtype, device=torch.device("cuda")) torch.distributed.broadcast(output_logits, src, group) if all_probs: @@ -433,7 +439,7 @@ def synced_generate( tokens.size(0), context_length - 1, model.padded_vocab_size, - dtype=torch.float32, + dtype=dtype, device=torch.device("cuda"), ) torch.distributed.broadcast(full_logits, src, group) @@ -667,10 +673,10 @@ def sample_sequence_batch( output = inference_strategy.forward_step(batch, tensor_shape) if parallel_state.is_pipeline_last_stage(): - output = output[0]['logits'].float() + output = output[0]['logits'] + output = tensor_parallel.gather_from_tensor_model_parallel_region(output) assert output is not None - output = output.float() logits = output[:, -1].view(batch_size, -1).contiguous() # make sure it will generate at least min_length From d5819e9cc0d3733d7aeeef9bde64c12200b6c415 Mon Sep 17 00:00:00 2001 From: Hainan Xu Date: Fri, 2 Jun 2023 18:08:29 -0400 Subject: [PATCH 007/123] TDT model pull request (#6536) * TDT model pull request, initial draft Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * TDT PR WIP Signed-off-by: Hainan Xu * TDT PR WIP Signed-off-by: Hainan Xu * TDT PR WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * TDT WIP Signed-off-by: Hainan Xu * addressed some review comments, part1 Signed-off-by: Hainan Xu * addressed some review comments, part1, one line fix 
Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add tests for comparing TDT alphas with pytorch VS kernel computation Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add tests for comparing multiblank alphas with pytorch VS kernel computation Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add tests for fixed case computation for TDT Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add more comments for greedy-batch decoding for TDT Signed-off-by: Hainan Xu * include config for TDT model with stateless decoders Signed-off-by: Hainan Xu * add reference to TDT in Readme Signed-off-by: Hainan Xu * slight modification of config file comments Signed-off-by: Hainan Xu * addressed more comments Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more detailed comments for tdt kernel Signed-off-by: Hainan Xu * one line fix Signed-off-by: Hainan Xu * fixed small bug that results in test fails for rnnt_decoding Signed-off-by: Hainan Xu * fixed small bug that results in test fails for rnnt_decoding Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed small bug that results in test fails for rnnt_decoding Signed-off-by: Hainan Xu * remove unused import Signed-off-by: Hainan Xu --------- Signed-off-by: Hainan Xu Co-authored-by: Hainan Xu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- README.rst | 2 +- .../conformer_multiblank_transducer_bpe.yaml | 1 + .../conf/conformer/tdt/conformer_tdt_bpe.yaml | 281 +++++++++ .../tdt/conformer_tdt_bpe_stateless.yaml | 278 +++++++++ nemo/collections/asr/losses/rnnt.py | 50 +- nemo/collections/asr/losses/rnnt_pytorch.py | 138 ++++- nemo/collections/asr/metrics/rnnt_wer.py | 113 +++- nemo/collections/asr/metrics/rnnt_wer_bpe.py | 9 +- nemo/collections/asr/models/rnnt_models.py | 7 +- .../asr/parts/numba/rnnt_loss/__init__.py | 6 +- .../asr/parts/numba/rnnt_loss/rnnt.py | 127 ++++ .../asr/parts/numba/rnnt_loss/rnnt_pytorch.py | 226 ++++++- .../rnnt_loss/utils/cuda_utils/gpu_rnnt.py | 315 +++++++++- .../utils/cuda_utils/gpu_rnnt_kernel.py | 531 +++++++++++++++++ .../parts/submodules/rnnt_greedy_decoding.py | 561 ++++++++++++++++++ .../asr/decoding/test_rnnt_decoding.py | 2 +- .../asr/numba/rnnt_loss/test_rnnt_pytorch.py | 71 ++- .../rnnt_loss/utils/test_gpu_rnnt_kernel.py | 187 ++++++ .../asr/test_asr_rnnt_encdec_model.py | 44 ++ 19 files changed, 2887 insertions(+), 62 deletions(-) create mode 100644 examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml create mode 100644 examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml diff --git a/README.rst b/README.rst index 2e6f5580a3e5..b9ba7fce30f3 100644 --- a/README.rst +++ b/README.rst @@ -84,7 +84,7 @@ Key Features * CTC * Transducer/RNNT * Hybrid Transducer/CTC - * NeMo Original `Multi-blank Transducers `_ + * NeMo Original `Multi-blank Transducers `_ and `Token-and-Duration Transducers (TDT) `_ * Streaming/Buffered ASR (CTC/Transducer) - `Chunked Inference Examples `_ * Cache-aware Streaming Conformer - ``_ * Beam Search decoding diff --git 
a/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml b/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml index 84d767e4a3b5..51e57e72e2ad 100644 --- a/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml +++ b/examples/asr/conf/conformer/multiblank/conformer_multiblank_transducer_bpe.yaml @@ -179,6 +179,7 @@ model: decoding: strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. + model_type: "multiblank" # this must not be None in order to use the multi-blank specific decoding method. # you could set this to [1, 1, 1] so that big blanks are treated the same diff --git a/examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml new file mode 100644 index 000000000000..0210bd5a2dad --- /dev/null +++ b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe.yaml @@ -0,0 +1,281 @@ +# This file contains the default values for training a Conformer-TDT ASR model, large size (~120M) with sub-word encoding. + +# You can find detailed info about TDT models at https://arxiv.org/abs/2304.06795. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of Conformer-Transducer, other parameters are the same as in this config file. + +# Note: the added duration outputs from the joiner make TDT models slightly larger than corresponding conventional RNN-T models, +# although the difference is tiny -- the added number of params is roughly num-durations X (joint_hidden + pred_hidden), typically in the +# order of thousands of params. This is negligible even with the "Small" config with around 14 million params. +# Recommended duraction config is [0, 1, 2, ... , n] where optimal n is usually between 4 and 8 depending on the dataset. + +# +--------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+ +# | Model | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | +# +==============+=========+========+===========+==================+==============+==========================+=================+ +# | Small (14M)| 176 | 4 | 16 | 31 | 0.0 | 320 | 1 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ +# | Medium (32M)| 256 | 4 | 16 | 31 | 1e-3 | 640 | 1 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ +# | Large (120M)| 512 | 8 | 17 | 31 | 1e-3 | 640 | 1 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ +# | XLarge (644M)| 1024 | 8 | 24 | 5 | 1e-3 | 640 | 2 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ + +# Default learning parameters in this config are set for global batch size of 2K while you may use lower values. +# To increase the global batch size with limited number of GPUs, you may use higher accumulate_grad_batches. +# However accumulate_grad_batches is better to be avoided as long as the global batch size is large enough and training is stable. 
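# A quick sanity check of the effective-batch arithmetic referenced above (illustrative numbers,
# assuming the usual data-parallel accounting; this note is not part of the original config):
#   global_batch = train_ds.batch_size * num_gpus_per_node * num_nodes * trainer.accumulate_grad_batches
#   e.g. 16 * 8 * 4 * 4 = 2048, matching the ~2K effective batch these learning parameters were tuned for.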
+ +name: "Conformer-TDT-BPE" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 640 + joint_hidden: 640 + + # variables for TDT configs. + tdt_durations: [0, 1, 2, 3, 4] + num_tdt_durations: 5 + + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 31 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" 
or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 + t_max: null + dropout: 0.2 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.2 + num_extra_outputs: ${model.model_defaults.num_tdt_durations} + + decoding: + # Using greedy decoding is highly recommended for TDT models. Using greedy-batch will give very bad results + # if omega is 0; even if omega is non-zero, greedy-batch results are still going to be inaccurate. + strategy: "greedy" + + model_type: "tdt" + + # this must not be None in order to use the TDT specific decoding method. + durations: ${model.model_defaults.tdt_durations} + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + # This is the main different between a TDT model and a conventional RNNT model -- the loss function. + loss_name: "tdt" + + tdt_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.001 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. + + # refer to https://arxiv.org/abs/2304.06795 for the meaning of the following three configs. + durations: ${model.model_defaults.tdt_durations} + sigma: 0.05 # hyper-param for under-normalization. + omega: 0.1 # weight for regular RNN-T loss. 
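  # Note added for clarity (not in the original file): loss_name "tdt" together with the tdt_kwargs above
  # is resolved later in this patch by resolve_rnnt_loss() into roughly
  #   TDTLossNumba(blank=len(vocabulary), durations=[0, 1, 2, 3, 4], reduction='none',
  #                fastemit_lambda=0.001, clamp=-1.0, sigma=0.05, omega=0.1)
  # so the durations list here must stay in sync with model_defaults.tdt_durations used by the joint and decoding sections.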
+ + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: 500 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 10 # Interval of logging. + enable_progress_bar: True + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_wer" + mode: "min" + save_top_k: 5 + always_save_nemo: True # saves the checkpoints as nemo files instead of PTL checkpoints + resume_if_exists: false + resume_ignore_no_checkpoint: false + + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null + diff --git a/examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml new file mode 100644 index 000000000000..fefbd6f8f56c --- /dev/null +++ b/examples/asr/conf/conformer/tdt/conformer_tdt_bpe_stateless.yaml @@ -0,0 +1,278 @@ +# This file contains the default values for training an TDT Conformer-Transducer ASR model, large size (~120M) with sub-word encoding. + +# You can find detailed info about TDT models at https://arxiv.org/abs/2304.06795. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of Conformer-Transducer, other parameters are the same as in this config file. + +# Note: the added duration outputs from the joiner make TDT models slightly larger than corresponding conventional RNN-T models, +# although the difference is tiny -- the added number of params is roughly num-durations X (joint_hidden + pred_hidden), typically in the +# order of thousands of params. This is negligible even with the "Small" config with around 14 million params. +# Recommended duraction config is [0, 1, 2, ... , n] where optimal n is usually between 4 and 8 depending on the dataset. 
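# For example (illustrative values, not from the original config): choosing n = 6 would correspond to
#   tdt_durations: [0, 1, 2, 3, 4, 5, 6]
#   num_tdt_durations: 7   # must always equal len(tdt_durations), since it sets joint.num_extra_outputs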
+ +# +--------------+---------+---------+----------+------------------+--------------+--------------------------+-----------------+ +# | Model | d_model | n_heads | n_layers | conv_kernel_size | weight_decay | pred_hidden/joint_hidden | decoder_context | +# +==============+=========+========+===========+==================+==============+==========================+=================+ +# | Large (117M)| 512 | 8 | 17 | 31 | 1e-3 | 640 | 2 | +# +--------------+---------+--------+-----------+------------------+--------------+--------------------------+-----------------+ + +# Default learning parameters in this config are set for global batch size of 2K while you may use lower values. +# To increase the global batch size with limited number of GPUs, you may use higher accumulate_grad_batches. +# However accumulate_grad_batches is better to be avoided as long as the global batch size is large enough and training is stable. + + +name: "Conformer-TDT-BPE-Stateless" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 640 + joint_hidden: 640 + + # variables for TDT configs. + tdt_durations: [0, 1, 2, 3, 4] + num_tdt_durations: 5 + + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? 
# path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 31 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.StatelessTransducerDecoder + context_size: 2 # The Stateless decoder uses 2 words as context by default. + normalization_mode: layer # This helps stabilize training for Stateless decoders. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 + t_max: null + dropout: 0.2 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. 
+ # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.2 + + # this variable is non-zero for this TDT model, as well as multi-blank models. It represents the number of + # additional outputs from the joiner, besides all tokens in the BPE vocab plus the (standard) blank symbol. + num_extra_outputs: ${model.model_defaults.num_tdt_durations} + + decoding: + # Using greedy decoding is highly recommended for TDT models. Using greedy-batch will give very bad results + # if omega is 0; even if omega is non-zero, greedy-batch results are still going to be inaccurate. + strategy: "greedy" + + model_type: "tdt" + + # this must not be None in order to use the TDT specific decoding method. + durations: ${model.model_defaults.tdt_durations} + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + # This is the main different between a TDT model and a conventional RNNT model -- the loss function. + loss_name: "tdt" + + tdt_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.001 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. + + # refer to https://arxiv.org/abs/2304.06795 for the meaning of the following three configs. + durations: ${model.model_defaults.tdt_durations} + sigma: 0.05 # hyper-param for under-normalization. + omega: 0.1 # weight for regular RNN-T loss. + + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: 500 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 10 # Interval of logging. + enable_progress_bar: True + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. 
+ num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + benchmark: false # needs to be false for models with variable-length speech input as it slows down training + + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_wer" + mode: "min" + save_top_k: 5 + always_save_nemo: True # saves the checkpoints as nemo files instead of PTL checkpoints + resume_if_exists: false + resume_ignore_no_checkpoint: false + + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null + diff --git a/nemo/collections/asr/losses/rnnt.py b/nemo/collections/asr/losses/rnnt.py index 63381f82ee4a..10b85acb42ef 100644 --- a/nemo/collections/asr/losses/rnnt.py +++ b/nemo/collections/asr/losses/rnnt.py @@ -35,7 +35,7 @@ import torch from omegaconf import DictConfig, OmegaConf -from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, RNNTLossPytorch +from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, RNNTLossPytorch, TDTLossPytorch from nemo.core.classes import Loss, typecheck from nemo.core.neural_types import LabelsType, LengthsType, LogprobsType, LossType, NeuralType from nemo.core.utils.k2_utils import K2_INSTALLATION_MESSAGE @@ -50,7 +50,7 @@ WARP_RNNT_AVAILABLE = False try: - from nemo.collections.asr.parts.numba.rnnt_loss import MultiblankRNNTLossNumba, RNNTLossNumba + from nemo.collections.asr.parts.numba.rnnt_loss import MultiblankRNNTLossNumba, RNNTLossNumba, TDTLossNumba NUMBA_RNNT_AVAILABLE = True except (ImportError, ModuleNotFoundError): @@ -138,6 +138,20 @@ class RNNTLossConfig: installation_msg=K2_INSTALLATION_MESSAGE, force_float32=False, ), + "tdt": RNNTLossConfig( + loss_name="tdt", + lib_name="numba", + min_version='0.53.0', + is_available=NUMBA_RNNT_AVAILABLE, + installation_msg=NUMBA_INSTALLATION_MESSAGE, + ), + "tdt_pytorch": RNNTLossConfig( + loss_name="tdt_pytorch", + lib_name="torch", + min_version='0.0', + is_available=True, + installation_msg="Pure Pytorch implementation of TDT loss. 
Slow and for debugging purposes only.", + ), } RNNT_LOSS_RESOLVER['default'] = RNNT_LOSS_RESOLVER['warprnnt_numba'] @@ -274,6 +288,30 @@ def resolve_rnnt_loss(loss_name: str, blank_idx: int, loss_kwargs: dict = None) blank=blank_idx, big_blank_durations=big_blank_durations, reduction='none', sigma=sigma ) _warn_unused_additional_kwargs(loss_name, loss_kwargs) + + elif loss_name == 'tdt': + fastemit_lambda = loss_kwargs.pop('fastemit_lambda', 0.0) + clamp = loss_kwargs.pop('clamp', -1.0) + durations = loss_kwargs.pop('durations', None) + sigma = loss_kwargs.pop('sigma', 0.0) + omega = loss_kwargs.pop('omega', 0.0) + loss_func = TDTLossNumba( + blank=blank_idx, + durations=durations, + reduction='none', + fastemit_lambda=fastemit_lambda, + clamp=clamp, + sigma=sigma, + omega=omega, + ) + _warn_unused_additional_kwargs(loss_name, loss_kwargs) + + elif loss_name == 'tdt_pytorch': + durations = loss_kwargs.pop('durations', None) + sigma = loss_kwargs.pop('sigma', 0.0) + loss_func = TDTLossPytorch(blank=blank_idx, durations=durations, reduction='none', sigma=sigma) + _warn_unused_additional_kwargs(loss_name, loss_kwargs) + elif loss_name == "graph_rnnt": loss_kwargs = _clean_kwargs(loss_name, loss_kwargs, GraphRnntLoss.__init__, ignore_params={"blank"}) loss_func = GraphRnntLoss(blank=blank_idx, **loss_kwargs) @@ -345,7 +383,13 @@ def __init__(self, num_classes, reduction: str = 'mean_batch', loss_name: str = Args: num_classes: Number of target classes for the joint network to predict. - (Excluding the RNN-T blank token). + In all cases (conventional RNNT, multi-blank RNNT, and TDT model), this equals the token-id + for the standard "blank" symbol. In particular, say V is the number of non-blank tokens in + the vocabulary, then in the case of, + standard RNNT: num_classes = V + multiblank RNNT: num_classes = V + number-big-blanks (since we store big-blanks before + standard blank, and the standard blank is the last symbol in the vocab) + TDT: num_classes = V. Note, V here does not include any of the "duration outputs". reduction: Type of reduction to perform on loss. Possible values are `mean_batch`, 'mean_volume`, `mean`, `sum` or None. diff --git a/nemo/collections/asr/losses/rnnt_pytorch.py b/nemo/collections/asr/losses/rnnt_pytorch.py index ab0b5cf4f630..bc6e5a25a3b2 100644 --- a/nemo/collections/asr/losses/rnnt_pytorch.py +++ b/nemo/collections/asr/losses/rnnt_pytorch.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + import torch from nemo.core.classes import Loss @@ -112,6 +114,136 @@ def compute_forward_prob(self, acts, labels, act_lens, label_lens): return log_prob +class TDTLossPytorch(Loss): + """ + Pure Python implementation of TDT loss (https://arxiv.org/pdf/2304.06795.pdf) + """ + + @property + def input_types(self): + """Input types definitions for CTCLoss. + """ + return { + "acts": NeuralType(('B', 'T', 'T', 'D'), LogprobsType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "act_lens": NeuralType(tuple('B'), LengthsType()), + "label_lens": NeuralType(tuple('B'), LengthsType()), + } + + @property + def output_types(self): + """Output types definitions for CTCLoss. 
+ loss: + NeuralType(None) + """ + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, blank: int, durations: List[int] = [], reduction: str = 'sum', sigma: float = 0.0): + super().__init__() + self.blank = blank + self.durations = durations + self.n_durations = len(durations) + self.reduction = reduction + self.sigma = sigma + + def forward(self, acts, labels, act_lens, label_lens): + label_acts = acts[:, :, :, : -self.n_durations] + duration_acts = acts[:, :, :, -self.n_durations :] + + # the - self.sigma here is for logit-undernormalization. Check the paper for details. + label_acts = torch.log_softmax(label_acts, -1) - self.sigma + + duration_acts = torch.log_softmax(duration_acts, -1) + + forward_logprob, _ = self.compute_forward_prob(label_acts, duration_acts, labels, act_lens, label_lens) + losses = -forward_logprob + if self.reduction == 'mean_batch': + losses = losses.mean() # global batch size average + elif self.reduction == 'mean': + losses = torch.div(losses, label_lens).mean() + elif self.reduction == 'sum': + losses = losses.sum() + elif self.reduction == 'mean_volume': + losses = losses.sum() / label_lens.sum() # same as above but longer samples weigh more + + return losses + + def logsumexp(self, a, b): + ret = torch.logsumexp(torch.stack([a, b]), dim=0) + return ret + + def compute_forward_prob(self, acts, duration_acts, labels, act_lens, label_lens): + """This function implements Equation 7 in the TDT paper https://arxiv.org/pdf/2304.06795.pdf, + Simply put, for each alpha(t, u), it sums over the contribution from all incoming blank arcs and non-blank arcs. + """ + B, T, U, _ = acts.shape + + log_alpha = torch.zeros(B, T, U) + log_alpha = log_alpha.cuda() + for b in range(B): + for t in range(T): + for u in range(U): + if u == 0: + if t == 0: + # both t and u are 0, this is the base case for alphas. + log_alpha[b, t, u] = 0.0 + else: + # u = 0 and t != 0: only considers blank emissions. + log_alpha[b, t, u] = -1000.0 + for n, l in enumerate(self.durations): + if ( + t - l >= 0 and l > 0 + ): # checking conditions for blank emission, l has to be at least 1 + tmp = ( + log_alpha[b, t - l, u] + + acts[b, t - l, u, self.blank] + + duration_acts[b, t - l, u, n] + ) + log_alpha[b, t, u] = self.logsumexp(tmp, 1.0 * log_alpha[b, t, u]) + + else: + # u != 0 here, need to consider both blanks and non-blanks. + log_alpha[b, t, u] = -1000.0 + for n, l in enumerate(self.durations): + if t - l >= 0: + if l > 0: # for blank emissions. Need to ensure index is not out-of-bound. + tmp = ( + log_alpha[b, t - l, u] + + acts[b, t - l, u, self.blank] + + duration_acts[b, t - l, u, n] + ) + log_alpha[b, t, u] = self.logsumexp(tmp, 1.0 * log_alpha[b, t, u]) + + # non-blank emissions. + tmp = ( + log_alpha[b, t - l, u - 1] + + acts[b, t - l, u - 1, labels[b, u - 1]] + + duration_acts[b, t - l, u - 1, n] + ) + log_alpha[b, t, u] = self.logsumexp(tmp, 1.0 * log_alpha[b, t, u]) + + log_probs = [] + for b in range(B): + tt = torch.Tensor([-1000.0]).cuda()[0] + + # need to loop over all possible ways that blank with different durations contributes to the final loss. 
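            # Clarifying note in math form (notation assumed from the TDT paper, not taken from this file):
            #   log P(y|x) = logsumexp over durations d > 0 of
            #       alpha(T_b - d, U_b) + log P_token(blank | T_b - d, U_b) + log P_dur(d | T_b - d, U_b)
            # with T_b = act_lens[b] and U_b = label_lens[b], i.e. every way the final blank can be
            # emitted so that it consumes exactly the remaining frames of utterance b.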
+ for n, l in enumerate(self.durations): + if act_lens[b] - l >= 0 and l > 0: + bb = ( + log_alpha[b, act_lens[b] - l, label_lens[b]] + + acts[b, act_lens[b] - l, label_lens[b], self.blank] + + duration_acts[b, act_lens[b] - l, label_lens[b], n] + ) + + tt = self.logsumexp(bb, 1.0 * tt) + + log_probs.append(tt) + + log_prob = torch.stack(log_probs) + + return log_prob, log_alpha + + class MultiblankRNNTLossPytorch(Loss): """ Pure Python implementation of multi-blank transducer loss (https://arxiv.org/pdf/2211.03541.pdf) @@ -136,7 +268,7 @@ def output_types(self): """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, blank, big_blank_durations, reduction, sigma): + def __init__(self, blank, big_blank_durations, reduction: str = "sum", sigma: float = 0.0): super().__init__() self.blank = blank self.big_blank_durations = big_blank_durations @@ -145,7 +277,7 @@ def __init__(self, blank, big_blank_durations, reduction, sigma): def forward(self, acts, labels, act_lens, label_lens): acts = torch.log_softmax(acts, -1) - self.sigma - forward_logprob = self.compute_forward_prob(acts, labels, act_lens, label_lens) + forward_logprob, _ = self.compute_forward_prob(acts, labels, act_lens, label_lens) losses = -forward_logprob if self.reduction == 'mean_batch': @@ -234,4 +366,4 @@ def compute_forward_prob(self, acts, labels, act_lens, label_lens): log_probs.append(to_append) log_prob = torch.stack(log_probs) - return log_prob + return log_prob, log_alpha diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 1ccc2d0ac6fc..55f9f4b5ea9f 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -204,6 +204,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): self.blank_id = blank_id self.num_extra_outputs = joint.num_extra_outputs self.big_blank_durations = self.cfg.get("big_blank_durations", None) + self.durations = self.cfg.get("durations", None) self.compute_hypothesis_token_set = self.cfg.get("compute_hypothesis_token_set", False) self.compute_langs = decoding_cfg.get('compute_langs', False) self.preserve_alignments = self.cfg.get('preserve_alignments', None) @@ -211,9 +212,21 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): self.compute_timestamps = self.cfg.get('compute_timestamps', None) self.word_seperator = self.cfg.get('word_seperator', ' ') - if self.big_blank_durations is not None: + if self.durations is not None: # this means it's a TDT model. + if blank_id == 0: + raise ValueError("blank_id must equal len(non_blank_vocabs) for TDT models") + if self.big_blank_durations is not None: + raise ValueError("duration and big_blank_durations can't both be not None") + if self.cfg.strategy not in ['greedy', 'greedy_batch']: + raise ValueError("currently only greedy and greedy_batch inference is supported for TDT models") + + if self.big_blank_durations is not None: # this means it's a multi-blank model. 
if blank_id == 0: raise ValueError("blank_id must equal len(vocabs) for multi-blank RNN-T models") + if self.cfg.strategy not in ['greedy', 'greedy_batch']: + raise ValueError( + "currently only greedy and greedy_batch inference is supported for multi-blank models" + ) possible_strategies = ['greedy', 'greedy_batch', 'beam', 'tsd', 'alsd', 'maes'] if self.cfg.strategy not in possible_strategies: @@ -254,17 +267,33 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): if self.cfg.strategy == 'greedy': if self.big_blank_durations is None: - self.decoding = greedy_decode.GreedyRNNTInfer( - decoder_model=decoder, - joint_model=joint, - blank_index=self.blank_id, - max_symbols_per_step=( - self.cfg.greedy.get('max_symbols', None) or self.cfg.greedy.get('max_symbols_per_step', None) - ), - preserve_alignments=self.preserve_alignments, - preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, - ) + if self.durations is None: + self.decoding = greedy_decode.GreedyRNNTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) + else: + self.decoding = greedy_decode.GreedyTDTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + durations=self.durations, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) else: self.decoding = greedy_decode.GreedyMultiblankRNNTInfer( decoder_model=decoder, @@ -281,17 +310,34 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): elif self.cfg.strategy == 'greedy_batch': if self.big_blank_durations is None: - self.decoding = greedy_decode.GreedyBatchedRNNTInfer( - decoder_model=decoder, - joint_model=joint, - blank_index=self.blank_id, - max_symbols_per_step=( - self.cfg.greedy.get('max_symbols', None) or self.cfg.greedy.get('max_symbols_per_step', None) - ), - preserve_alignments=self.preserve_alignments, - preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, - ) + if self.durations is None: + self.decoding = greedy_decode.GreedyBatchedRNNTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) + else: + self.decoding = greedy_decode.GreedyBatchedTDTInfer( + decoder_model=decoder, + joint_model=joint, + blank_index=self.blank_id, + durations=self.durations, + max_symbols_per_step=( + self.cfg.greedy.get('max_symbols', None) + or self.cfg.greedy.get('max_symbols_per_step', None) + ), + preserve_alignments=self.preserve_alignments, + preserve_frame_confidence=self.preserve_frame_confidence, + confidence_method_cfg=self.confidence_method_cfg, + ) + else: self.decoding = greedy_decode.GreedyBatchedMultiblankRNNTInfer( decoder_model=decoder, @@ 
-481,12 +527,12 @@ def decode_hypothesis(self, hypotheses_list: List[Hypothesis]) -> List[Union[Hyp # RNN-T sample level is already preprocessed by implicit RNNT decoding # Simply remove any blank and possibly big blank tokens - if self.blank_id != 0: - num_extra_outputs = 0 - if self.big_blank_durations is not None: - num_extra_outputs += len(self.big_blank_durations) + if self.big_blank_durations is not None: # multi-blank RNNT + num_extra_outputs = len(self.big_blank_durations) prediction = [p for p in prediction if p < self.blank_id - num_extra_outputs] - else: + elif self.durations is not None: # TDT model. + prediction = [p for p in prediction if p < self.blank_id] + else: # standard RNN-T prediction = [p for p in prediction if p != self.blank_id] # De-tokenize the integer tokens; if not computing timestamps @@ -1058,9 +1104,12 @@ class RNNTDecoding(AbstractRNNTDecoding): def __init__( self, decoding_cfg, decoder, joint, vocabulary, ): - blank_id = ( - len(vocabulary) + joint.num_extra_outputs - ) # we need to ensure blank is the last token in the vocab. This is needed for multi-blank RNN-T models. + # we need to ensure blank is the last token in the vocab for the case of RNNT and Multi-blank RNNT. + blank_id = len(vocabulary) + joint.num_extra_outputs + + if hasattr(decoding_cfg, 'model_type') and decoding_cfg.model_type == 'tdt': + blank_id = len(vocabulary) + self.labels_map = dict([(i, vocabulary[i]) for i in range(len(vocabulary))]) super(RNNTDecoding, self).__init__( @@ -1239,7 +1288,9 @@ def compute(self): @dataclass class RNNTDecodingConfig: + model_type: str = "rnnt" # one of "rnnt", "multiblank" or "tdt" strategy: str = "greedy_batch" + compute_hypothesis_token_set: bool = False # preserve decoding alignments diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index 99c71daebaa9..0870eb180776 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -196,11 +196,16 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): """ def __init__(self, decoding_cfg, decoder, joint, tokenizer: TokenizerSpec): - blank_id = tokenizer.tokenizer.vocab_size + blank_id = tokenizer.tokenizer.vocab_size # RNNT or TDT models. 
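        # Added summary of the joint-output layouts that these blank_id choices assume (for clarity only):
        #   standard RNNT : [ tok_0 ... tok_{V-1}, blank ]                               -> blank_id = V
        #   TDT           : [ tok_0 ... tok_{V-1}, blank, dur_0 ... dur_{D-1} ]          -> blank_id = V, durations are num_extra_outputs
        #   multi-blank   : [ tok_0 ... tok_{V-1}, big_blank_1 ... big_blank_k, blank ]  -> blank_id = V + k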
+ + # multi-blank RNNTs + if hasattr(decoding_cfg, 'model_type') and decoding_cfg.model_type == 'multiblank': + blank_id = tokenizer.tokenizer.vocab_size + joint.num_extra_outputs + self.tokenizer = tokenizer super(RNNTBPEDecoding, self).__init__( - decoding_cfg=decoding_cfg, decoder=decoder, joint=joint, blank_id=blank_id + joint.num_extra_outputs + decoding_cfg=decoding_cfg, decoder=decoder, joint=joint, blank_id=blank_id ) if isinstance(self.decoding, rnnt_beam_decoding.BeamRNNTInfer): diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index 84e08635834d..eec663813ca8 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -71,8 +71,13 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # Setup RNNT Loss loss_name, loss_kwargs = self.extract_rnnt_loss_cfg(self.cfg.get("loss", None)) + num_classes = self.joint.num_classes_with_blank - 1 # for standard RNNT and multi-blank + + if loss_name == 'tdt': + num_classes = num_classes - self.joint.num_extra_outputs + self.loss = RNNTLoss( - num_classes=self.joint.num_classes_with_blank - 1, + num_classes=num_classes, loss_name=loss_name, loss_kwargs=loss_kwargs, reduction=self.cfg.get("rnnt_reduction", "mean_batch"), diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py b/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py index 66e30c77590a..055d7aeb5fd9 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/__init__.py @@ -13,4 +13,8 @@ # limitations under the License. from nemo.collections.asr.parts.numba.rnnt_loss.rnnt import rnnt_loss_cpu, rnnt_loss_gpu -from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import MultiblankRNNTLossNumba, RNNTLossNumba +from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import ( + MultiblankRNNTLossNumba, + RNNTLossNumba, + TDTLossNumba, +) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py index 64c8955006ed..118ee88acbfe 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py @@ -236,6 +236,133 @@ def rnnt_loss_gpu( return True +def tdt_loss_gpu( + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + labels: torch.Tensor, + input_lengths: torch.Tensor, + label_lengths: torch.Tensor, + costs: torch.Tensor, + label_grads: torch.Tensor, + duration_grads: torch.Tensor, + blank_label: int, + durations: list, + fastemit_lambda: float, + clamp: float, + num_threads: int, + sigma: float, + omega: float, +): + """ + Wrapper method for accessing GPU TDT loss (https://arxiv.org/abs/2304.06795). + + CUDA implementation ported from [HawkAaron/warp-transducer](https://github.com/HawkAaron/warp-transducer). + + Args: + label_acts: Activation tensor of shape [B, T, U, V], where V includes the blank symbol. + duration_acts: Activation tensor of shape [B, T, U, D], where D is the number of durations. + labels: Ground truth labels of shape [B, U]. + input_lengths: Lengths of the acoustic sequence as a vector of ints [B]. + label_lengths: Lengths of the target sequence as a vector of ints [B]. + costs: Zero vector of length [B] in which costs will be set. + label_grads: Zero tensor of shape [B, T, U, V] where the gradient to label_acts will be set. + duration_grads: Zero tensor of shape [B, T, U, D] where the gradient to duration_acts will be set. 
+ blank_label: Index of the standard blank token in the vocabulary. + durations: A list of supported durations for TDT. Must include 0 and 1. + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. + num_threads: Number of threads for OpenMP. + sigma: logit-undernormalization weight used in the multi-blank model. Refer to + the multi-blank paper https://arxiv.org/abs/2304.06795 for detailed explanations. + omega: weight for regular RNN-T loss + """ + minibatch_size = label_acts.shape[0] + maxT = label_acts.shape[1] + maxU = label_acts.shape[2] + alphabet_size = label_acts.shape[3] + + if hasattr(cuda, 'external_stream'): + stream = cuda.external_stream(torch.cuda.current_stream(label_acts.device).cuda_stream) + else: + stream = cuda.default_stream() + + if num_threads < 0: + num_threads = multiprocessing.cpu_count() + + num_threads = max(1, num_threads) # have to use at least 1 thread + + gpu_size, status = rnnt_helper.get_workspace_size(maxT, maxU, minibatch_size, gpu=True) + + if status != global_constants.RNNTStatus.RNNT_STATUS_SUCCESS: + raise RuntimeError("Invalid parameter passed when calculating working space memory") + + # Select GPU index + cuda.select_device(label_acts.device.index) + gpu_workspace = torch.zeros(gpu_size, device=label_acts.device, dtype=label_acts.dtype, requires_grad=False) + + tdt_workspace = torch.zeros(len(durations), device=label_acts.device, dtype=torch.long, requires_grad=False) + + for i in range(0, len(durations)): + tdt_workspace[i] = durations[i] + + ### VIEW TENSORS AS VECTORS FOR POINTER INDEXING ### + label_acts, label_acts_shape = rnnt_helper.flatten_tensor(label_acts) + duration_acts, duration_acts_shape = rnnt_helper.flatten_tensor(duration_acts) + + wrapper = gpu_rnnt.GPUTDT( + minibatch=minibatch_size, + maxT=maxT, + maxU=maxU, + alphabet_size=alphabet_size, + workspace=gpu_workspace, + tdt_workspace=tdt_workspace, + num_durations=len(durations), + blank=blank_label, + fastemit_lambda=fastemit_lambda, + clamp=clamp, + num_threads=num_threads, + stream=stream, + sigma=sigma, + omega=omega, + ) + + if label_grads is None: + status = wrapper.score_forward( + label_acts=label_acts.data, + duration_acts=duration_acts.data, + costs=costs.data, + pad_labels=labels.data, + label_lengths=label_lengths.data, + input_lengths=input_lengths.data, + ) + + if status != global_constants.RNNTStatus.RNNT_STATUS_SUCCESS: + raise RuntimeError("Could not calculate forward scores") + + else: + ### FLATTEN GRAD TENSOR ### + label_grads, label_grads_shape = rnnt_helper.flatten_tensor(label_grads) + duration_grads, duration_grads_shape = rnnt_helper.flatten_tensor(duration_grads) + + status = wrapper.cost_and_grad( + label_acts=label_acts.data, + duration_acts=duration_acts.data, + label_grads=label_grads.data, + duration_grads=duration_grads.data, + costs=costs.data, + pad_labels=labels.data, + label_lengths=label_lengths.data, + input_lengths=input_lengths.data, + ) + + if status != global_constants.RNNTStatus.RNNT_STATUS_SUCCESS: + raise RuntimeError("Could not calculate forward scores") + + del gpu_workspace, tdt_workspace, wrapper + return True + + def multiblank_rnnt_loss_gpu( acts: torch.Tensor, labels: torch.Tensor, diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py index 
3ed9b82bf996..2ffe08be361e 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py @@ -34,7 +34,7 @@ from nemo.collections.asr.parts.numba.rnnt_loss import rnnt from nemo.collections.asr.parts.numba.rnnt_loss.utils.cpu_utils import cpu_rnnt -__all__ = ['rnnt_loss', 'RNNTLossNumba', 'MultiblankRNNTLossNumba'] +__all__ = ['rnnt_loss', 'RNNTLossNumba', 'MultiblankRNNTLossNumba', 'TDTLossNumba'] class _RNNTNumba(Function): @@ -91,6 +91,111 @@ def backward(ctx, grad_output): return ctx.grads.mul_(grad_output), None, None, None, None, None, None, None +class _TDTNumba(Function): + """ + Numba class for Token-and-Duration Transducer (TDT) loss (https://arxiv.org/abs/2304.06795) + """ + + @staticmethod + def forward( + ctx, + label_acts, + duration_acts, + labels, + act_lens, + label_lens, + blank, + durations, + reduction, + fastemit_lambda, + clamp, + sigma, + omega, + ): + """ + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network + labels: 2 dimensional Tensor containing all the targets of the batch with zero padded + act_lens: Tensor of size (batch) containing size of each output sequence from the network + label_lens: Tensor of (batch) containing label length of each example + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + + durations: list of durations for TDT model, must include 0 and 1, e.g. + [0, 1, 2, 3, 4]. + sigma: hyper-parameter for logit under-normalization method for training + TDT models. Recommended value 0.05. + omega: probability for sampling the standard RNN-T loss. + Refer to https://arxiv.org/abs/2304.06795 for detailed explanations for + the above parameters; + """ + is_cuda = label_acts.is_cuda + + certify_inputs(label_acts, labels, act_lens, label_lens) + if clamp < 0: + raise ValueError("`clamp` must be 0.0 or positive float value.") + + if is_cuda: + loss_func = rnnt.tdt_loss_gpu + else: + raise ValueError("TDT is not yet implemented for non CUDA computation.") + + label_grads = torch.zeros_like(label_acts) if label_acts.requires_grad else None + duration_grads = torch.zeros_like(duration_acts) if duration_acts.requires_grad else None + minibatch_size = label_acts.size(0) + costs = torch.zeros(minibatch_size, device=label_acts.device, dtype=label_acts.dtype) + + loss_func( + label_acts, + duration_acts, + labels=labels, + input_lengths=act_lens, + label_lengths=label_lens, + costs=costs, + label_grads=label_grads, + duration_grads=duration_grads, + blank_label=blank, + durations=durations, + fastemit_lambda=fastemit_lambda, + clamp=clamp, + sigma=sigma, + omega=omega, + num_threads=0, + ) + + if reduction in ['sum', 'mean']: + costs = costs.sum().unsqueeze_(-1) + if reduction == 'mean': + costs /= minibatch_size + + if label_grads is not None: + label_grads /= minibatch_size + duration_grads /= minibatch_size + + ctx.label_grads = label_grads + ctx.duration_grads = duration_grads + + return costs + + @staticmethod + def backward(ctx, grad_output): + if grad_output is not None and ctx.label_grads is not None: + grad_output = grad_output.view(-1, 1, 1, 1).to(ctx.label_grads) + return ( + ctx.label_grads.mul_(grad_output), + ctx.duration_grads.mul_(grad_output), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + ) + + class _MultiblankRNNTNumba(Function): """ Numba class for multi-blank transducer loss 
(https://arxiv.org/pdf/2211.03541.pdf) @@ -237,6 +342,52 @@ def multiblank_rnnt_loss( ) +def tdt_loss( + acts, + labels, + act_lens, + label_lens, + blank, + durations=[], + reduction='mean', + fastemit_lambda: float = 0.0, + clamp: float = 0.0, +): + """ + TDT RNN Transducer (https://arxiv.org/abs/2304.06795) Loss (functional form) + Args: + acts: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network + labels: 2 dimensional Tensor containing all the targets of the batch with zero padded + act_lens: Tensor of size (batch) containing size of each output sequence from the network + label_lens: Tensor of (batch) containing label length of each example + blank (int): standard blank label. + durations: list of durations for TDT model, e.g. + [0,1,2,3,4]. + sigma: hyper-parameter for logit under-normalization method for training + multi-blank transducers. Recommended value 0.05. + Refer to https://arxiv.org/abs/2304.06795 for detailed explanations for + the last two params. + reduction (string, optional): Specifies the reduction to apply to the output: + 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, + 'mean': the output losses will be divided by the target lengths and + then the mean over the batch is taken. Default: 'mean' + """ + if not acts.is_cuda: + # Since CPU requires log_softmax to be computed explicitly, we need to perform grad clipping + # *after* we have obtained the gradients of loss(logsoftmax()). + # This is highly wasteful since it requires a copy of the entire joint tensor which is expensive. + # CUDA version is much more efficient since it performs an inplace logsoftmax, and therefore + # can inplace clamp the gradient. + if clamp > 0.0: + acts = cpu_rnnt.LogSoftmaxGradModification.apply(acts, clamp) + + # NOTE manually done log_softmax for CPU version, + # log_softmax is computed within GPU version. + acts = torch.nn.functional.log_softmax(acts, -1) + + return _TDTNumba.apply(acts, labels, act_lens, label_lens, blank, durations, reduction, fastemit_lambda, clamp) + + class RNNTLossNumba(Module): """ Parameters: @@ -354,6 +505,79 @@ def forward(self, acts, labels, act_lens, label_lens): ) +class TDTLossNumba(Module): + """ + Parameters: + blank (int): standard blank label. + durations: list of durations for TDT model, e.g. + [0, 1, 2, 3, 4]. + sigma: hyper-parameter for logit under-normalization method for training + TDT. Recommended value 0.05. + omega: hyper-parameter for RNN-T loss for loss combination. + Refer to https://arxiv.org/abs/2304.06795 for detailed explanations for + the above parameters; + + reduction (string, optional): Specifies the reduction to apply to the output: + 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, + 'mean': the output losses will be divided by the target lengths and + then the mean over the batch is taken. Default: 'mean' + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. 
+ """ + + def __init__( + self, + blank, + durations=None, + reduction='mean', + fastemit_lambda: float = 0.0, + clamp: float = -1, + sigma: float = 0.0, + omega: float = 0.0, + ): + super(TDTLossNumba, self).__init__() + self.blank = blank + self.durations = durations if durations is not None else [] + self.fastemit_lambda = fastemit_lambda + self.clamp = float(clamp) if clamp > 0 else 0.0 + self.reduction = reduction + self.loss = _TDTNumba.apply + self.sigma = sigma + self.omega = omega + + def forward(self, acts, labels, act_lens, label_lens): + """ + log_probs: Tensor of (batch x seqLength x labelLength x outputDim) containing output from network + labels: 2 dimensional Tensor containing all the targets of the batch with zero padded + act_lens: Tensor of size (batch) containing size of each output sequence from the network + label_lens: Tensor of (batch) containing label length of each example + """ + + # TODO(hainan): in the future, we could further optimize this so that we don't need to + # make contiguous copies of the acts tensor. + label_acts, duration_acts = torch.split( + acts, [acts.shape[-1] - len(self.durations), len(self.durations)], dim=-1 + ) + label_acts = label_acts.contiguous() + duration_acts = torch.nn.functional.log_softmax(duration_acts, dim=-1).contiguous() + + return self.loss( + label_acts, + duration_acts, + labels, + act_lens, + label_lens, + self.blank, + self.durations, + self.reduction, + self.fastemit_lambda, + self.clamp, + self.sigma, + self.omega, + ) + + def check_type(var, t, name): if var.dtype is not t: raise TypeError("{} must be {}".format(name, t)) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py index dca4e732c062..70ffb459cb97 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt.py @@ -27,6 +27,7 @@ # limitations under the License. import multiprocessing +import random from typing import Optional, Tuple import numba @@ -499,24 +500,306 @@ def _prepare_workspace(self) -> (int, Tuple[torch.Tensor]): An int, representing the offset of the used workspace (practically, the slice of the workspace consumed) A tuple of tensors representing the shared workspace. 
""" - used_offset = 0 + used_offset, (denom, alphas, betas, llForward, llBackward) = super()._prepare_workspace() - # // denom - denom = self.gpu_workspace[used_offset : used_offset + self.maxT_ * self.maxU_ * self.minibatch_] - used_offset += self.maxT_ * self.maxU_ * self.minibatch_ + bigblank_durations = self.big_blank_workspace[: self.num_big_blanks] - # // alphas & betas - alphas = self.gpu_workspace[used_offset : used_offset + self.maxT_ * self.maxU_ * self.minibatch_] - used_offset += self.maxT_ * self.maxU_ * self.minibatch_ - betas = self.gpu_workspace[used_offset : used_offset + self.maxT_ * self.maxU_ * self.minibatch_] - used_offset += self.maxT_ * self.maxU_ * self.minibatch_ + return used_offset, (denom, alphas, betas, llForward, llBackward, bigblank_durations) - # // logllh - llForward = self.gpu_workspace[used_offset : used_offset + self.minibatch_] - used_offset += self.minibatch_ - llBackward = self.gpu_workspace[used_offset : used_offset + self.minibatch_] - used_offset += self.minibatch_ - bigblank_durations = self.big_blank_workspace[: self.num_big_blanks] +class GPUTDT(GPURNNT): + def __init__( + self, + sigma: float, + omega: float, + num_durations: int, + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + workspace, + tdt_workspace, + blank: int, + fastemit_lambda: float, + clamp: float, + num_threads: int, + stream, + ): + """ + Helper class to launch the CUDA Kernels to compute TDT Loss (https://arxiv.org/pdf/2211.03541). - return used_offset, (denom, alphas, betas, llForward, llBackward, bigblank_durations) + Args: + sigma: Hyper-parameter related to the logit-normalization method in training tdt transducers. + omega: Hyper-parameter related to the sampled training. + num_durations: Number of durations the model supports. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V + 1 + num-big-blanks + workspace: An allocated chunk of memory that will be sliced off and reshaped into required + blocks used as working memory. + tdt_workspace: An allocated chunk of memory that will be sliced off and reshaped into required + blocks used as working memory specifically for the tdt related computations. + blank: Index of the blank token in the vocabulary. Must be the last token in the vocab. + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. + num_threads: Number of OMP threads to launch. + stream: Numba Cuda Stream. + """ + super().__init__( + minibatch, maxT, maxU, alphabet_size, workspace, blank, fastemit_lambda, clamp, num_threads, stream + ) + self.tdt_workspace = cuda.as_cuda_array( + tdt_workspace + ) # a flat vector of integer numbers that represents allocated memory slices + + self.num_durations = num_durations + self.sigma = sigma + self.omega = omega + + def compute_cost_and_score( + self, + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + label_grads: Optional[torch.Tensor], + duration_grads: Optional[torch.Tensor], + costs: torch.Tensor, + labels: torch.Tensor, + label_lengths: torch.Tensor, + input_lengths: torch.Tensor, + ) -> global_constants.RNNTStatus: + """ + Compute both the loss and the gradients. 
+ + Args: + label_acts: A flattened tensor of shape [B, T, U, V] representing the activation matrix for tokens. + duration_acts: A flattened tensor of shape [B, T, U, D] representing the activation matrix for durations. + label_grad: A flattented zero tensor of same shape as label_acts. + duration_grad: A flattented zero tensor of same shape as duration_acts. + costs: A zero vector of length B which will be updated inplace with the log probability costs. + flat_labels: A flattened matrix of labels of shape [B, U] + label_lengths: A vector of length B that contains the original lengths of the acoustic sequence. + input_lengths: A vector of length B that contains the original lengths of the target sequence. + + Updates: + This will launch kernels that will update inline the following variables: + - *_grads: Gradients of the activation matrix wrt the costs vector. + - costs: Negative log likelihood of the forward variable. + + Returns: + An enum that either represents a successful RNNT operation or failure. + """ + training = label_grads is not None + + if training: + label_grads *= 0.0 # zero grads + duration_grads *= 0.0 # zero grads + + _, (denom, alphas, betas, llForward, llBackward, durations) = self._prepare_workspace() + + ######## START EXECUTION ######## + self.log_softmax(label_acts, denom) + + r = random.uniform(0, 1) + if r < self.omega: + # Compute alphas + gpu_rnnt_kernel.compute_alphas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + denom, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + ) + else: + # Compute alphas + gpu_rnnt_kernel.compute_tdt_alphas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + duration_acts, + denom, + self.sigma, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + durations, + self.num_durations, + ) + + if training: + # Compute betas + if r < self.omega: + gpu_rnnt_kernel.compute_betas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + denom, + betas, + llBackward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + ) + + # Compute gradient + grad_blocks_per_grid = self.minibatch_ * self.maxT_ * self.maxU_ + grad_threads_per_block = gpu_rnnt_kernel.GPU_RNNT_THREAD_SIZE + gpu_rnnt_kernel.compute_grad_kernel[grad_blocks_per_grid, grad_threads_per_block, self.stream_, 0]( + label_grads, + label_acts, + denom, + alphas, + betas, + llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + self.fastemit_lambda_, + self.clamp_, + ) + else: + gpu_rnnt_kernel.compute_tdt_betas_kernel[self.minibatch_, self.maxU_, self.stream_, 0]( + label_acts, + duration_acts, + denom, + self.sigma, + betas, + llBackward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + durations, + self.num_durations, + ) + + # Compute gradient + grad_blocks_per_grid = self.minibatch_ * self.maxT_ * self.maxU_ + grad_threads_per_block = gpu_rnnt_kernel.GPU_RNNT_THREAD_SIZE + gpu_rnnt_kernel.compute_tdt_grad_kernel[grad_blocks_per_grid, grad_threads_per_block, self.stream_, 0]( + label_grads, + duration_grads, + label_acts, + duration_acts, + denom, + self.sigma, + alphas, + betas, + 
llForward, + input_lengths, + label_lengths, + labels, + self.minibatch_, + self.maxT_, + self.maxU_, + self.alphabet_size_, + self.blank_, + durations, + self.num_durations, + self.fastemit_lambda_, + self.clamp_, + ) + + # // cost copy, negate (for log likelihood) and update with additional regularizers + # This needs to be done via CUDA, because we used temporary memory llForward + # passed to alpha, which was updated with log likelihoods. + # But copying this data into a pytorch pointer is more difficult (numba api is one way) + # Therefore launch a pointwise CUDA kernel to update the costs inplace from data of llForward + # Then negate to compute the loglikelihood. + threadsperblock = min(costs.shape[0], 32) + blockspergrid = (costs.shape[0] + (threadsperblock - 1)) // threadsperblock + rnnt_helper.compute_costs_data[blockspergrid, threadsperblock, self.stream_, 0]( + llForward, costs, self.fastemit_lambda_ + ) + self.stream_.synchronize() + + return global_constants.RNNTStatus.RNNT_STATUS_SUCCESS + + def cost_and_grad( + self, + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + label_grads: torch.Tensor, + duration_grads: torch.Tensor, + costs: torch.Tensor, + pad_labels: torch.Tensor, + label_lengths: torch.Tensor, + input_lengths: torch.Tensor, + ): + if ( + duration_acts is None + or label_acts is None + or label_grads is None + or duration_grads is None + or costs is None + or pad_labels is None + or label_lengths is None + or input_lengths is None + ): + return global_constants.RNNTStatus.RNNT_STATUS_INVALID_VALUE + + return self.compute_cost_and_score( + label_acts, duration_acts, label_grads, duration_grads, costs, pad_labels, label_lengths, input_lengths + ) + + def score_forward( + self, + label_acts: torch.Tensor, + duration_acts: torch.Tensor, + costs: torch.Tensor, + pad_labels: torch.Tensor, + label_lengths: torch.Tensor, + input_lengths: torch.Tensor, + ): + if ( + label_acts is None + or duration_acts is None + or costs is None + or pad_labels is None + or label_lengths is None + or input_lengths is None + ): + return global_constants.RNNTStatus.RNNT_STATUS_INVALID_VALUE + + return self.compute_cost_and_score( + label_acts, duration_acts, None, None, costs, pad_labels, label_lengths, input_lengths + ) + + def _prepare_workspace(self) -> (int, Tuple[torch.Tensor]): + """ + Helper method that uses the workspace and constructs slices of it that can be used. + + Returns: + An int, representing the offset of the used workspace (practically, the slice of the workspace consumed) + A tuple of tensors representing the shared workspace. 
+ """ + used_offset, (denom, alphas, betas, llForward, llBackward) = super()._prepare_workspace() + + durations = self.tdt_workspace[: self.num_durations] + + return used_offset, (denom, alphas, betas, llForward, llBackward, durations) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py index dbeb1544e7e3..4153af060941 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cuda_utils/gpu_rnnt_kernel.py @@ -35,6 +35,8 @@ GPU_RNNT_THREAD_SIZE = 256 +INF = 10000.0 + @cuda.jit(device=True, inline=True) def logp( @@ -62,6 +64,12 @@ def logp( return denom[col] + acts[col * alphabet_size + v] +@cuda.jit(device=True, inline=True) +def logp_duration(acts: torch.Tensor, maxT: int, maxU: int, num_durations: int, mb: int, t: int, u: int, v: int): + col = (mb * maxT + t) * maxU + u + return acts[col * num_durations + v] + + @cuda.jit() def compute_alphas_kernel( acts: torch.Tensor, @@ -875,3 +883,526 @@ def compute_multiblank_grad_kernel( # update internal index through the thread_buffer; # until idx < V + 1, such that entire vocabulary has been updated. idx += GPU_RNNT_THREAD_SIZE + + +@cuda.jit() +def compute_tdt_alphas_kernel( + acts: torch.Tensor, + duration_acts: torch.Tensor, + denom: torch.Tensor, + sigma: float, + alphas: torch.Tensor, + llForward: torch.Tensor, + xlen: torch.Tensor, + ylen: torch.Tensor, + mlabels: torch.Tensor, # [B] + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + blank_: int, + durations: torch.Tensor, + num_durations: int, +): + """ + Compute alpha (forward variable) probabilities over the transduction step. + + Args: + acts: Tensor of shape [B, T, U, V] flattened. Represents the logprobs activation tensor for tokens. + duration_acts: Tensor of shape [B, T, U, D] flattened. Represents the logprobs activation tensor for duration. + denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the logprobs activation tensor for tokens. + + alphas: Zero tensor of shape [B, T, U]. Will be updated inside the kernel with the forward variable + probabilities. + llForward: Zero tensor of shape [B]. Represents the log-likelihood of the forward pass. + Returned as the forward pass loss that is reduced by the optimizer. + xlen: Vector of length B which contains the actual acoustic sequence lengths in the padded + activation tensor. + ylen: Vector of length B which contains the actual target sequence lengths in the padded + activation tensor. + mlabels: Matrix of shape [B, U+1] (+1 here is due to token - usually the RNNT blank). + The matrix contains the padded target transcription that must be predicted. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V+1 (inclusive of RNNT blank). + blank_: Index of the TDT blank token in the vocabulary. Must be the last token in the vocab. + + Updates: + Kernel inplace updates the following inputs: + - alphas: forward variable scores. + - llForward: log-likelihood of forward variable. 
+ """ + # // launch B blocks, each block has U threads + b = cuda.blockIdx.x # // batch id + u = cuda.threadIdx.x # label id, u + T = xlen[b] # select AM length of current sample + U = ylen[b] + 1 # select target length of current sample, +1 for the blank token + + labels: torch.Tensor = mlabels[b] # mb label start point, equivalent to mlabels + b * (maxU - 1) + offset = b * maxT * maxU # pointer indexing offset + + # alphas += offset # pointer offset, ignored since we explicitly add offset + + # Initilize alpha[b, t=0, u=0] for all b in B + if u == 0: + alphas[offset] = 0 + + # sync until all alphas are initialized + cuda.syncthreads() + + # Ordinary alpha calculations, broadcast across B=b and U=u + # Look up forward variable calculation from rnnt_numpy.forward_pass() + for n in range(1, T + U - 1): + t = n - u + + if u == 0: + # when u == 0, we only consider blank emissions. + if t > 0 and t < T: + alphas[offset + t * maxU + u] = -INF + + for i in range(1, num_durations): # skip 0 since blank emission has to advance by at least one + if t >= durations[i]: + alphas[offset + t * maxU + u] = rnnt_helper.log_sum_exp( + alphas[offset + t * maxU + u], # the current alpha value + alphas[offset + (t - durations[i]) * maxU + u] # alpha(t - duration, u) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t - durations[i], u, blank_ + ) # logp of blank emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t - durations[i], u, i + ), # logp of duration + ) + else: + break # since durations are in ascending order, when we encounter a duration that is too large, then + # there is no need to check larger durations after that. + + elif u < U: + # when t == 0, we only consider the non-blank emission. + if t == 0: + alphas[offset + u] = ( + alphas[offset + u - 1] # alpha(t, u - 1) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t, u - 1, labels[u - 1] + ) # logp of token emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t, u - 1, 0 + ) # t = 0, so it must be duration = 0. Therefore the last argument passed to logp_duration() is 0. + ) + + # now we have t != 0 and u != 0, and we need to consider both non-blank and blank emissions. + elif t > 0 and t < T: + no_emit = -INF # no_emit stores the score for all blank emissions. + for i in range(1, num_durations): + if t >= durations[i]: + no_emit = rnnt_helper.log_sum_exp( + no_emit, # current score + alphas[offset + (t - durations[i]) * maxU + u] # alpha(t - duration, u) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t - durations[i], u, blank_ + ) # logp of blank emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t - durations[i], u, i + ), # logp of duration + ) + else: + break # we can exit the loop early here, same as the case for u == 0 above. + + emit = -INF # emit stores the score for non-blank emissions. 
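+                # Unlike the blank case above, this loop starts at duration index 0,
+                # because a token emission may keep the current frame (duration 0) as well as advance it.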
+ for i in range(0, num_durations): + if t >= durations[i]: + emit = rnnt_helper.log_sum_exp( + emit, # current score + alphas[offset + (t - durations[i]) * maxU + u - 1] # alpha(t - duration, u - 1) + + logp( + denom, acts, maxT, maxU, alphabet_size, b, t - durations[i], u - 1, labels[u - 1] + ) # logp of non-blank emission + - sigma # logit under-normalization + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t - durations[i], u - 1, i + ), # logp of duration + ) + else: + break # we can exit the loop early here, same as the case for u == 0 above. + + # combining blank and non-blank emissions. + alphas[offset + t * maxU + u] = rnnt_helper.log_sum_exp(emit, no_emit) + + # sync across all B=b and U=u + cuda.syncthreads() + + # After final sync, the forward log-likelihood can be computed as the summataion of + # alpha(T - duration, U - 1) + logp(blank, duration | t - duration, U - 1), over different durations. + if u == 0: + # first we consider duration = 1 + loglike = ( + alphas[offset + (T - 1) * maxU + U - 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, T - 1, U - 1, blank_) + - sigma + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - 1, U - 1, 1) + ) + + # then we add the scores for duration > 1, if such durations are possible given the audio lengths. + for i in range(2, num_durations): + if T >= durations[i]: + big_blank_loglike = ( + alphas[offset + (T - durations[i]) * maxU + U - 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, T - durations[i], U - 1, blank_) + - sigma + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - durations[i], U - 1, i) + ) + loglike = rnnt_helper.log_sum_exp(loglike, big_blank_loglike) + else: + break + + llForward[b] = loglike + + +@cuda.jit() +def compute_tdt_betas_kernel( + acts: torch.Tensor, + duration_acts: torch.Tensor, + denom: torch.Tensor, + sigma: float, + betas: torch.Tensor, + llBackward: torch.Tensor, + xlen: torch.Tensor, + ylen: torch.Tensor, + mlabels: torch.Tensor, # [B, U] + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + blank_: int, + durations: torch.Tensor, + num_durations: int, +): + """ + Compute beta (backward variable) probabilities over the transduction step. + + Args: + acts: Tensor of shape [B, T, U, V] flattened. Represents the logprobs activation tensor for tokens. + duration_acts: Tensor of shape [B, T, U, D] flattened. Represents the logprobs activation tensor for duations. + denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the logprobs activation tensor + across entire vocabulary. + betas: Zero tensor of shape [B, T, U]. Will be updated inside the kernel with the backward variable + probabilities. + llBackward: Zero tensor of shape [B]. Represents the log-likelihood of the backward pass. + Returned as the backward pass loss that is reduced by the optimizer. + xlen: Vector of length B which contains the actual acoustic sequence lengths in the padded + activation tensor. + ylen: Vector of length B which contains the actual target sequence lengths in the padded + activation tensor. + mlabels: Matrix of shape [B, U+1] (+1 here is due to token - usually the RNNT blank). + The matrix contains the padded target transcription that must be predicted. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. 
+ alphabet_size: The vocabulary dimension V+1 (inclusive of RNNT blank). + blank_: Index of the RNNT blank token in the vocabulary. Generally the first or last token in the vocab. + + Updates: + Kernel inplace updates the following inputs: + - betas: backward variable scores. + - llBackward: log-likelihood of backward variable. + """ + # // launch B blocks, each block has U threads + b = cuda.blockIdx.x # // batch id + u = cuda.threadIdx.x # label id, u + T = xlen[b] # select AM length of current sample + U = ylen[b] + 1 # select target length of current sample, +1 for the blank token + + labels: torch.Tensor = mlabels[b] # mb label start point, equivalent to mlabels + b * (maxU - 1) + offset = b * maxT * maxU # pointer indexing offset + + # betas += offset # pointer offset, ignored since we explicitly add offset + + # Initilize beta[b, t=T-1, u=U-1] for all b in B with log_probs[b, t=T-1, u=U-1, blank] + if u == 0: + betas[offset + (T - 1) * maxU + U - 1] = ( + logp(denom, acts, maxT, maxU, alphabet_size, b, T - 1, U - 1, blank_) + - sigma + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - 1, U - 1, 1) + ) + + # sync until all betas are initialized + cuda.syncthreads() + + # Ordinary beta calculations, broadcast across B=b and U=u + # Look up backward variable calculation from rnnt_numpy.backward_pass() + for n in range(T + U - 2, -1, -1): + t = n - u + + if u == U - 1: + # u == U - 1, we only consider blank emissions. + if t >= 0 and t + 1 < T: + betas[offset + t * maxU + U - 1] = -INF + for i in range(1, num_durations): + # although similar, the computation for beta's is slightly more complex for boundary cases. + # the following two cases correspond to whether t is exactly certain duration away from T. + # and they have slightly different update rules. + + if t + durations[i] < T: + betas[offset + t * maxU + U - 1] = rnnt_helper.log_sum_exp( + betas[offset + t * maxU + U - 1], + betas[ + offset + (t + durations[i]) * maxU + U - 1 + ] # beta[t, U - 1] depends on the value beta[t + duration, U - 1] here. + + logp(denom, acts, maxT, maxU, alphabet_size, b, t, U - 1, blank_) # log prob of blank + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t, U - 1, i + ) # log prob of duration (durations[i]) + - sigma, # for logit undernormalization + ) + elif t + durations[i] == T: + betas[offset + t * maxU + U - 1] = rnnt_helper.log_sum_exp( + betas[offset + t * maxU + U - 1], + # here we have one fewer term than the "if" block above. This could be seen as having "0" here since + # beta[t + duration, U - 1] isn't defined because t + duration is out of bound. + logp(denom, acts, maxT, maxU, alphabet_size, b, t, U - 1, blank_) # log prob of blank + + logp_duration( + duration_acts, maxT, maxU, num_durations, b, t, U - 1, i + ) # log prob of duration (durations[i]) + - sigma, # for logit undernormalization. Basically every time sigma shows up is because of logit undernormalization. + ) + + elif u < U - 1: + if t == T - 1: + # t == T - 1, so we only consider non-blank with duration 0. (Note, we can't have blank emissions with duration = 0) + betas[offset + (T - 1) * maxU + u] = ( + betas[offset + (T - 1) * maxU + u + 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, T - 1, u, labels[u]) # non-blank log prob + + logp_duration(duration_acts, maxT, maxU, num_durations, b, T - 1, u, 0) # log prob of duration 0 + - sigma + ) + + elif t >= 0 and t < T - 1: + # now we need to consider both blank andnon-blanks. 
Similar to alphas, we first compute them separately with no_emit and emit. + no_emit = -INF + for i in range(1, num_durations): + if t + durations[i] < T: + no_emit = rnnt_helper.log_sum_exp( + no_emit, + betas[offset + (t + durations[i]) * maxU + u] + + logp(denom, acts, maxT, maxU, alphabet_size, b, t, u, blank_) + + logp_duration(duration_acts, maxT, maxU, num_durations, b, t, u, i) + - sigma, + ) + + emit = -INF + for i in range(0, num_durations): + if t + durations[i] < T: + emit = rnnt_helper.log_sum_exp( + emit, + betas[offset + (t + durations[i]) * maxU + u + 1] + + logp(denom, acts, maxT, maxU, alphabet_size, b, t, u, labels[u]) + + logp_duration(duration_acts, maxT, maxU, num_durations, b, t, u, i) + - sigma, + ) + + # combining all blank emissions and all non-blank emissions. + betas[offset + t * maxU + u] = rnnt_helper.log_sum_exp(emit, no_emit) + + # sync across all B=b and U=u + cuda.syncthreads() + + # After final sync, betas[b, 0, 0] gives log-likelihood of backward pass, same with conventional Transducers. + if u == 0: + llBackward[b] = betas[offset] + + +@cuda.jit() +def compute_tdt_grad_kernel( + label_grads: torch.Tensor, + duration_grads: torch.Tensor, + acts: torch.Tensor, + duration_acts: torch.Tensor, + denom: torch.Tensor, + sigma: float, + alphas: torch.Tensor, + betas: torch.Tensor, + logll: torch.Tensor, + xlen: torch.Tensor, + ylen: torch.Tensor, + mlabels: torch.Tensor, # [B, U] + minibatch: int, + maxT: int, + maxU: int, + alphabet_size: int, + blank_: int, + durations: torch.Tensor, + num_durations: int, + fastemit_lambda: float, + clamp: float, +): + """ + Compute gradients over the transduction step. + + Args: + grads: Zero Tensor of shape [B, T, U, V] to store gradients for tokens. + duration_grads: Zero Tensor of shape [B, T, U, D] to store gradients for durations. + + acts: Tensor of shape [B, T, U, V] flattened. Represents the logprobs activation tensor for tokens. + duration_acts: Tensor of shape [B, T, U, D] flattened. Represents the logprobs activation tensor for durations. + denom: Tensor of shape [B, T, U] flattened. Represents the denominator of the logprobs activation tensor + across entire vocabulary. + alphas: Alpha variable, contains forward probabilities. A tensor of shape [B, T, U]. + betas: Beta varoable, contains backward probabilities. A tensor of shape [B, T, U]. + logll: Log-likelihood of the forward variable, represented as a vector of shape [B]. + Represents the log-likelihood of the forward pass. + xlen: Vector of length B which contains the actual acoustic sequence lengths in the padded + activation tensor. + ylen: Vector of length B which contains the actual target sequence lengths in the padded + activation tensor. + mlabels: Matrix of shape [B, U+1] (+1 here is due to token - usually the RNNT blank). + The matrix contains the padded target transcription that must be predicted. + minibatch: Int representing the batch size. + maxT: The maximum possible acoustic sequence length. Represents T in the logprobs tensor. + maxU: The maximum possible target sequence length. Represents U in the logprobs tensor. + alphabet_size: The vocabulary dimension V+1 (inclusive of RNNT blank). + blank_: Index of the RNNT blank token in the vocabulary. Generally the first or last token in the vocab. + fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to + FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. + clamp: Float value. When set to value >= 0.0, will clamp the gradient to [-clamp, clamp]. 
+ + Updates: + Kernel inplace updates the following inputs: + - grads: Gradients with respect to the log likelihood (logll). + """ + # Kernel call: + # blocks_per_grid = minibatch (b) * maxT (t) * maxU (u) + # threads_per_block = constant buffer size of parallel threads (v :: Constant) + tid = cuda.threadIdx.x # represents v, taking steps of some constant size + idx = tid # index of v < V+1; in steps of constant buffer size + col = cuda.blockIdx.x # represents a fused index of b * t * u + + # Decompose original indices from fused `col` + u = col % maxU # (b * t * u) % u = u + bt = (col - u) // maxU # (b * t * u - u) // U = b * t + t = bt % maxT # (b * t) % t = t + mb = (bt - t) // maxT # (b * t - t) // T = b + + # constants + T = xlen[mb] # select AM length of current sample + U = ylen[mb] + 1 # select target length of current sample, +1 for the blank token + labels: torch.Tensor = mlabels[mb] # labels = mlabels + mb * (maxU - 1); + + # Buffered gradient calculations, broadcast across B=b, T=t and U=u, looped over V with some constant stride. + # Look up gradient calculation from rnnt_numpy.compute_gradient() + + if t < T and u < U: + logpk_blank = ( + denom[col] + acts[col * alphabet_size + blank_] - sigma + ) # whenever sigma is used, it is for logit under-normalization. + + if idx < num_durations: + grad = 0.0 + if t + durations[idx] < T and u < U - 1: # for label + logpk_label = denom[col] + acts[col * alphabet_size + labels[u]] - sigma + grad -= math.exp(alphas[col] + betas[col + 1 + durations[idx] * maxU] + logpk_label - logll[mb]) + + if t + durations[idx] < T and idx > 0: # for blank in the middle + grad -= math.exp(alphas[col] + betas[col + durations[idx] * maxU] + logpk_blank - logll[mb]) + + if t + durations[idx] == T and idx >= 1 and u == U - 1: # for blank as the last symbol + grad -= math.exp(alphas[col] + logpk_blank - logll[mb]) + + grad = grad * math.exp(duration_acts[col * num_durations + idx]) + duration_grads[col * num_durations + idx] = grad + + # For cuda kernels, maximum number of threads per block is limited to some value. + # However, it may be the case that vocabulary size is larger than this limit + # To work around this, an arbitrary thread buffer size is chosen such that, + # 1) each element within the thread pool operates independently of the other + # 2) An inner while loop moves the index of each buffer element by the size of the buffer itself, + # such that all elements of the vocabulary size are covered in (V + 1 // thread_buffer) number of steps. + # As such, each thread will perform the while loop at least (V + 1 // thread_buffer) number of times + while idx < alphabet_size: + # remember, `col` represents the tri-index [b, t, u] + # therefore; logpk = denom[b, t, u] + acts[b, t, u, v] + logpk = denom[col] + acts[col * alphabet_size + idx] + # initialize the grad of the sample acts[b, t, u, v] + grad = math.exp(alphas[col] + betas[col] + logpk - logll[mb]) + + # If FastEmit regularization is enabled, calculate the gradeint of probability of predicting the next label + # at the current timestep. + # The formula for this is Equation 9 in https://arxiv.org/abs/2010.11148, multiplied by the log probability + # of the current step (t, u), normalized by the total log likelihood. + # Once the gradient has been calculated, scale it by `fastemit_lambda`, as in Equation 10. 
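+            # For TDT, the FastEmit term below is additionally summed over all admissible durations,
+            # with each duration path weighted by its duration log-probability (duration_acts).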
+ if fastemit_lambda > 0.0 and u < U - 1: + fastemit_grad = 0.0 + + for i in range(0, num_durations): + if t + durations[i] < T: + fastemit_grad += fastemit_lambda * math.exp( + alphas[col] # alphas(t, u) + + (denom[col] + acts[col * alphabet_size + labels[u]]) # log prob of token emission + + duration_acts[col * num_durations + i] # duration log-prob + + betas[col + 1 + durations[i] * maxU] # betas(t, u+1) + + logpk # log Pr(k|t, u) + - sigma # for logit under-normalization + - logll[mb] # total log likelihood for normalization + ) + else: + fastemit_grad = 0.0 + + # Update the gradient of act[b, t, u, v] with the gradient from FastEmit regularization + grad = grad + fastemit_grad + + # grad to last blank transition + # grad[b, T-1, U-1, v=blank] -= exp(alphas[b, t, u] + logpk - sigma - logll[b] + logp(duration) for all possible non-zero durations. + if idx == blank_ and u == U - 1: + for i in range(1, num_durations): + if t == T - durations[i]: + grad -= math.exp( + alphas[col] + logpk - sigma - logll[mb] + duration_acts[col * num_durations + i] + ) + + # grad of blank across t < T; + # grad[b, t 0.0: + g = label_grads[col * alphabet_size + idx] + g = min(g, clamp) + g = max(g, -clamp) + label_grads[col * alphabet_size + idx] = g + + # update internal index through the thread_buffer; + # until idx < V + 1, such that entire vocabulary has been updated. + idx += GPU_RNNT_THREAD_SIZE diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 5e98b03f2fe2..42b14fd7b8bf 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -2202,3 +2202,564 @@ class GreedyBatchedRNNTInferConfig: preserve_alignments: bool = False preserve_frame_confidence: bool = False confidence_method_cfg: Optional[ConfidenceMethodConfig] = None + + +class GreedyTDTInfer(_GreedyRNNTInfer): + """A greedy TDT decoder. + + Sequence level greedy decoding, performed auto-repressively. + + Args: + decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. + joint_model: rnnt_utils.AbstractRNNTJoint implementation. + blank_index: int index of the blank token. Must be len(vocabulary) for TDT models. + durations: a list containing durations for TDT. + max_symbols_per_step: Optional int. The maximum number of symbols that can be added + to a sequence in a single time step; if set to None then there is + no limit. + preserve_alignments: Bool flag which preserves the history of alignments generated during + greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `alignments` in it. Here, `alignments` is a List of List of + Tuple(Tensor (of length V + 1 + num-big-blanks), Tensor(scalar, label after argmax)). + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more targets from a vocabulary. + U is the number of target tokens for the current timestep Ti. + preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated + during greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of List of floats. + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. 
+ U is the number of target tokens for the current timestep Ti. + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence scores. + name: The method name (str). + Supported values: + - 'max_prob' for using the maximum token probability as a confidence. + - 'entropy' for using normalized entropy of a log-likelihood vector. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + Supported values: + - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). + Note that for this entropy, the temperature should comply the following inequality: + 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. + Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/Tsallis_entropy + - 'renui' for the Rényi entropy. + Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy + temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the temperature equals one, scaling is not applied to 'max_prob', + and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. + Supported values: + - 'lin' for using the linear mapping. + - 'exp' for using exponential mapping with linear shift. + """ + + def __init__( + self, + decoder_model: rnnt_abstract.AbstractRNNTDecoder, + joint_model: rnnt_abstract.AbstractRNNTJoint, + blank_index: int, + durations: list, + max_symbols_per_step: Optional[int] = None, + preserve_alignments: bool = False, + preserve_frame_confidence: bool = False, + confidence_method_cfg: Optional[DictConfig] = None, + ): + super().__init__( + decoder_model=decoder_model, + joint_model=joint_model, + blank_index=blank_index, + max_symbols_per_step=max_symbols_per_step, + preserve_alignments=preserve_alignments, + preserve_frame_confidence=preserve_frame_confidence, + confidence_method_cfg=confidence_method_cfg, + ) + self.durations = durations + + @typecheck() + def forward( + self, + encoder_output: torch.Tensor, + encoded_lengths: torch.Tensor, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + """Returns a list of hypotheses given an input batch of the encoder hidden embedding. + Output token is generated auto-repressively. + Args: + encoder_output: A tensor of size (batch, features, timesteps). + encoded_lengths: list of int representing the length of each sequence + output sequence. + Returns: + packed list containing batch number of sentences (Hypotheses). 
+ """ + # Preserve decoder and joint training state + decoder_training_state = self.decoder.training + joint_training_state = self.joint.training + + with torch.inference_mode(): + # Apply optional preprocessing + encoder_output = encoder_output.transpose(1, 2) # (B, T, D) + + self.decoder.eval() + self.joint.eval() + + hypotheses = [] + # Process each sequence independently + with self.decoder.as_frozen(), self.joint.as_frozen(): + for batch_idx in range(encoder_output.size(0)): + inseq = encoder_output[batch_idx, :, :].unsqueeze(1) # [T, 1, D] + logitlen = encoded_lengths[batch_idx] + + partial_hypothesis = partial_hypotheses[batch_idx] if partial_hypotheses is not None else None + hypothesis = self._greedy_decode(inseq, logitlen, partial_hypotheses=partial_hypothesis) + hypotheses.append(hypothesis) + + # Pack results into Hypotheses + packed_result = pack_hypotheses(hypotheses, encoded_lengths) + + self.decoder.train(decoder_training_state) + self.joint.train(joint_training_state) + + return (packed_result,) + + @torch.no_grad() + def _greedy_decode( + self, x: torch.Tensor, out_len: torch.Tensor, partial_hypotheses: Optional[rnnt_utils.Hypothesis] = None + ): + # x: [T, 1, D] + # out_len: [seq_len] + + # Initialize blank state and empty label set in Hypothesis + hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[], last_token=None) + + if partial_hypotheses is not None: + hypothesis.last_token = partial_hypotheses.last_token + hypothesis.y_sequence = ( + partial_hypotheses.y_sequence.cpu().tolist() + if isinstance(partial_hypotheses.y_sequence, torch.Tensor) + else partial_hypotheses.y_sequence + ) + if partial_hypotheses.dec_state is not None: + hypothesis.dec_state = self.decoder.batch_concat_states([partial_hypotheses.dec_state]) + hypothesis.dec_state = _states_to_device(hypothesis.dec_state, x.device) + + if self.preserve_alignments: + # Alignments is a 2-dimensional dangling list representing T x U + hypothesis.alignments = [[]] + + if self.preserve_frame_confidence: + hypothesis.frame_confidence = [[]] + + time_idx = 0 + while time_idx < out_len: + # Extract encoder embedding at timestep t + # f = x[time_idx, :, :].unsqueeze(0) # [1, 1, D] + f = x.narrow(dim=0, start=time_idx, length=1) + + # Setup exit flags and counter + not_blank = True + symbols_added = 0 + + need_loop = True + # While blank is not predicted, or we dont run out of max symbols per timestep + while need_loop and (self.max_symbols is None or symbols_added < self.max_symbols): + # In the first timestep, we initialize the network with RNNT Blank + # In later timesteps, we provide previous predicted label as input. + if hypothesis.last_token is None and hypothesis.dec_state is None: + last_label = self._SOS + else: + last_label = label_collate([[hypothesis.last_token]]) + + # Perform prediction network and joint network steps. + g, hidden_prime = self._pred_step(last_label, hypothesis.dec_state) + # If preserving per-frame confidence, log_normalize must be true + logits = self._joint_step(f, g, log_normalize=False) + logp = logits[0, 0, 0, : -len(self.durations)] + if self.preserve_frame_confidence: + logp = torch.log_softmax(logp, -1) + + duration_logp = torch.log_softmax(logits[0, 0, 0, -len(self.durations) :], dim=-1) + del g + + # torch.max(0) op doesnt exist for FP 16. + if logp.dtype != torch.float32: + logp = logp.float() + + # get index k, of max prob + v, k = logp.max(0) + k = k.item() # K is the label at timestep t_s in inner loop, s >= 0. 
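+                # The duration head is decoded independently of the token head: its argmax below
+                # determines how many encoder frames to skip before the next decoding step.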
+ + d_v, d_k = duration_logp.max(0) + d_k = d_k.item() + + skip = self.durations[d_k] + + if self.preserve_alignments: + # insert logprobs into last timestep + hypothesis.alignments[-1].append((logp.to('cpu'), torch.tensor(k, dtype=torch.int32))) + + if self.preserve_frame_confidence: + # insert confidence into last timestep + hypothesis.frame_confidence[-1].append(self._get_confidence(logp)) + + del logp + + # If blank token is predicted, exit inner loop, move onto next timestep t + if k == self._blank_index: + not_blank = False + + # this rarely happens, but we manually increment the `skip` number + # if blank is emitted and duration=0 is predicted. This prevents possible + # infinite loops. + if skip == 0: + skip = 1 + + if self.preserve_alignments: + # convert Ti-th logits into a torch array + hypothesis.alignments.append([]) # blank buffer for next timestep + + if self.preserve_frame_confidence: + hypothesis.frame_confidence.append([]) # blank buffer for next timestep + else: + # Append token to label set, update RNN state. + hypothesis.y_sequence.append(k) + hypothesis.score += float(v) + hypothesis.timestep.append(time_idx) + hypothesis.dec_state = hidden_prime + hypothesis.last_token = k + + # Increment token counter. + symbols_added += 1 + time_idx += skip + need_loop = skip == 0 + + if symbols_added == self.max_symbols: + time_idx += 1 + + # Remove trailing empty list of Alignments + if self.preserve_alignments: + if len(hypothesis.alignments[-1]) == 0: + del hypothesis.alignments[-1] + + # Remove trailing empty list of per-frame confidence + if self.preserve_frame_confidence: + if len(hypothesis.frame_confidence[-1]) == 0: + del hypothesis.frame_confidence[-1] + + # Unpack the hidden states + hypothesis.dec_state = self.decoder.batch_select_state(hypothesis.dec_state, 0) + + return hypothesis + + +class GreedyBatchedTDTInfer(_GreedyRNNTInfer): + """A batch level greedy TDT decoder. + Batch level greedy decoding, performed auto-repressively. + Args: + decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. + joint_model: rnnt_utils.AbstractRNNTJoint implementation. + blank_index: int index of the blank token. Must be len(vocabulary) for TDT models. + durations: a list containing durations. + max_symbols_per_step: Optional int. The maximum number of symbols that can be added + to a sequence in a single time step; if set to None then there is + no limit. + preserve_alignments: Bool flag which preserves the history of alignments generated during + greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `alignments` in it. Here, `alignments` is a List of List of + Tuple(Tensor (of length V + 1 + num-big-blanks), Tensor(scalar, label after argmax)). + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more targets from a vocabulary. + U is the number of target tokens for the current timestep Ti. + preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated + during greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of List of floats. + The length of the list corresponds to the Acoustic Length (T). + Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. + U is the number of target tokens for the current timestep Ti. 
+ confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence scores. + name: The method name (str). + Supported values: + - 'max_prob' for using the maximum token probability as a confidence. + - 'entropy' for using normalized entropy of a log-likelihood vector. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + Supported values: + - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). + Note that for this entropy, the temperature should comply the following inequality: + 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. + Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/Tsallis_entropy + - 'renui' for the Rényi entropy. + Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy + temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the temperature equals one, scaling is not applied to 'max_prob', + and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. + Supported values: + - 'lin' for using the linear mapping. + - 'exp' for using exponential mapping with linear shift. + """ + + def __init__( + self, + decoder_model: rnnt_abstract.AbstractRNNTDecoder, + joint_model: rnnt_abstract.AbstractRNNTJoint, + blank_index: int, + durations: List[int], + max_symbols_per_step: Optional[int] = None, + preserve_alignments: bool = False, + preserve_frame_confidence: bool = False, + confidence_method_cfg: Optional[DictConfig] = None, + ): + super().__init__( + decoder_model=decoder_model, + joint_model=joint_model, + blank_index=blank_index, + max_symbols_per_step=max_symbols_per_step, + preserve_alignments=preserve_alignments, + preserve_frame_confidence=preserve_frame_confidence, + confidence_method_cfg=confidence_method_cfg, + ) + self.durations = durations + + # Depending on availability of `blank_as_pad` support + # switch between more efficient batch decoding technique + if self.decoder.blank_as_pad: + self._greedy_decode = self._greedy_decode_blank_as_pad + else: + self._greedy_decode = self._greedy_decode_masked + + @typecheck() + def forward( + self, + encoder_output: torch.Tensor, + encoded_lengths: torch.Tensor, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + """Returns a list of hypotheses given an input batch of the encoder hidden embedding. + Output token is generated auto-repressively. + Args: + encoder_output: A tensor of size (batch, features, timesteps). + encoded_lengths: list of int representing the length of each sequence + output sequence. + Returns: + packed list containing batch number of sentences (Hypotheses). 
+ """ + # Preserve decoder and joint training state + decoder_training_state = self.decoder.training + joint_training_state = self.joint.training + + with torch.inference_mode(): + # Apply optional preprocessing + encoder_output = encoder_output.transpose(1, 2) # (B, T, D) + logitlen = encoded_lengths + + self.decoder.eval() + self.joint.eval() + + with self.decoder.as_frozen(), self.joint.as_frozen(): + inseq = encoder_output # [B, T, D] + hypotheses = self._greedy_decode( + inseq, logitlen, device=inseq.device, partial_hypotheses=partial_hypotheses + ) + + # Pack the hypotheses results + packed_result = pack_hypotheses(hypotheses, logitlen) + + self.decoder.train(decoder_training_state) + self.joint.train(joint_training_state) + + return (packed_result,) + + def _greedy_decode_blank_as_pad( + self, + x: torch.Tensor, + out_len: torch.Tensor, + device: torch.device, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + if partial_hypotheses is not None: + raise NotImplementedError("`partial_hypotheses` support is not supported") + + with torch.inference_mode(): + # x: [B, T, D] + # out_len: [B] + # device: torch.device + + # Initialize list of Hypothesis + batchsize = x.shape[0] + hypotheses = [ + rnnt_utils.Hypothesis(score=0.0, y_sequence=[], timestep=[], dec_state=None) for _ in range(batchsize) + ] + + # Initialize Hidden state matrix (shared by entire batch) + hidden = None + + # If alignments need to be preserved, register a danling list to hold the values + if self.preserve_alignments: + # alignments is a 3-dimensional dangling list representing B x T x U + for hyp in hypotheses: + hyp.alignments = [[]] + + # If confidence scores need to be preserved, register a danling list to hold the values + if self.preserve_frame_confidence: + # frame_confidence is a 3-dimensional dangling list representing B x T x U + for hyp in hypotheses: + hyp.frame_confidence = [[]] + hyp.y_3best = [[]] + hyp.frame_confidence_3best = [[[]]] + hyp.logp = [[]] + + # Last Label buffer + Last Label without blank buffer + # batch level equivalent of the last_label + last_label = torch.full([batchsize, 1], fill_value=self._blank_index, dtype=torch.long, device=device) + + # Mask buffers + blank_mask = torch.full([batchsize], fill_value=0, dtype=torch.bool, device=device) + + # Get max sequence length + max_out_len = out_len.max() + + # skip means the number of frames the next decoding step should "jump" to. When skip == 1 + # it means the next decoding step will just use the next input frame. + skip = 1 + for time_idx in range(max_out_len): + if skip > 1: # if skip > 1 at the current step, we decrement it and skip the current frame. + skip -= 1 + continue + f = x.narrow(dim=1, start=time_idx, length=1) # [B, 1, D] + + # need_to_stay is a boolean indicates whether the next decoding step should remain in the same frame. + need_to_stay = True + symbols_added = 0 + + # Reset blank mask + blank_mask.mul_(False) + + # Update blank mask with time mask + # Batch: [B, T, D], but Bi may have seq len < max(seq_lens_in_batch) + # Forcibly mask with "blank" tokens, for all sample where current time step T > seq_len + blank_mask = time_idx >= out_len + + # Start inner loop + while need_to_stay and (self.max_symbols is None or symbols_added < self.max_symbols): + # Batch prediction and joint network steps + # If very first prediction step, submit SOS tag (blank) to pred_step. 
+ # This feeds a zero tensor as input to AbstractRNNTDecoder to prime the state + if time_idx == 0 and symbols_added == 0 and hidden is None: + g, hidden_prime = self._pred_step(self._SOS, hidden, batch_size=batchsize) + else: + # Perform batch step prediction of decoder, getting new states and scores ("g") + g, hidden_prime = self._pred_step(last_label, hidden, batch_size=batchsize) + + # Batched joint step - Output = [B, V + 1 + num-big-blanks] + # Note: log_normalize must not be True here since the joiner output is contanetation of both token logits and duration logits, + # and they need to be normalized independently. + joined = self._joint_step(f, g, log_normalize=None) + logp = joined[:, 0, 0, : -len(self.durations)] + duration_logp = joined[:, 0, 0, -len(self.durations) :] + + if logp.dtype != torch.float32: + logp = logp.float() + duration_logp = duration_logp.float() + + # get the max for both token and duration predictions. + v, k = logp.max(1) + dv, dk = duration_logp.max(1) + + # here we set the skip value to be the minimum of all predicted durations, hense the "torch.min(dk)" call there. + # Please refer to Section 5.2 of our paper https://arxiv.org/pdf/2304.06795.pdf for explanation of this. + skip = self.durations[int(torch.min(dk))] + + # this is a special case: if all batches emit blanks, we require that skip be at least 1 + # so we don't loop forever at the current frame. + if blank_mask.all(): + if skip == 0: + skip = 1 + + need_to_stay = skip == 0 + del g + + # Update blank mask with current predicted blanks + # This is accumulating blanks over all time steps T and all target steps min(max_symbols, U) + k_is_blank = k == self._blank_index + blank_mask.bitwise_or_(k_is_blank) + + del k_is_blank + del logp, duration_logp + + # If all samples predict / have predicted prior blanks, exit loop early + # This is equivalent to if single sample predicted k + if not blank_mask.all(): + # Collect batch indices where blanks occurred now/past + blank_indices = (blank_mask == 1).nonzero(as_tuple=False) + + # Recover prior state for all samples which predicted blank now/past + if hidden is not None: + hidden_prime = self.decoder.batch_copy_states(hidden_prime, hidden, blank_indices) + + elif len(blank_indices) > 0 and hidden is None: + # Reset state if there were some blank and other non-blank predictions in batch + # Original state is filled with zeros so we just multiply + # LSTM has 2 states + hidden_prime = self.decoder.batch_copy_states(hidden_prime, None, blank_indices, value=0.0) + + # Recover prior predicted label for all samples which predicted blank now/past + k[blank_indices] = last_label[blank_indices, 0] + + # Update new label and hidden state for next iteration + last_label = k.clone().view(-1, 1) + hidden = hidden_prime + + # Update predicted labels, accounting for time mask + # If blank was predicted even once, now or in the past, + # Force the current predicted label to also be blank + # This ensures that blanks propogate across all timesteps + # once they have occured (normally stopping condition of sample level loop). 
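+                    # Only samples that have not emitted a blank at this frame extend their hypotheses below.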
+ for kidx, ki in enumerate(k): + if blank_mask[kidx] == 0: + hypotheses[kidx].y_sequence.append(ki) + hypotheses[kidx].timestep.append(time_idx) + hypotheses[kidx].score += float(v[kidx]) + + symbols_added += 1 + + # Remove trailing empty list of alignments at T_{am-len} x Uj + if self.preserve_alignments: + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].alignments[-1]) == 0: + del hypotheses[batch_idx].alignments[-1] + + # Remove trailing empty list of confidence scores at T_{am-len} x Uj + if self.preserve_frame_confidence: + for batch_idx in range(batchsize): + if len(hypotheses[batch_idx].frame_confidence[-1]) == 0: + del hypotheses[batch_idx].frame_confidence[-1] + del hypotheses[batch_idx].y_3best[-1] + del hypotheses[batch_idx].frame_confidence_3best[-1] + del hypotheses[batch_idx].logp[-1] + + # Preserve states + for batch_idx in range(batchsize): + hypotheses[batch_idx].dec_state = self.decoder.batch_select_state(hidden, batch_idx) + + return hypotheses + + def _greedy_decode_masked( + self, + x: torch.Tensor, + out_len: torch.Tensor, + device: torch.device, + partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, + ): + raise NotImplementedError("masked greedy-batched decode is not supported for TDT models.") diff --git a/tests/collections/asr/decoding/test_rnnt_decoding.py b/tests/collections/asr/decoding/test_rnnt_decoding.py index 9dd955c24a70..ac90e62036e0 100644 --- a/tests/collections/asr/decoding/test_rnnt_decoding.py +++ b/tests/collections/asr/decoding/test_rnnt_decoding.py @@ -130,7 +130,7 @@ def test_constructor(self): @pytest.mark.unit def test_constructor_subword(self, tmp_tokenizer): - cfg = RNNTBPEDecodingConfig() + cfg = RNNTDecodingConfig() vocab = tmp_tokenizer.vocab decoder = get_rnnt_decoder(vocab_size=len(vocab)) joint = get_rnnt_joint(vocab_size=len(vocab)) diff --git a/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py b/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py index 7764649bf1fa..3fbfcf6df54b 100644 --- a/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py +++ b/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py @@ -18,9 +18,13 @@ import pytest import torch -from nemo.collections.asr.losses.rnnt import MultiblankRNNTLossPytorch, RNNTLossPytorch +from nemo.collections.asr.losses.rnnt import MultiblankRNNTLossPytorch, RNNTLossPytorch, TDTLossPytorch from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_numpy import RNNTLoss as RNNTLoss_Numpy -from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import MultiblankRNNTLossNumba, RNNTLossNumba +from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import ( + MultiblankRNNTLossNumba, + RNNTLossNumba, + TDTLossNumba, +) from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ @@ -494,5 +498,68 @@ def test_case_randomized_act_label(self, device): assert np.allclose(pt_grads, ag_grads, rtol=1e-2), "multi-blank gradient mismatch." 
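The duration head is what distinguishes the TDT decoding loop above from standard RNNT greedy decoding: each joint output carries len(durations) extra logits, and the batch advances by the smallest predicted duration so that no sample is skipped past its own emissions. Below is a minimal, self-contained sketch of that split-and-skip step; it is illustrative only, not NeMo code, and the sizes and names are assumptions.

import torch

durations = [0, 1, 2, 3]   # assumed duration vocabulary, appended after the V + 1 token logits
V = 5                      # assumed number of non-blank tokens; blank index is V

def tdt_step(joined: torch.Tensor):
    """Split a [B, V + 1 + len(durations)] joint output and compute the frame skip."""
    logp = joined[:, : -len(durations)]           # token (+ blank) logits
    duration_logp = joined[:, -len(durations):]   # duration logits, normalized separately

    _, k = logp.max(dim=1)                        # best token per batch element
    _, dk = duration_logp.max(dim=1)              # best duration index per batch element

    # Advance by the smallest predicted duration so no sample in the batch is overshot.
    skip = durations[int(torch.min(dk))]

    # If every sample emitted blank and the smallest duration is 0, force progress by one
    # frame so the loop cannot stall on the current time step.
    blank_mask = k == V
    if blank_mask.all() and skip == 0:
        skip = 1

    # skip == 0 means the decoder stays on the current frame and emits another symbol.
    return k, skip

joined = torch.randn(2, V + 1 + len(durations))
tokens, skip = tdt_step(joined)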
+class TestTDTLoss: + @pytest.mark.unit + @pytest.mark.parametrize('device', DEVICES) + def test_case_randomized_act_label(self, device): + if device == 'cuda': + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + B, T, U, V = 4, 8, 4, 8 # here V is number of non blank labels + durations = [0, 1, 2, 3, 4, 5] + sigma = 0.05 + + acts = torch.rand([B, T, U, V + 1 + len(durations)]) + labels = [[random.randrange(0, V) for i in range(U - 1)] for j in range(B)] + + fn_pt = TDTLossNumba(blank=V, reduction='sum', durations=durations, sigma=sigma) + pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) + + fn_ag = TDTLossPytorch( + blank=V, reduction='sum', durations=durations, sigma=sigma + ) # ag for automatic gradient computation + ag_cost, ag_grads = wrap_and_call(fn_ag, acts, labels, device) + + assert np.allclose(pt_cost, ag_cost, rtol=1e-6), "tdt costs mismatch." + assert np.allclose(pt_grads, ag_grads, rtol=1e-2), "td gradient mismatch." + + @pytest.mark.unit + @pytest.mark.parametrize('device', DEVICES) + def test_case_fixed_case_act_label(self, device): + if device == 'cuda': + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + B, T, U, V = 1, 3, 2, 3 # here V is number of non blank labels + durations = [0, 1, 2] + sigma = 0.05 + + acts = torch.zeros([B, T, U, V + 1 + len(durations)]) + labels = [[(i + j) % (V - 1) for i in range(U - 1)] for j in range(B)] + + fn_pt = TDTLossNumba(blank=V, reduction='sum', durations=durations, sigma=sigma) + pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) + + expected_cost = 4.155739 + expected_grads = [ + [ + [ + [-0.64962804, 0.25, 0.25, 0.14962798, 0.2672583, -0.16792619, -0.09933221], + [0.01651875, 0.01651875, 0.01651875, -0.04955626, 0.022025, -0.01227201, -0.009753], + ], + [ + [-0.04892651, 0.01714851, 0.01714851, 0.01462949, -0.01143234, -0.01143234, 0.02286467], + [0.12531489, 0.12531489, 0.12531489, -0.37594467, 0.16708651, 0.13027048, -0.29735702], + ], + [ + [-0.02572276, 0.00857425, 0.00857425, 0.00857425, -0.02286468, 0.01143234, 0.01143234], + [0.13388914, 0.13388914, 0.13388914, -0.40166742, 0.17851885, -0.35703772, 0.17851885], + ], + ] + ] + + assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "tdt costs mismatch." + assert np.allclose(pt_grads, expected_grads, rtol=1e-2), "td gradient mismatch." 
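Both loss implementations above take a `sigma` argument. In the TDT and multi-blank formulations this is a logit under-normalization constant: token log-probabilities are shifted down by sigma after the log-softmax, so per-frame probabilities sum to exp(-sigma) rather than 1. The CUDA kernel tests further below apply the same shift as `log_softmax(x) - sigma` before calling the PyTorch reference loss. A small sketch of that normalization, assuming plain PyTorch and illustrative shapes:

import torch
import torch.nn.functional as F

sigma = 0.05                      # under-normalization constant, as in the tests above
x = torch.randn(1, 15, 11, 3)     # [B, T, U, V] unnormalized token logits (illustrative shape)

logp = F.log_softmax(x, dim=-1)   # ordinary log-probabilities: exp(logp) sums to 1 over V
logp_sigma = logp - sigma         # under-normalized: exp(logp_sigma) sums to exp(-sigma) ~ 0.95

print(logp.exp().sum(dim=-1).mean().item())        # ~1.0
print(logp_sigma.exp().sum(dim=-1).mean().item())  # ~0.951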
+ + if __name__ == "__main__": pytest.main([__file__]) diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py b/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py index acab5963fa72..230b6b7c099f 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py @@ -17,6 +17,7 @@ import torch from numba import cuda +from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, TDTLossPytorch from nemo.collections.asr.parts.numba.rnnt_loss import rnnt_numpy from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import certify_inputs from nemo.collections.asr.parts.numba.rnnt_loss.utils.cuda_utils import gpu_rnnt_kernel, reduce @@ -504,3 +505,189 @@ def test_compute_grads_kernel_clamp(self): assert np.abs(diff).mean() <= 1e-5 assert np.square(diff).mean() <= 1e-10 + + +class TestTDTCUDAKernels: + @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") + @pytest.mark.unit + def test_compute_alphas_kernel(self): + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + random = np.random.RandomState(0) + original_shape = [1, 15, 11, 3] + durations = [0, 1, 2] + B, T, U, V = original_shape + Vd = len(durations) + + duration_act_shape = [B, T, U, Vd] + sigma = 0.05 + + # for passing into the kernel function -- it expected unnormalized logits + x = random.randn(*original_shape) + # for passing into the pytorch function -- it expected normalized logits + normalized_x = log_softmax(x, axis=-1) - 0.05 + + xd = random.randn(*duration_act_shape) + # duration logits are normalized before passing into the loss computation. + xd = log_softmax(xd, axis=-1) + + labels = np.array([[1, 1, 1, 1, 0, 0, 1, 0, 0, 1]]) # [1, 10] + blank_idx = V - 1 + + pytorch_tdt_loss = TDTLossPytorch(blank_idx, durations, sigma=sigma) + + # Pytorch kernel + device = torch.device('cuda') + if hasattr(cuda, 'external_stream'): + stream = cuda.external_stream(torch.cuda.current_stream(device).cuda_stream) + else: + stream = cuda.default_stream() + + x = torch.tensor(x, device=device, dtype=torch.float32) + normalized_x = torch.tensor(normalized_x, device=device, dtype=torch.float32) + xd = torch.tensor(xd, device=device, dtype=torch.float32) + labels = torch.tensor(labels, device=device, dtype=torch.long) + durations = torch.tensor(durations, device=device, dtype=torch.long) + + # Allocate workspace memory + denom = torch.zeros(B * T * U, device=device, dtype=x.dtype) + alphas = torch.zeros(B * T * U, device=device, dtype=x.dtype) + llForward = torch.zeros(B, device=device, dtype=x.dtype) + input_lengths = torch.tensor([T], dtype=torch.long, device=device) + label_lengths = torch.tensor([U - 1], dtype=torch.long, device=device) + + ground_log_likelihood, ground_alphas = pytorch_tdt_loss.compute_forward_prob( + normalized_x, xd, labels, input_lengths, label_lengths + ) + + # certify input data + certify_inputs(x, labels, input_lengths, label_lengths) + + # flatten activation tensor (for pointer based indexing) + x = x.view([-1]) + xd = xd.view([-1]) + + # call kernel + # log softmax reduction + reduce.reduce_max(x, denom, rows=V, cols=B * T * U, minus=False, stream=stream) + reduce.reduce_exp(x, denom, rows=V, cols=B * T * U, minus=True, stream=stream) + + # alpha kernel + gpu_rnnt_kernel.compute_tdt_alphas_kernel[B, U, stream, 0]( + x, + xd, + denom, + sigma, + alphas, + llForward, + input_lengths, + 
label_lengths, + labels, + B, + T, + U, + V, + blank_idx, + durations, + Vd, + ) + + # sync kernel + stream.synchronize() + + # reshape alphas + alphas = alphas.view([B, T, U]) + diff = torch.norm(ground_alphas - alphas) + ll_diff = torch.norm(ground_log_likelihood - llForward) + + assert diff <= 1e-3 + assert ll_diff <= 1e-3 + + +class TestMultiblankRNNTCUDAKernels: + @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") + @pytest.mark.unit + def test_compute_alphas_kernel(self): + numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + + random = np.random.RandomState(0) + original_shape = [1, 15, 11, 6] + big_blank_durations = [2, 3, 4] + B, T, U, V = original_shape + num_big_blanks = len(big_blank_durations) + + sigma = 0.05 + + # for passing into the kernel function -- it expected unnormalized logits + x = random.randn(*original_shape) + # for passing into the pytorch function -- it expected normalized logits + normalized_x = log_softmax(x, axis=-1) - sigma + + labels = np.array([[1, 1, 1, 1, 0, 0, 1, 0, 0, 1]]) # [1, 10] + blank_idx = V - 1 + + pytorch_multiblank_loss = MultiblankRNNTLossPytorch(blank_idx, big_blank_durations, sigma=sigma) + + # Pytorch kernel + device = torch.device('cuda') + if hasattr(cuda, 'external_stream'): + stream = cuda.external_stream(torch.cuda.current_stream(device).cuda_stream) + else: + stream = cuda.default_stream() + + x = torch.tensor(x, device=device, dtype=torch.float32) + normalized_x = torch.tensor(normalized_x, device=device, dtype=torch.float32) + labels = torch.tensor(labels, device=device, dtype=torch.long) + big_blank_durations = torch.tensor(big_blank_durations, device=device, dtype=torch.long) + + # Allocate workspace memory + denom = torch.zeros(B * T * U, device=device, dtype=x.dtype) + alphas = torch.zeros(B * T * U, device=device, dtype=x.dtype) + llForward = torch.zeros(B, device=device, dtype=x.dtype) + input_lengths = torch.tensor([T], dtype=torch.long, device=device) + label_lengths = torch.tensor([U - 1], dtype=torch.long, device=device) + + ground_log_likelihood, ground_alphas = pytorch_multiblank_loss.compute_forward_prob( + normalized_x, labels, input_lengths, label_lengths + ) + + # certify input data + certify_inputs(x, labels, input_lengths, label_lengths) + + # flatten activation tensor (for pointer based indexing) + x = x.view([-1]) + + # call kernel + # log softmax reduction + reduce.reduce_max(x, denom, rows=V, cols=B * T * U, minus=False, stream=stream) + reduce.reduce_exp(x, denom, rows=V, cols=B * T * U, minus=True, stream=stream) + + # alpha kernel + gpu_rnnt_kernel.compute_multiblank_alphas_kernel[B, U, stream, 0]( + x, + denom, + sigma, + alphas, + llForward, + input_lengths, + label_lengths, + labels, + B, + T, + U, + V, + blank_idx, + big_blank_durations, + num_big_blanks, + ) + + # sync kernel + stream.synchronize() + + # reshape alphas + alphas = alphas.view([B, T, U]) + diff = torch.norm(ground_alphas - alphas) + ll_diff = torch.norm(ground_log_likelihood - llForward) + + assert diff <= 1e-3 + assert ll_diff <= 1e-3 diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 5b30489f846c..68f1e38f797b 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -363,6 +363,50 @@ def test_multiblank_rnnt_greedy_decoding(self, greedy_class): with torch.no_grad(): _ = greedy(encoder_output=enc_out, 
encoded_lengths=enc_len) + @pytest.mark.skipif( + not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', + ) + @pytest.mark.unit + @pytest.mark.parametrize( + "greedy_class", [greedy_decode.GreedyMultiblankRNNTInfer, greedy_decode.GreedyBatchedMultiblankRNNTInfer], + ) + def test_multiblank_rnnt_greedy_decoding(self, greedy_class): + token_list = [" ", "a", "b", "c"] + vocab_size = len(token_list) + big_blank_durations = [2, 4] + + encoder_output_size = 4 + decoder_output_size = 4 + joint_output_shape = 4 + + prednet_cfg = {'pred_hidden': decoder_output_size, 'pred_rnn_layers': 1} + jointnet_cfg = { + 'encoder_hidden': encoder_output_size, + 'pred_hidden': decoder_output_size, + 'joint_hidden': joint_output_shape, + 'activation': 'relu', + } + + decoder = RNNTDecoder(prednet_cfg, vocab_size) + joint_net = RNNTJoint( + jointnet_cfg, vocab_size, vocabulary=token_list, num_extra_outputs=len(big_blank_durations) + ) + + greedy = greedy_class( + decoder, + joint_net, + blank_index=len(token_list), + big_blank_durations=big_blank_durations, + max_symbols_per_step=5, + ) + + # (B, D, T) + enc_out = torch.randn(1, encoder_output_size, 30) + enc_len = torch.tensor([30], dtype=torch.int32) + + with torch.no_grad(): + _ = greedy(encoder_output=enc_out, encoded_lengths=enc_len) + @pytest.mark.skipif( not NUMBA_RNNT_LOSS_AVAILABLE, reason='RNNTLoss has not been compiled with appropriate numba version.', ) From ef740068e75bf55aac14c1432707fc4ef136bb04 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:53:33 -0600 Subject: [PATCH 008/123] Fix get_parameters when using main params optimizer (#6764) (#6787) * fix get param * change name --------- Signed-off-by: ericharper Co-authored-by: Eric Harper --- .../models/language_modeling/megatron_base_model.py | 12 +++++++----- nemo/core/optim/optimizer_with_main_params.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 1237491fa39c..2aaedbe5a806 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -240,14 +240,16 @@ def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by ) return after - def _get_parameters(self): + def get_parameters_with_grad(self): """ - private method to load all the trainable parameters from optimizer param groups + Get all parameters with grad from optimizer param groups """ params = [] for param_group in self._optimizer_param_groups: for param in param_group['params']: - if param.requires_grad: # (@adithyare) adapter training with pp>1 can result in params with no grads + if ( + param.grad is not None + ): # (@adithyare) adapter training with pp>1 can result in params with no grads params.append(param) return params @@ -272,9 +274,9 @@ def configure_gradient_clipping(self, *args, **kwargs): else: if self.megatron_amp_o2: # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters() + parameters = self._optimizer.get_parameters_with_grad() else: - parameters = self._get_parameters() + parameters = self.get_parameters_with_grad() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) diff 
--git a/nemo/core/optim/optimizer_with_main_params.py b/nemo/core/optim/optimizer_with_main_params.py index c9790ee2a139..44d54a0e63ff 100644 --- a/nemo/core/optim/optimizer_with_main_params.py +++ b/nemo/core/optim/optimizer_with_main_params.py @@ -488,11 +488,11 @@ def async_master_grads_allreudce(self): def fp32_grad_accumulation(self): return self._fp32_grad_accum - def get_parameters(self): + def get_parameters_with_grad(self): params = [] for param_group in self.optimizer.param_groups: for param in param_group['params']: - if param.requires_grad: # (@adithyare) added to enable pp>1 training for adapters + if param.grad is not None: # (@adithyare) added to enable pp>1 training for adapters params.append(param) return params From 19a8d2f63a14d7a98d176c4357f6ad60e049d2ac Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:54:01 -0600 Subject: [PATCH 009/123] Lddl bert (#6761) (#6790) * initial POC for LDDL Bert * Finish LDDL POC * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * address comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix merge head * resolving merge * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add support for val/test loaders * change to new LDDL class + add winding * fix logging level * fix winding * test fix * fixes to winding * add file system * add prepemption optimizations * more logging * more prints * better logging * asfsf * add barrier * removing prints * working with mb lddl loader * final changes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update requirements file with LDDL * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert adding to requirements --------- Signed-off-by: wdykas Co-authored-by: wdykas <73254672+wdykas@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper --- .../conf/megatron_bert_config.yaml | 2 +- .../megatron_bert_pretraining.py | 5 +- .../language_modeling/megatron_bert_model.py | 129 +++++++++++++++++- 3 files changed, 126 insertions(+), 10 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml index cbc0562e2904..a7e3364d41b4 100644 --- a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml @@ -133,7 +133,7 @@ model: seq_length: ${model.encoder_seq_length} skip_warmup: True num_workers: 0 - dataloader_type: single # cyclic + dataloader_type: single # cyclic, LDDL reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token eod_mask_loss: False # Mask loss for the end of document tokens diff --git a/examples/nlp/language_modeling/megatron_bert_pretraining.py b/examples/nlp/language_modeling/megatron_bert_pretraining.py index e6abee295a1a..5f0b74db92b6 100644 --- a/examples/nlp/language_modeling/megatron_bert_pretraining.py +++ b/examples/nlp/language_modeling/megatron_bert_pretraining.py @@ -29,11 +29,12 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager 
-mp.set_start_method("spawn", force=True) - @hydra_runner(config_path="conf", config_name="megatron_bert_config") def main(cfg) -> None: + if cfg.model.data.dataloader_type != "LDDL": + mp.set_start_method("spawn", force=True) + logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index 64430a669269..cac1a50e98ae 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -40,6 +40,7 @@ from nemo.core.neural_types import ChannelType, MaskType, NeuralType from nemo.utils import AppState, logging + try: from apex.transformer.pipeline_parallel.utils import get_num_microbatches @@ -49,6 +50,14 @@ HAVE_APEX = False +try: + import logging + from lddl.torch_mp import get_bert_pretrain_data_loader + + HAVE_LDDL = True +except (ImportError, ModuleNotFoundError): + HAVE_LDDL = False + try: from megatron.core import parallel_state from megatron.core.pipeline_parallel.schedules import get_forward_backward_func @@ -300,7 +309,12 @@ def training_step(self, dataloader_iter, batch_idx): for param in module.embedding.parameters(): param.data_ptr() - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + if self.cfg.data.dataloader_type == "LDDL": + # this is of type bert dataset + seq_length = dataloader_iter.iterator.loaders.get_seqlen() + tensor_shape = [seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + else: + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism @@ -324,7 +338,10 @@ def training_step(self, dataloader_iter, batch_idx): loss_tensor = torch.vstack(loss_tensors_list) loss_mean = loss_tensor.mean(axis=0) else: - loss_mean = torch.tensor([0.0, 0.0]).cuda() + if self.cfg.bert_binary_head == True: + loss_mean = torch.tensor([0.0, 0.0, 0.0]).cuda() + else: + loss_mean = torch.tensor([0.0, 0.0]).cuda() # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): @@ -404,7 +421,12 @@ def allreduce_first_last_embeddings(self): torch.distributed.all_reduce(grad, group=parallel_state.get_embedding_group()) def validation_step(self, dataloader_iter, batch_idx): - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + if self.cfg.data.dataloader_type == "LDDL": + seq_length = dataloader_iter.iterator.get_seqlen() + tensor_shape = [seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + else: + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] fwd_bwd_function = get_forward_backward_func() @@ -476,6 +498,95 @@ def loss_func(self, loss_mask, sentence_order, output_tensor): # [lm_loss]) # return loss, {'lm loss': averaged_losses[0]} + def build_LDDL_data(self, cfg): + if not HAVE_LDDL: + raise ImportError( + "LDDL was not found. Please see the LDDL README for installation instructions: https://github.com/NVIDIA/LDDL#installation." 
+ ) + logging.info(f'Starting building LDDL Dataloaders') + self._train_ds = None + self._validation_ds = None + self._test_ds = None + data_parallel_size = parallel_state.get_data_parallel_world_size() + num_micro_batches = self.cfg.global_batch_size // (self.cfg.micro_batch_size * data_parallel_size) + global_batch_size_on_this_data_parallel_rank = num_micro_batches * self.cfg.micro_batch_size + samples_consumed_dploader = self.compute_consumed_samples(0) // data_parallel_size + # We run under the assumption that the datapath is the prefix if LDDL dataloader + train_lddl_data_path = self.cfg.data.data_prefix[0] + self._train_dl = get_bert_pretrain_data_loader( + train_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + samples_seen=samples_consumed_dploader, + micro_batch_size=self.cfg.micro_batch_size, + ) + logging.info(f'Completed build train LDDL Dataloader') + if len(self.cfg.data.data_prefix) > 1: + val_lddl_data_path = self.cfg.data.data_prefix[1] + self._validation_dl = get_bert_pretrain_data_loader( + val_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + micro_batch_size=self.cfg.micro_batch_size, + ) + if len(self.cfg.data.data_prefix) > 2: + test_lddl_data_path = self.cfg.data.data_prefix[2] + self._test_dl = get_bert_pretrain_data_loader( + test_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + micro_batch_size=self.cfg.micro_batch_size, + ) + logging.info(f'Finished building LDDL Dataloaders') + def build_train_valid_test_datasets(self): logging.info('Building Bert datasets.') if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): @@ -581,10 +692,14 @@ def setup(self, stage=None): else: # TODO: consider adding a ModelPT guard to check if model is being restored. 
# allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) + if self.cfg.data.dataloader_type == "LDDL": + self.build_LDDL_data(self.cfg.data) + torch.distributed.barrier() + else: + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) # when using pipeline model parallel the final stage need to initialize word embeddings if parallel_state.get_pipeline_model_parallel_world_size() > 1: From a7403c26f79b4914a7735f64f9da60124342d9c7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 2 Jun 2023 17:55:29 -0600 Subject: [PATCH 010/123] Fix check (#6798) (#6800) Signed-off-by: MaximumEntropy Co-authored-by: Sandeep Subramanian --- .../nlp/data/language_modeling/megatron/gpt_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py index cf1de245d0e7..d7113e7cdde3 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py @@ -601,7 +601,7 @@ def _build_index_mappings( last_epoch_num_samples = num_samples - num_samples_from_epochs_minus_one assert last_epoch_num_samples >= 0, 'last epoch number of samples should be non-negative.' num_samples_per_epoch = (tokens_per_epoch - add_extra_token) // seq_length - assert last_epoch_num_samples < ( + assert last_epoch_num_samples <= ( num_samples_per_epoch + 1 ), 'last epoch number of samples exceeded max value.' 
# If we have less than 80% of the samples for the last epoch, From d9843331f7b0e0df536637b187a0d6b87181bd2c Mon Sep 17 00:00:00 2001 From: mikolajblaz Date: Sat, 3 Jun 2023 01:56:31 +0200 Subject: [PATCH 011/123] Fix validation with drop_last=False (#6704) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mikołaj Błaż Co-authored-by: Eric Harper --- .../nlp/models/language_modeling/megatron_gpt_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 8ee470d70a7f..fd1382e668cf 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -455,7 +455,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready losses_reduced_per_micro_batch = fwd_bwd_function( - forward_step_func=self.get_forward_output_and_loss_func(), + forward_step_func=self.get_forward_output_and_loss_func(forward_only), data_iterator=self._make_data_iterator_list(dataloader_iter), model=self.model, num_microbatches=get_num_microbatches(), From 8f26d838074b2a1b8d4d85fb3fc7c63453a2be80 Mon Sep 17 00:00:00 2001 From: George <37293288+Jorjeous@users.noreply.github.com> Date: Sat, 3 Jun 2023 09:19:03 +0400 Subject: [PATCH 012/123] SDE unt lvl comparison (#6669) Added a visual utterance-level comparison of two ASR models Signed-off-by: George Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/tools/comparison_tool.rst | 31 +- docs/source/tools/images/scr_10.png | Bin 0 -> 42125 bytes docs/source/tools/images/scr_11.png | Bin 0 -> 50431 bytes tools/speech_data_explorer/data_explorer.py | 547 ++++++++++++++++++-- 4 files changed, 537 insertions(+), 41 deletions(-) create mode 100644 docs/source/tools/images/scr_10.png create mode 100644 docs/source/tools/images/scr_11.png diff --git a/docs/source/tools/comparison_tool.rst b/docs/source/tools/comparison_tool.rst index 1e28621704a6..6e5d28a0feb3 100644 --- a/docs/source/tools/comparison_tool.rst +++ b/docs/source/tools/comparison_tool.rst @@ -1,7 +1,7 @@ Comparison tool for ASR Models ============================== -The Comparison Tool (CT) allows to compare predictions of different ASR models at word accuracy level. +The Comparison Tool (CT) allows to compare predictions of different ASR models at word accuracy and utterance level. 
 +--------------------------------------------------------------------------------------------------------------------------+
 | **Comparison tool features:**                                                                                            |
 +--------------------------------------------------------------------------------------------------------------------------+
 | visual comparison of predictions of different models                                                                      |
 +--------------------------------------------------------------------------------------------------------------------------+
+| visual comparison of utterances by their WER/CER                                                                          |
++--------------------------------------------------------------------------------------------------------------------------+
+| listening to a selected utterance                                                                                         |
++--------------------------------------------------------------------------------------------------------------------------+
 
 Getting Started
 ---------------
@@ -151,3 +155,28 @@ In this case, all points lying above the diagonal have higher accuracy with the
 Points marked with circles should be explored first.
 
 Words in the first quarter were well recognized by both models, and conversely, words in the third quarter were poorly recognized by both models.
+
+To compare models at the utterance level, select it in the top dropdown field.
+
+In the next field, you can choose the metric: WER or CER.
+
+    .. image:: images/scr_10.png
+        :align: center
+        :width: 800px
+        :alt: Switch mode
+
+When the utterance level is selected, you can click on a point on the graph, and the corresponding utterance will be selected automatically.
+
+If audio files are available, there will be an option to listen to the audio recording and view its waveform.
+
+    .. image:: images/scr_11.png
+        :align: center
+        :width: 800px
+        :alt: Audio player
+
+In this mode, filtering is still available as well.
+
+**Limitations**
+
+To ensure efficient processing and avoid memory issues and slow performance, it is recommended to keep the manifests within roughly 320 hours of audio or around 170,000 utterances.
+Exceeding these limits may result in memory constraints and slower processing.
\ No newline at end of file
diff --git a/docs/source/tools/images/scr_10.png b/docs/source/tools/images/scr_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..71c378efe57bbb800029b40a3c764d366ce6a930
GIT binary patch
literal 42125
[base85-encoded binary PNG data omitted]
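For the utterance-level mode documented above, SDE ranks utterances by their individual WER/CER. The following is a rough sketch of such a per-utterance computation, independent of the actual data_explorer.py implementation; the manifest field names `text`, `pred_text`, and `audio_filepath` are assumptions.

import json

def edit_distance(ref, hyp):
    """Plain Levenshtein distance between two token sequences (rolling-row DP)."""
    dp = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        prev, dp[0] = dp[0], i
        for j, h in enumerate(hyp, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1, dp[j - 1] + 1, prev + (r != h))
    return dp[-1]

def utterance_rates(manifest_path):
    """Yield (audio_filepath, WER, CER) for every line of a prediction manifest."""
    with open(manifest_path, encoding='utf-8') as f:
        for line in f:
            item = json.loads(line)
            ref, hyp = item['text'], item['pred_text']  # assumed field names
            wer = edit_distance(ref.split(), hyp.split()) / max(len(ref.split()), 1)
            cer = edit_distance(list(ref), list(hyp)) / max(len(ref), 1)
            yield item['audio_filepath'], wer, cer

Running a sketch like this over each model's manifest and joining the results by audio_filepath gives the per-utterance points that the scatter plot in the documentation compares.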
z1Y4ZgrUyGI_z+5$BC)=QrAvkvi!l_07=Tu1xP`_B`jgN&`vOrj<214Mgsw z_ZQXK465bim3FF9m{2&jE3apcvgB;{s=)tV?6e{O zS?_}Gb8DPQ+Zu8jeU9RIHrVvEtwJbr(m4QsA29MI`3YOd5#vP5GQxX-aj-}eC;1U! zA4fv+hvz3>nA}o52UsZTT~6@s#9&AAv7A#q^({BV!Uk1i-@DbOjj%H~=y}{YmDXv) zA`}WHJfi()nV$l}Ep#I+45+p)b~sah_}ZoVdCGhL)nD)!A4YpJ{L6(!ExufCO16Mcxu%(F5$F3_#rE|))7cwg|1T~4|CwF>N3&%f@fcs- zn(~Z4+F6X5VUi#`FO(1dsK3@S5yAnS4oI@P{$qe zLb%nrp+7tLUH>7<$9@UsV($egoo;OVVPpR9&1)h{ZY+wqsD^rp9nOY!Xol9@p< zxw}Lq6EA`?LS$Cn*r8D`k#6bn#P)prGOk5~|u zq~H1yw6{@N1pg~nXQ`CgDs@lkw(Q>?FFZiJU69Vv;oMfDUY=(CJXY_yv>HbTf*VWW z#;$tw=0zZrFrfsY<^`B>&(iDQniZUVO>e8uDjMGZ*@M@o=j{{;(|L$wcbyBzB zyr7M#6@pvngVK#DZsp?%x*83@IWT_F)VmCk{d*J6w)Hr14qG1IS1PPvrcT5DAd&^& zzGMiFPcdK>#D7Qa_4sd2aBElvy$cH5okKM8x}=Xd ziYPa!xPu&_L4Rsa1?Emj_5B8u=T~hq`tnB!`vtAUv8fus-f2%4Um8N_v`U3-)<3KN zNI5@+d;k_WJ7n}E@$n6%r%D0k#%yy56G;I*_Z~m#_m|$B6<|M_4LYer5Bwru3X9MU zYj&wT;$058aVFu@=HpeSUC1z22M2oSO>4Eo3ws!yWjo36Sq0ttSU7vYvvxL8R`}x zCMj`;pjMv9>ANv1(K5>ZC&Qv~~vtRDpshPv%r*?$; z6izqUq_3N{KR3cD=<{Z7IwBVRbCDG(#_`$QrP&j%O$2oi5Y0fC2_=-4QTvA&hm65= z#FDElBz zS6uAagHXdZ z*@L@`$}?_Pe?bbO;Yi;QLv~I2X%!WWQ#3lZ0YORI8rSJh>H;3 z+46>>8XTQfcI$GM`{xvdCxxTkjDJ}OG{8`jtQVMZ=7O+o;^FM}vzqd!$k87AziFqN zOMS|6>fpR;@yVTv?U+_g-uK+>XA76~A{$o{G9+m!jKA2t1;{jRma3St4zFi+3j-Aj zO4Cm0^QFnDm+0Ogs(+N=m?{>EkP5SK4e^L%u3!~E_?#f&mQb31wTPHu(35#7ym`&O z$Na>0Owr|$QF0Lgyp$ns^GfP$ocqrn$GHEgdjP)RuqyrAh<5jh!qN~mpEV2>)zw*R zRr7Y0HtW` znbG>OJtLnyt5ezeS1c;%qgC?xjX~4W9L>$k^ba$`L^Y##?)LoKhX14Y^7R%sE|^FS z!VdV`ZEmi=o!dzDpCt~pJq+241I-1kY`_M{32U(1@wR3rrTfRG6qdv$7T`40 z;#{RR^93o#)18rH^)I~n8Z|Gj%gGm={n)ru^c#Pg04f40`yIkVY1BqSP%UR`B33h@LnpuMzI#ES|-5z^= zSdom)wkksU20dPOjdwziu@tU33be%O5aBjZb9L*eCjRrl0IhdX$ zaR-6VI)E(%BJ3mem4k{{WP#AuddG->t(O^()uj;kx;I3K`9rQsbAs%$)4JUWPd@YRG!L>1xmg}%}Qq{P|Y z(p2$QU3?bGQF?Nm>b(sxNGKeb+X_{FlA~UAOp_eX0&2SMrF47a+vE4W-y_-XKUNa! z4+_a(scG+FRut*k3?Y7Ae|7k!mAFD#v)4Sci{e8Jr{@WGsOJdig%T;fzvO0NE`hM1 z8Ca%BOXqO2D|Q~6#~sI~ap`_4{|+JCEu$4V6aQEK*X!nV>Bls3D!Zpg`F&;L zC-FIl8&Jf4Z^wpw9{Ey2N2EGMm^)x>F1vdZxPH@(rT_2j)X67VBD=(k_aWbp-XG+~ z`yEy0fU|--CQmQpy8Qw4IyV zj{Rx#`{29}jd@_BB~*R_S1THM^1R{1J9*t~ifY6r#Z9r_!GVyBYGjVanP4UT}`W3H-c-6M|E$zHA%=qd%s51B+kZ?`-eG>9MEVwuj`!^!O)oo%JvAL3Q#Urt}Kx>A(EK zq$mG5`}%K}?Y|&r{}0V`XA!;l(f8EM{~}#Ry5iYA&fTu96hr-L=!3llT1xEP)@7*!0)DTwU+F2_v!yWrWf%YCEfdSZ?i$f z9P8eqp0+Mw&>vVDa^$<`g#zylPJ;9ODrvemgmRtdHhbwrn?e+L#f+mln zDl!G&cg8W|42WwUa-8dFJhdg!(+evT1Z42z!J%H>yRMt?ANpoI$!SQx2%ndcAl-9i zU*|PY=H@2=Uj(t6qH8Xj`65SpV*bE# zwOK)vMyX1t#(sPpY3ph$`1A1J;?9rrI?YZCU`Y|SQR7{QhF1fxT+N!NDZg7E4Q4k! 
zNa$|tsk7Z+_WN*7oZY>xu9ciI|1>{$2b4I)py}+I9CLS+dsQgt1D5)?=Q>3ux6g97 ziG{jGj)bX^h!t1xt{xr#j=X2=!cy?=w%mYixEh(` z?ZKhtJz|G*pir2*i#51K*|%BzN0xt=Sb&j?qQu_Y7=hK~BBy4pwEd3IK)}PyMat9M7=Sy>QutqfSv}5v5*Q0~!3(HH=O)fiV>ksuD=FQ5hHfy&&Ah1M z70h=m&9?VzqahRJUuB>zAXYgRuXm$o>b`}HDd29x=K7US57W1WIL9=J-Y^L6uF%F{ zHL~8bxkn-QM0l6%;ia4^5}1();F9FcTkYUrm6Nd@Xpnz(9NAwOD~f#7M$J z;to*9!g;iyRyg`t{sSp6uhNk9aI1_;?sHx+(*rx1-3|EY zA2urMr(X|+IInGVmm(D3%=toiB#2vY9weHn={{6#3uY3Qj^BB^WPJupv5u?9-q=SQ z7wxT9hiKQwmVkjH*M_oQ)|kTnI->KwS|n#U&E)#b(C&OzyWXwz)hS#s=;uaB3yv3} zMRBw7!7S(J`H<-%`$w8+?OJxIB0LV-kKOwk5dEQzc&Fz_D<5|U;T7t+516_l)I@#f zYzACgEsN$&+PsQ6Y@$N_jB`U+3*JeH{K^80tA6?fIQBiTvbo9_(q`L2T==T#k2y#Z z9Gv43u#wrNwaO46NWnm!E-0I_KRl^qm)|^?=KQ>N9I#t?yY;)#v^z0Y;J{2RAl+03 zT_w-etp2gbHaYFP``MAu?cDL6+Pjaxblf;KaN*q@n_R0ACLwr~f<6f)D=t{^ZdWXF zk=?~m>XCk##WcA0w=5T+=FnX4wBP^(p*1GLKCwqLHh7FUmT$At- z_)0b5H;q>E)sPts_+$8thKn2=OQ_4P)Pu$;WwFsuWgB>XP3@M8!bDL$*t}u>9PB?U zVm!ky=0q^xGu2i`ekRCdq)!fvDSCayKJZj@;Mj~VaAH#qX$^kKXnKyO7h;Et=^BTa zp9Q6&e)DXuHG-QpdNd(e_pJK@rQ=w!@<|lm9TGPo5mnpv3}y7QZ%r?|d@kw=nP0I3 zaqGKyi?3*5KJO3qH-1ey()Qs+4ItZ#ODkUcnh-E7bsOLk`wyz=-nEeP8sJ-|+4a`& zo)O+j=_J_!iSFR|zBZy|CZg<)%?7J=cn117i zj1cKGg%9~R9?i!oWJKO@e~3MK6&YagB6r2F*#^oELx`|F@h2GEZOj+m> z&=-tH(AAxl-!;Q>tuP@saGK1_` zW6&je6_B|q$r2fMCV6HlsAYWv8Y*^uv#a`W zPe{d$>sJ;UnJDm|T(fqwbtmDzU=U-ouDd|Y9t<;<@vVm856*7Hf+=^8ltgwq<7=8@ zbh}m`(x-3m>fqssQ+(N!Su7r!vB7B`1j@`~y9-uMv1qtREYMu>|Ir}dDEUxPF}Fd* zats4Z_c(dI+>yD`DA$4*K1hH;e8cW;o`hhK*MXg>&FfB?_Oq_cgL6zQM5eoEdxRqJ zvT@|kUCP^@NZjtsCN<2B?Hp-```(3EdX${f8G1=j4oo%lDZrg-<0?u!dNwj4nm~FmcI;G0oB`;1_Hj>6Tid4X!4jr?ko&CXcID1(VCH zM_sP~4YjCPqvk4Zc?1MQf4tl%*vImmMhVlUt`@s)N)9g>3#?0JG{-K{SI^|GmVU?P zp;e|VKK#Wm$o*!He$wxa`4|WC^M*xqq2{~WE~}7`n_9%vL1O9pHi+)4Y>NuxQEdmV zLlLo=X+7eR&+GhXHgW9{Apig^T8Gh7Eve=(*<9I`V~c7pIWX4Gyw4>fIUlD1X6B#$ zHjPtgUhZfs=nrX5i%7bDojPQ_+=t?$?3m8VeW$}i-$Nl=vLRu)?=Es*)u80o#EPIyJ>X*0>pWn*VQMr#c|VenYuVd#r~rD})^#@=9um0- zbvQ|Fuu$Wd7KuO<%GMmC8x&7ozZ3oO0sLRz;NAL(CC!F zZ)k|lzy_ULBkyKWTG=N{8RM*6VL*rdWNKmY@ekii3o%r>os-xS!%p&$!AlBo{?X)E!q*M+;^&I;sU7q302V()X|aQA;r zpb=un^+I>6-P5ONt(tzAD0w8%Z-@!b;>A4mN-it!N`5{MmzGmAOZD)V=Ga)h#J#&? zdH;0?xPg1<)C@-Fm{3Z4;;*(cvEmr}%h=o+W|nbH3;ebM;~kG;w=|=j5l@-4=YSLR z4E(cOd}z3fiyM_va#u<{lFAz-GpYR`*?dy%5eM0WMBam~ipc)Aoh!8~93H?=8!KT8 zEB;P>`p}YP{Y0&ji+?KcSAFbbQSW$KgLFfdm`q+G3&K0kP<2TR;zq@e-NW zatAy-0{iz}<=`zj8)BlR3iP&5-TWoRtnuUg>G@CuU6}o<*O*d;skRQ3Eq^sCsB!=J z>8(d7^THzm^#C@shUNQ@f^_6wu=2#Dk?dzQRjsxzf8jAsav6zZ1JiOlef-L)T8Q?H z(UBbA+|Lc#TF|lY;pkri?U1`k-TKo75gxKc=0+v%Q@cN&Ry3AoP62)|h&4C$2>V;V zH_H3252dNGPV`_4E4WgG{vtWviV27WrARc=iv}(UZcHx5e%)!!8}L>)WSR~yOa&Dt zJu$VFN)Z?Ui>fYauU`LaG?C0ZSW1#UF!&k=}# ze&e+nSr~xb&-KEFRtofJImV$hHSe=VWqC@8WI}`We7k3AS#%P(lSDEQouF%B!xziu;N~dVR(`%-^Y4Wyi#KH zwTt?b){WV=3Ip)1Mty-a>cmi0znt`EmluKPp3my7lDF6`z*RlT-IEp#c^b{E8LEr= z@C1*#G3ROi&PA*Q{!7L6vIF((o;h?rasC}%dv2ftCW?=69ENOFC zo|6wR>Gr$-Zab$>{D;e^dODk?Hq#Shk>Rq=`;DE<8LBp(6h)gKbn)5uFBTeLhMIuj zrTb1`#q`V)^2rLIy1~eBVqIY5l)}1iGRk2;sikzSv+TQ>>rYiGQ}9xWM6UbDt3ukE zkwV{0)`1jm^`2<6fP*jewN{DmvIneYs-yWM^i^Kk98kqTjJCh{*^4_7GO?@_{|IycK_!HN#6FSJl4&Igg^Q0~SgBuY|DNyAp+0dtq_)

?FkxDtA2MG7#8Pl8aRv@)gE{(COd|$ z*bXy0xB?&t{VD^ivhkH~Z+F#hybya}Q%QJ5lR`p_eE+qhu)_y*of`&pT0WPstv4HB zoK8<#`V+4d49j-K_{ro3Wg6P&W3wTD4Q%!I1gw(ryn;@6L!{!Qd)d ze1L;u_h+^yYA@j?Vu^ao-Fbdh0FM#EP^4lYME(}QGjAK7G>ycn%IwPt*(%KjIw*bn zbgOyeKDE+%U5wj+OTBlah$411XSD6iT{F+Xd7!<-ovz@RTWjN<5o2PGRk|4Gw(-Rt z$H@|U2$GvNcNze~G+Ztzr5wR5HS2|1D>F1G3uWP4;GoNcLnUT>BL}35QF0m)<<#>E zIu+=K4N>u&J;6@|!%dWD1LZ<3-VbI^g90}}?b-aV7`X6V*=#a#U*<)5o91Xv^vAd7 z>-v!zsin7m*37thqY57P1;T0j^y0ae{9|K+uCNGnG)i(+=&Dt;SBWiDC>!$TAm@DB zVKKCoR|>G=bw~Cd{@;T}E)4T7O@}JGn*Hq?yLGCV+ioHyzNw$BzX$g{vAV4Z0a=Cay)x^rzR81OJ^HcB2xk2nSbj(Ex0(rNTKEY~9>y%yZf=p!B)S;B zcZ~=$BS$w20@b_xN~0@|>W#?yzbm*p?uzY}IyvMGQn$M01ys^H!H4iEsk7lW6Szf|C}p3(zaBG%AUdy# ze)zKCi_9L=(0A@8OS;p}E0u)NR;kcjZ1%`>@WtD*6|{;QMwpWU5amTw5Vwm+W+8rRIb)raqVtx|{{$JG%R`uNMIF{TZP z1=aDvu^&0AuW7?N-wSLrkU4ZO<_ZbyUDuq08h_Nyv_{!n>=h_QcXn2*91l&!P?*YI z1o&cYG(As&oZyYe;G5yo!CmfvAMErdD73!Z}SNEHjLyQ%y*h? zpo}}m=L=DWnAp2gtvSbK2CJA=tH~A(-Q^#%E9!(z5xsXK2;XAnrp%jDn zPSutWmX;bRe6&CjiuZF)|VN?^)yPdB7JXF(0I;)%M zsR;^HbGW*f(z%vz9oKBFY)RWUJixuW0DxH076+(=J;-o3b6V#rv@tb!9KH&qN8EO- z9F8)o~HP?WD`~<7_O}*y8A_E zLHKw=1+50Yjz^~O5%m?A>|x}17gDZJRXS`YlguGuWAeeduRi=u^|KW7R4s+}-4`?0l#u8v9)+IBq#}C%#{u-MJmCabqmXcuxXv$>G*5#ir&F}+ zPd~O;Jp{OFT{_IP;`I9KDg7}w^^2U56g14ca!w^Ib1j~AF}+`YQZ^%2{HpP?P8K}n z^*N4>rA!v-)wsdtq`mv>qOJi>je>oPF<)RD0OqTMhGwB5(|O8H1>pzI+cWJd3=G%8 zAFnCN#WcKK>s3{BP@A3PU6fMFBp%m&!3?%7=_j`LJC1(1Z(#q?2}Ur>d~lPrfB|u- zqBCMTku5hoP18LPa_>7|oY+h-^dAlkeVektTocjQ2i_4Bz!r-SC0VxDQF;E9{G#Rj z*VanQ*<8bUnq$~9dp?gxGv0L-Sa8KM`=*zhCH<2bo5DIXHOD)b30|cFab@Ouz2(nCtSNTd4DF|e9bc1P!WTB3l@6b_H6m{d+PXB`)AKC zA|Ql1zUb|y0<=O)yDk2A+`MSfD~DO|Ms#kY&%i5L3$6hYddO5o4x*ZshJtv5U6{oO zkP#oI_9>J$2N6c=oF(#aR)2i+%{EwAE>oy>^{>9(nSTt!w3Z#cD%+v)xxTw%ffuT+ zSQmzyUWp^qaV+a@l6Ljbzx zWr!6(hZ35htQtc6;X>}}XyaCV_%SAvy~o%s5mu`eVu#lg5Suk|5kXG-Nem-(l;d+j zmmr~ArIdtMuxm?m!wm8gUJu>Gaj!&bH8Op%$KnI}Ef%HnSY4WFTFl7u^$?|}Z6o%B zf77{4`8)VER;;R~!3TAUG6UC$L9|ZSAMn6&hk=?|x}CF%k5?-@65P6ffyD5W3&BcF zY}e8wK8=i+v1@DljE7Y8^&xF7prX^epRUIm32WWiSlkW*2V$UhnC0caK4JRT#X5kc_C?kVf_C5EP*sl{Gyi(JW_}r z{B3Pn@;mlJi({h1K*S!y-IiNQAuY*eIBFc~&Ni&_Lv%FD#$ZKVRU#qgFKvpxzepC4 zP41YlMqfk?iN>2Bg?^LV0RicvuoI{Ydf@V2JvS?Oo> zzt>nhp`??cWWTV*{z{A*L#vc}gtk8`px*mk%+oa7pA_@_qv<0p>rg z^M^*VK*-cbrqt^<6d9i`iZy-xK@NL;;{ zy7%{)Q|KA*{-@EpDgG~(dfi%d(zIa$Ml(6D5cnBAxaTfC`~pQk@(WhduPTR+nYTCV z2`}7#nVL81#O|hnMj6SD(oj$sCLilXZCUnV7=`#|B^v%rg$1&4R5 zDhZ_E>6JnqKFBc1sc%)aA?blpcPK$LJgHF?!^i*#7f{s3d@JGdE@lI1E)}7cTT3jP z{hFty>gs6wuEoz6158?<9X+t6Bo5;;Iy^zZ^_f&8l>Uu9Z~8-EWQP1hyR zl1KBej^3=^kij7C=03D}Ax8RXZFE+Qu;Q2NG0)s7UnHynoluWlH2i%I&88rrtl{s^ zl(^Ifecr}?TDrdATB(ew!D% zB+*?HKSFK~;<7PH>s~%}e}9Yc>A~_NfsX}+7NM$3C)2}CgtC{iW7qyfQE6``Dcx^? 
zJ)FQ!o6OPK)yL_6XG}|T z&{v+8ypEm9P3%BwBTtn>fL0S2>bU(cV(LGIrK||U3N(zdb)OaDG5}@T6PI;#Jamb( z*IuS{OUmq~rp#vasT7l}+uzra>~6~TJP@o7{osqJ3Kp@;QofnOX1$|ZOfVKnGRS`k z(JnV{oAaBc%3h(qwk(7wAu&$2z^C=A?0EvKPCQomXzWg%Y33H*%uIRRrE;IBNPDr8 z6LSHs&HPGmAHLlZlZV?o@?rzRohRK}lr7rG?Ik5F4nfJgJS8=`@~0F5SUKBZgLKB{ zcG9O`)0wHSyMf6sjDf_}bvzPhj{N>g9fx?@?M(nMQeP|ia9Hh2&{1v5a-OAfu2*@} z-k(%6#5qi@pQdS>;qU|E6)|$CqYZ?f?ds!$#_!)NzsCnt$8d<6J&tF)Vm)6&&O;wO zb@RB@A3^Ig)T(E8O}BW6k>kNSkpF7*k%ghwHD>3TAj?Rn_Lnr<2B$xrRQHGPbUj(Q z5M_^_YEJn9A*a@K{d5Z~&u3VzO#`x;bX)}Uhr{B;C@<<5_(v}Kk5$uxn2V}jFY7(L zD;3n*@nX!!-QmmO#3&+5AM6G1lNe_q2H>7I00r74@(?YGx+=BEGRBf z7t}~iinPt$!xzV6gVR^W0GI9|-AuO+)3tb2WOoEO_dM~!Y*}+BZh6IkHXm85-R(kB6aVE^S+T335k z9u%ZPuxhl`5>@vt)4#ZtXpQA6RJffDmD|rLLa3@bz&@C@iG4-SM)PC6V%s0eNbXld zFOYJXR5}o$Qwap#7e2mk#NJ-7&y)IIG^f-_Rz;!=01%0(v0JRYldCxxksxMn58u`4 zh2Ocoqx6o7ebYFJPcvDgQH&$G4S#;XvgHO3TP3o!{XN56O9hJceDwda{cz!dya@0& zq)&prNxt#XUqtxXteEpIqMjwi&f1aWKINW~)#}gt+#c8Aeg7x%8n7$R*r-z$HR6t{ z!IgX*30xM!7YaOSyWI-p=A(Hk0jOKJRB%Cid?P1k3_i8kCe=3^I^IBGhf?7>lG4PG zVhEA(P!X;+&v!j;RNVGfutuGzD|i3=z~Ivjjbs}e@d0S|KZPlfF0H;^FXJerisZn+8haFy=QiHV%TJhn#cXzYdOw10ly}w# z#d`jWl2w#7#MIY#ah%U)+wJl8WNq`vx-UJ8FbcvVv~j0I;EF71MoBgvsL%iKt-}8s z-wLM!=9g<-LX^%_)d`N*Q9O_+X!Ma2@X6S zrL99md4=z2#U=i)u(HPy!nL@K@{wZ7@ec+s{0-ADTMxK+lkfeIAlq8Ww)QvF5Iu0- z1)Wq<)THnDW-4&y(zx5Vjz~oIi+_&!^A98I!Ph=e14hU=x|bQnopG_8?r#zk)ODbo z@yQx&e9>Zwq_hkWxd?o6Kx}r3{za5nVg;#{X$VwN89E{DfObCtSNfmPkc+dNow64` zp?8m8@Cu#Utn9Sw`y&_4Iw`%`uv(D6W*r%P_dpeVk8)MN=LBctxJsjsP^6>)#q~6q z&dpPaf$58+gtuR1)`+AAvNT$biyt(Wdh&Q>Ui)HREiubV3&y*M_qjQp<@u}huUG9f z*u2=iQb_Lk3iQeSVT&&wzvKmv_tyC*w?aLn+ft9p9elQsp&?`Q7+Bz2sJA@4|e zyW6p$ol~Y3NzvSIpXg9e*FbAe4w|{*(+**~EmGO#;SBJVst1(ZQRj6BZuq0P03nxDIx4PSA;H~7pU~a;CH|OU;tDA-SBC>mm zeKJjqaAlUy?;FDd&$8s5URVoQiy({ zcH^CUc4Y&PQV@_?lz#1nA$$p_lA|#FwzWp zo@^mYs6yD4?93<|gQ0bV;DFB+M}x;BrChdIXtJl*sFm96c6D$kL(GocLHW0XvE&^T zaqv4$D4Zzq7rp+xe@nj<6y1+tqjiX_6KQPJe%N09m)3u-9#?V+7+Q+j*m>g+Iyo+3 zjWYW^zKq{2U`msI6SPMernRTuE8B%pI}P|9#BE(&w`aM~&lOy8^~}$YU?oBQugo?# zGyAU8VqddsIhO`nUgJT8HYS;U27hRapR^)(a`f6rEsoabsW5-;vl7JU8!{A?6xVeV@inCnJa6VPx{XEw{C@X>k1g z^%x@R-J97v6Vg9DUdW?m_%6d+%ha5@o35UKS-nOaO*Uc{#Z?G0mr0T4r+|h%NHsF5 z7FU89j18En{nRYv!2>$vHStpAn%!3zdQ?V(tE9A6$q?QPAW8;%B7aaMbiXE<*Qdd? 
zzi13Z9o5;s=We~`wP88I_JJqcz}sxD&JS9ZP%y-bEhZ!)@8BGqWx1ATjQV<9p0%6t z`?p!5?~&b~^OW_7vh$r1xxYR}Sv@p+%Tjt&t^iHE1nCuwX1(`67<=!yr2cq+yt1<0 zCrwQ&ElbUvduz+gg}L{rMBD=h;>^s3np+TOx%U=S99fwIm7J)En2H-Lab!4t$GV?; zfA{ye_xt_vC+ax}Z_ar?U(dDCktbsM6f;@asywoMJ&Kd{n$w?G`!MK9xzg;f4n3r0 z+FX7G_4FAP)eo@j&}MZ{GdkNGy?r7@4sy%q^T|iMVTT6>Z)9woZGgB2R37(ue;D?W z`K&wj@l@FM%UySQVub8%v%4+fbDU(el{43Ee8vx7yXVQ1KDjkZ6o{DkeJ%3pW0!jj zE_3Qi%?qYN`^#d3ElQ>ald6NtFTzREw zmTL)KjO%V^ecS~kx^)P4-Mghyx1$G#2UKX%y+k2uU)p#EIcQ%g46o?Zh$={#67v_W zKR3-fKncAw{}V#6pE$0rhbXVVQ*nRzZos{@(MehGz-Yq&t;ex(g6^3(ks7Sq+^a1S zR&gB94QrmWX{~UYW50Bu^(*bszwfQ_xYt4BoNsx%B_CZ@{6y_HaIB(ee@^sxd@czO zLc+;cI|(m4QaZ+R$nG%u&rDAKsOETEw{MFvT<-G{&&qp*r9WDpIanCwY-&P+hwN?> z#hUqIxKdjx-28-}yv+dt5+YnIrDTsPcs&0Sz5YwThcS6q?ZP(H_k|}Q(*hjq{nVA@ zq+pAStV4B}SoDp+S0vNQi&$`125+c&1bcaEF__y_;e@~+e<@dnRQHmQc^Qc_W9o|w z`c#ZavyEc-){*pYH*?l2Cb743_$2Oqxs@~Q`n|ZQQ2AVpCu3cAjW5&;n)5|@lQ6!H(~&#geZX%M`5HHUOTMSLY$Gt8l6y>sAnu2W`nmb>Sd$5$V;^Jpo>R_Z^@ zuygYI{c8Q#LmoF*efX72-#5|xsCf#Q6Rtd6!>ZoY-$?B&?Q^SO&(~nZt37p5zC@C~ z{tjWI4j>F=5IdL`{&!LQT>+*#0UH+eeyr~=`l>u(i|gnbl$L7z<7v5eAk6|KDTfT# z3v+J~@AMrA*?`A?`rWjB{ul0ph}J!_hQi=EMJmg7hl_=M zE?q0wN~~c+wjt!J`DPuYW`{x5!x8@kogDf&?06>>JnFg2|X?gb1t= zhvmAH2|Hz0-Y&2`bkn=7u4L2wJKX*bAFN8sqD{#H_{DXys@Q2_%Z>eSKhJJ|Jx{2o zj{@$}61wX4R$2?cEp|<7`}OOb`6iF$R@^(={ZtgW?yrAVI`ivk7MSw&EmCM!X~1`5 z_*xqyeO_kOHqprFOCqoyCSx8c#%8^xaLLKDEpqZq5A?hLig%R<(*8@;>oTPL zR}_Y7BmQ+j#U1FcfXDylgc9%X|7)ulzCc*W2ZAHK_zc=r{x5prw#<(IiE1;^<1lC0)G& zl}l{!J6b$vdT!hpJ$q%;+2{Qa1fP_@9?zMjs3`nMy7y}vmG7)))Jh06RBd(*x5-gXT zfz%wtXwy2qXXPXQC;it!gpkcf4ViYBJU3Ra0Ck&G9`q%PNs5=ecuyJAX;CeT1P$%< zu3OHE$?eWHms`@074lQ5!eA|oH8ZIfs%>Yf(walWza|!(!x%>8F4ibbeYy*^v;BVc z$2y~J^_VU5xwWb*Jy-Lt&Ssy#UDU*AmQVJJ!&$5DhWrTRtYGb&u?yH$mTi^tUNlg1 zn_Pt$ErPL zVK;0^FzN)ZCJ3I64eF}uD2e$fp@*9-?rLYo{@%7s+f_^twDM+hST!r4E4FSQ-J*kW zis(twO&pIm0ix;K7jH=M6L)jqcBA_3s_W{R`3D=|dcb?@9ZCUUi%%Ub)5k{K4szlQ zEFbDB6#A;v#1%8Y-LKguKvw`0@51%hk2-WQIHYAO-`4*8XUUy~oY{}p8ME&)#7Fhm zpPu?s)+|YxxR?A@8N!5}f8?Bvovvk>q`2K|Tg*QsChYS##yUzmwp#CVv@1e_WMfwk zLioybjmFO{#tc^nO;`^&Sem%pg(*L2tvDPjxBGJLx0=gnjdOgh4Hd1te6C1a=Y2IP zxWeRXdU{2vi$az-(ox~WK!$&U=z>0-un1(wWyLxp=p=~xVxViDa-$&I=eDm#S1gV& zcOKdPhN)EvevucLAVOf8?4;XA`V&*rWE*}dOYumpe!Ls<_M&_zJ0Ja;>5rSSMLyO* z43ZjA)R6nk6}v_q#rcLoCD~p*+Q&q%IN>W_wtV`K4y~Vf`|bvPX-Is)ZRJMTa)-~} ztzvB4TpJx;n)RAHnXExH8L!PdKe8B^1h>ja@II1!4#qwA{Gk)+A8@oeWm9uQkx(oP7Ua(pP+^%R-8#LtEGyS>OA`F27`2}w>5qmXB!w+ z97E|XgtYG(Z6{37mfq{UoHzI3evSC>a@}X?t-p7v${6VJxEAf|YUH^_di3Kf;-Z?u$2XIdxm@o) z+KIMMm>%D5cZ%1c_uX^2Vm09ljZv^$GFNF0GJ0oXGvVXJSEWd~9ikF7-+2jp;|%1; zL`hn1uOPkvbxOZ;@Q3xDk}w6*ZP%JLMjH6mz%(aR(x)-3*hX-7_8kjnkxsZZ-zS6R48%Q z&y*o~Ap`Oqe#Q6|i`^zOMmmjLykOR3DeEB_%B{U>S#9YeV|QypyIkJlbu&MO`#w_l z%m_rg$lE#9b$Kn1SNq6euc7s&&N980m0S%yt_i*}lAxC!dHC?ES+;<-BQYBxAtQJO7E+EG6KkerPtc)tRCeXEF|_;dZT#gVJQL%eU! 
z`?c%3opRDFS86LK&3$*Z40~>eRS=UGeQ2@*FJ-HJ+LubiAMMUVUplU5lWWQ1%Tp>a zdl1MqS8rcHSWztbrmT(mwB9`GB1|YBM}DZ3?0-(o;CbW1StD8(^J7=bHyKSDZqa!# zYi`f|8u`KBX|+y$F%iLk$v2Z@Me*|2E3AfzZUJ@kpFVv&xht=iixo(fkstc4OD}7C zUi(t9AYPSS4*?E4{-$uT>vWNxA3oj*BwR0AgKjSu`_+*E=cO(x22z(%+r}!7R`#1O z-vsRr;*~;&>m>xb)D!!XVdP-Fa|-Jxlw`7et6dF$LiSFbPA@t`ad(tLme+*Bl#5YP(AiI*w_f<^ob-}sBvMWoRI3L^# zT5Vn}?UH5Bb=lX&dIf&x@GZ6?~oG(ol6q|MP!ebBTyu$N#tx!{Kl-5TPgO?>j(^}FO&oMIYY@b1-+5ML6HoQOtg95@AE6K4!ISM$IiIDVdd&F4J z_E8Lez&=;uS?lbs&AMWb)%+&i9B$kFTkN49H}l{WRpT=*`$8RACVkbOS~J}{GZEy& z{X2~lj@ZkHrkqG>U7^s|yVhSDPn)CD7vV`s9T%Y84M<4a#y>it$EF{7_-&s+ zL`+rNBjVTE6C7OL>^+-l{{vADT!*&c0ev0cIIia7`q&ib)6?bF!O)>M4xD?fAWT+w zQf5{r#q8EI>{1c#X?YONwt`Ev_g_3e+u*eKSkc08fJ>QP0mr!%S{g)b&v7QdmS_Ttg z7E+SRNUQID*(VTa`HkY3OJ#CABXg2PZRf{*_+lQ?9)}PeAQFX(jI{iy<6yFAyW*m= zOF~qipY39p4-mCVi#qd}l2HCGVJTJU#!6Da;GmOVW+PKw?2b|hwq}FJ;_b!rPBz6cp}z@gAr zhMhS}O!uZ)@P4K$5^k{J3HGPkWzIAYt}>k@akDIXj5%kAH@#4IS43w>0)0nCN^|q* zqaQEtDQtYGOuRd<>YW74C?%U0;=20KZ1@VE4SU8wl^jXaW6TpaIGJRV71LOAQ8F{#(C)xAfDOC@v7r~U)nN!0yKDT zF*QzPIH-N6T0T`6oh6%6x|QDq;zkD*v=w_ko4?9CiE;e}H<@^w70p!JtCum-?=;vj zr1F>_cI^zl#__v=cr8nc7Yl!R_ro{e);I)HZ0MDg!eVgtmxDcYJ<+s4iN`}pTO@X7 zjFyc))*C2}-?*UN(osoL7sH^obZV~t#+eyqUSz?y$S2Xq*qxSX2sA=|(2C#C&m$hY zQndnIY?QHf$FKfax_FtgS@fAYWKq+Gg+(_@*Jr&ghVVZk%olg9`D;XFObOMg=1v4m z_yXv9x$$Jls_}y@;|sp;#a1yoEn37+JB-x>G?rE#tyn$RQPvxhT4L>941_$abqhl$ zygIr*yykW;sF!wB{_&BC(kn-!zP@XA{Om`J9Lb-R^JP*8AgNd(51Y093}mjwGpE^WZkk(=e%^#5`%A!SMr*avNS|!sdw?)|9jZ2t9>|;; z+_8;sg<2;0Z?=nNn7%hA;iK3We}J?`s_P75-%hUng_ZAe^31gqy1#-5kPFM=p~0QE z+mbV-TZ5fzGVCrIl7)@!ui}ynJsS7QrOJ-Zq-LycmM$g7z)y~RKUD}oC=YT=`l0@u zu&4cZvrQ&CdW%aLx;Wnft`m+s;SP4cQ8gum_QGX@)gE&RNHj;6^r}u8Y^2BaA^--M z43Y=ZvWRS>Wxp&pqivU*NgoKR3(&>G5JYaeRGjcEZy?(B%aHlp6nT2iBT(z>MYwB} z>2IA?>4uVz0l{NNPt|3a`z{F(3?7@2qnhW)!;3Zj?X^c*vP@YsKqdMv&#lP1$ zhOxiU)_rgBrZwx5*s~5_ZD*`hU2&~C&YFxwUAtE1n;jVE)*RsMcK+AcdTB}TjQ6-s zSY2Q1FXj1b{vmCd*OQ~VYpepM+mD(~hBSa z?ZA7=dFYSIk-GK3C}X@#Iud=;88-A({&JSe!r7x-(Z})Yx;X6JMhImBpLS{^AE$F? 
z3jOhy()F5WzL^4}XN6K}Q*|XzJej1GE)(r(?h_K}ZEwT9e+*?rbm3?Y4eew&^IE;j zD`~>b<}j16x5AO5n&Jr*{y3;=1?k}tl9luKesoe=#aw*90gq5qkRwdH|yZJFv^aIX77(I}0D;K`W&Ct&4NX6Z<6!GQ7`U>rM^z5D;c65tuE6!HK{Pc&&$9s6X>UF!$BvRrN_8#Z zpzX3?fH!S-cX!jg95xBpcy;covn{0tgg|BZ1VK19qN`>X8<#TMQW|_RcCo~XJjX5ASn1b2ll9Mbl{wL^Wcb7iw#ouJ`(3=S{QLb zK<=1D#JXz=LVrHu37&79o&791N*lMg?FBMu{sT=%chrltGU9l&R zqiv8EsgssEQ4h~S0%iV%Gn(A;&Cn_IK(g4YV6T4o9Qq&e(9%!4O*LhyOf6f4F8b4e zsHCoCu{obaSbkFe_wENwz{XXKb)%A$Tv!|IwV)S4wn>jEX~7C!05o^QU(FL>r1jza~t;7uNHuc(m2r7bFY= zf4B0aJ8+XjyLCc*HTR=GN{5Ch2G1qzS<_0Y)kQ;-3Or8a_G`jYu~w!VWJYmDT%Ate zntkBXt8dEP1w+!WyUfo0K^A>VZPI~*LaZ7A4B*tO8Jv$QFtl*W4VPe}>?CQ(EZ7ev zy=+tzApWaFP1*@Am?`8Y^_^@!$sonOoR`Oh9cz8!lXXPf(RuP1kL^74eF zZLg&Mk{^c&{{Y;1jm)?_==GU(%MWRDj!_BbEw5(T^5bZj;3Q@t3U-#owfMli&bH~M zQ%V$?n&g00xt-$5XX4zB6BrTZT9qs3LdwO3IBa*e>Rzm{OTE#p^)=+S(!Oiv>whb(_c7sdR=2n1Q6abv+p;`daJVBoNRiXZu54n=bvaGXAmE=_p1P z`H5~EFX?B0z^dF``Q-C#VNOi-U(Atb4q%RA=>Y@OnrLXVbFFmZzu71Mv3LJNpBylY z(>mV6*tB9+J>`DRrnzBLTVGmYVfb(~myD3)ot|8h(6Z8W++qP=h3TwHvGFCF7qw_ZXLUcbLUw zlv()~KxR|%E7(goQ+8#f)qKcawk4enaH$S@ayuISzOg^jq>?9ki5Xx$v83}z=>n2IuTaj8>4j|lO z6hE(lXm`(grIF5`clX-`{;A+4Po**_05at0>Eg)fTGl0QqV+VFJJzLEQa}3RPZ`gh z(Q(rmt=yzZ@a;A zTpOjYbCqi(qlEjW^YeNj(4A8?s6Lug>S1bXYHmY=`6kgrO~6y6Yn)9zz3lA8^0<3@ zZ!iW;MpOpzbf@N4(bNr;hqV)tJaeFy||er zFX312?}dLBmEWwDZa$)USN=V`(gH*&dfCfrOB3#?{MP8BnF`omYay z*cLkl&opFr>)OMGw0=*SPQ=LgSFWEEY1H-Y4&Lii?P7A=3pP%fy4!C1FtHq^jeI2% zXt&p1tHt&iAI>`uuwwd&-H#k13OyT6%$bMPD&W}7_ya|{Y~uw*tov_;5PKEMIrEZ} zwUQc1oUsu}IhEDfgNnUB8@^C)w)&&7nV`hYyeGu_!R|0nfK-diNSdg4o{9Tu;T)Z& z6p**#Hz6;cj9a?6ASb_=hPm$fNkU2teJ`9#w49a$eyuP)`f{7Ue|e69oP z$TwGki!MOHgrrgm^#~M#P zi4}%EV(QqO;`TlDvly?oPJ^GoVKv#2$}SXMPL1Gi$Pd8-Zrjj-OgM z*(gtFL*i&%$Im+TuDEOy{R}s99bi zjCILJQ2i%r5L`{mZDm4AooD!U{j8nyau=Y5kZIXGT&2m8^dJujwg@4r>qMmf4d2+o zL{TKenvmhv{XOdiOHtfV0r( z-rHys53)bd##AgI?*|kc`DqjK+yl}q=Td^zlNFd-G@)UltiZK0VT5ZI+{m^d`Ib@8 zm+s#{(sK+c5RWJn+W(a&5H*o#nU~B4ab`P)u1HLq?N1t8%I=PAIUzzm#$g)Tnl3Kh z%o+vVw0EM8>}u?~!zvoYGqeg9GW&*ja<`8P$otpgN^EQ8+wO|~f)^!2AS7gcte*{E z!K!*MW_S_N8)w4=5K}h0)I%%XUz+fS6-1rzWRu=BKJ9Kop4{fggVVgr9 zWh-gO(U{kuRHfJ~d+X(fdvt2%-S8OzK&TVKHv6DQdd3NVF4CBRTjL3ZB>kxG7r8?< zNTcZpq1FU@A+bJskW!T0AOmO^Te+4YmQd`H*QvavqLSmMO*;B)v8KH}y{)1#?y4Rk z4=w!m?BwaX*>>^DOo8%S<-$pR7o0S^WZFhlTD7Ul@te+9Uanhw_i10avI>{!qnU&g zdh>=|bUk^xvUG%yn9$p7g2dD*<$IRTU2-kpMP+w@s;@FcgynoK+iGf3*+=WfPEhaX zqs-5^twe+KpJq17w}iPA1Gf>Ue}>8899^q*`dLI$KmRrI+O*=>gfcG4i}>QnF|3;G zNL{7LWciz)@LEwmj7NBus(%P-Qv8EGyRWV~YEZM6mk-wK6Z)F@9dK z;>V6~KAS{j2uxq`njFJ>9)1RS^cFRnx_P{3f=68!|3QEbFf|*VzTICA+1$``TkZo? 
z2*4heT4S}^M2|UhRdJ}L8g&l{ULlh}hhv1Z@uQlK?Ro0L z5{p-B0)y}PQ*6j-=&nq@Au0Dg^C$60Ly|A_(6H3_qEs*lgQ&-AGz9&{Q}h|M1hZZRx^tTbyZ~ia3uy+Vh6(>7v;L*h{ zK^04f!hj(fG#$)Uty|i znW~*zdV6kb#-!T!{d5vK7SyQYsG6~gIp-7=3wzPtD}2@ydV~oQta099rWQ1pM0XA_ zHz3iV@}kutCCwx8?8Zf^DDiz>iwmb`ON2-3r)l_%#Oo62wf&O$VRoDEb@T^6FWqUR z;epA4oangLUqeK8>pyhpP#5<@JX7I8`+`#2!+g6^M+SGc7uQkRzhMm(-RyNc5vX;C z-IjmR&p%fA-|*vG=@fCBZ6^@UoH^w8i{E8 zq`cZI$Z#^>=0eS_;d~T(Bj}Qrj+}Mi&%qB?j6lZ*O;6G!M-0FIt{_nRbdiZ6^SkQ-og&`b=y*p8@@)@vIY>^p7qu8Useu%_ z(W`!9F46XN&uIR{>sFD?X1ncfGi1cbGlaq5W#GQ{Fi*6!&pPIXQ%sMVF62SO$u2^V zq$~FrF~5mMe|(Kxx~yY+CSYyCu75O;UbHFsTC7G*~3)@=I2w`jFL`0!Q%9U zP1?G(6p9gm5lU-#Ann3!BYv7RHyU>4uXRQ;TXUHTeV!D{ibvYdoQrK> zC%n-7FY{8U_8*|>-~3Ct2O1Ss_-A!bJ|AcNsW*tL{W;S?gd}T9KZ+KP7ou(RmHnSY zoc|Xv1>iN?&&+TC?YA5N3MMzCo!o(5IKRjhGa;$)io<8_07aPTW1C8ZF@lpyhC^eE zoI7Fy^BE)(PYeAtXWbGPoUE~B4hr~;EN~d{Q*`PkohO351`b4Q&M}JP9Iv_cc##zW z=}>kPP?RWZ;~8U7sbm0F`E`@Uv`x4D3UJ25k^`PZYai4PggTEDl7?pWHdcuZyIT&( z++Op9+!AkIPjDTpRu2da{bMv?mAF@{cCT=J)|dH)WsDz= z3T_DE!-T>~W8)g01&b#9r&e7PXN`|Q{pQrUPZW#KzUL~vw2<>Pd2}j3v@S?K_NMR~ z8=GT;#?ZZEgu9KlN%V_^02@#=DPTYtbUaCE?qmdEZ^3j40XWhin1_ zSiIdYX$%i)a*VrNtVfpO1gOHBv!j0DnaPL5xMZH0NTllBXWTX!*B*=0x#3Jd6Jn|0 z9|KAH9&gLH9-b~Sd_A;Pe684>Tc+8@NafMcAupi$Ml$G^C#RG{w6u(jrnh&^3Wgj4 znlekM6Smjxh(4m!b|>sa(Ji%UH))&e17|-fx8lhu#pZ9u{{>;aG}n#(gJ!)U?VOXT zJn6IC&wG8GEY0d_1APQ)U7uonYDZ6UlNLPO(8=KeivKunI|6DsS+#KHVfG5iXWTNX zZBr&Jq*@V9ppLH>D|IsIJl%hyG@72KSGn4*gF*VmFR!*tfKJ@Hz8|1ysWZ0fr}l>o zDu6A`!$ue9^eq}Rd5oZbEkwh$h59*Og42(wq}jb|*Nq`Z_Wx%+2`^ZF^}IX zfu0Iao%4PQUuvK)2UcNnp~xC>leSYjR|FI#g&wp?W8vN)ON@^-*Q5IjYbd

hkY_ zwfCVLyB~iKYFt^A8AyJ!Qf(T9^31!3Gu=r}sz)o$RK@MYVs(IlzJiU8>HERtQM-mt z5~w@jho9-N$qMI-8I&w2_xfvJNyaTXCUYwYP&3#@L-nlyL-*Q<8V1qTk>7fiAov<1F_RX<#AE;;4FXz+C=vENt< zd>dJqmIhYoNOSM*PC;+_Z=RO`E_=U#D_*Bp$5nyX+yR9d(*y5+0`+J~a^#>vkl>fn zmA|Zw?5)68w@X=wDjEi?q)*vwt70CnS<@#QA0f7tvX44F(d?B}dgGw<;izcuG&hTm z=agIJpS^uqqmRHvzpu>@;|iai61Vqk&K=I?soVuq<=+QRmzR|aa@@12+s(s*^m(Rk?NF(Drc#q zu8Y$u>AJE1KxcGKy?PnnxBqi)yUM7Nq*Y)dJE$ef8dILkOP6+Yu(1}N(26uSC8W;8 zq|Ldb*6gk%C;6k53~%L%%8d2m{@^eh{>S`J$7x1{a9)DTcu!NKE1!vQaY$b60WY*g zEq4ar!zlFPP*bz|%c=tYxkXr~!4H;@Q`8Ozyq7Tfi1z;%51)&h_;b zL!e7l)59R>2_4*UvrPSfAPs?+ED;;S*qQce=S;%o(i>Wxk)6s|+ae+JQj_=KU z;ucC*PM!9SvV>Oxy{0lU*-EQGLoy?BI)0{ix5&I zHMoGF6$I2|#jTs1sno`Psi4~Ct8%116!o}l^5@!7?+H(gc$urp(lIvxQwB*M&U1%# zix7Rbq;7!Rp>)Z3$qVJ_%9}z#lEvnI` z{e`vWQugIL?~|WSNAhCL`6eP$^t!e&kMhUZkH{VJ!!Fdb$tY#jTeND;8QZPoBqb`y zNLY#0*Ao^G_t$7S*X+$qAG^4zzkO2aa;g0Jvxin@Q|u9hxJhB(Tcg8eE#<7y{{*5Q zrE=JhYfO;nEZ;oA*$){D>zk_P2=K8h?V0b(XcA)0`&gTCDW;~UmKR)%%~>PIO*}0fqE`7#uKo}3M}QO|#2tXvTl2B@we)fj zvxe-`{7$)AK^{x`u>_d(F0N)wN?;)+kxHL)7YFW{*|M3dXrokxMU)Jx+gEulp9RPc zsC^b&gBB~nZVa!pds*KtdypKPisn5|naxs3TU?zz=q_b~yxFkhYoOe(s5`klkSOBj zE7*c-7!2+f0pw-oa_Yy{e{uoUzhl+KhKMeE>|PDxGwxYIRH~qo3zdAe(LIt#xCz4O zR#@hWe+{mh3-cCDqGhpzjo521KL!(iHj?r);PRrg`9~Iaeb<_PLn=SZi~58R>`MSv zX4e(gFu|^fDRI4%ll=DO@}1<9WDNpZ=Rx{=-x1jQd*M2TuSJDGLQXT}+z!j*y2kiG zw@OnDg0|3viM_C}?TzK3tXtVF$bUp%kNJ92s}gA6KY$-<)qic$()osX1IY>=QN`7I z&-d-Mw5T$K426=H$XHlKaI&C$tF?CdQdZSX7`I}x(@Muc?B#%-q0GHi<^oj5R+eO< zt@VJ~joZChS)ii6eDZ=*VMl-N!2caY+*CaF7i=l%%>t}5&dU76bKL-8wSO^_&5i$L~&R!D?p3mBi zK@wu7f(91*%;eNvk(`UYW86m;{g+;qlJPfORP$U7+aDDu@QPmz-redG$-A;NA5J#? z4!0_{Z(% zjLXK+%`7iBf--ICV$rU!|FX0+0sa!>Rrg|$^3Mm5AE~Cc!NU!D+2>58oQDbz6kaxI z#^l3*FZ30aTiU%vnlR(|3E*3<18jtm1~)vHRb(lcC01?EU*XqOI92J;^EH2_D>9$# zST{5(z(xCEkAXf2o8A|HJYjcsfw1udy5`?Pzm^9Ye_INTG^}#cMrPSBQS3%1{v_uA zkB%Eak){Lt)6Dvx_AzkWeLdgl%SiiCVx9NrJcAr%vF*yE7z12XSzJawN_+^Gc*1Z>FLF;x3 zwAyva&V#E_!SYdS2C$rxd_c$JI-Usa{Sh|~i{LMjK<@e98aVZkda*K*IQ-KX& z>+Yuak&c8?>+6<^4xOk?aQ&TQ3c1<)Z%i*Wvt|6)%vvdqgjlO^+J zpp0?(KyH&)O)s%Xf%O-)hC;%eC*%*`n<-*}U(F?>$GFaWkL) zDtkUaQHp>G{PeTGG`vaGH8p(sv$!w{{$2U0Lc_(LnmdX&x*vS6hN4oQ#uW~rv5L1? 
zU15lf)x0R37veQp_*QWd6Mxj3Zd_qC%O`UU(SZ??sGDd5JvK|z(vPHFXEo1L5a}Fu zdoh!rKpA>#L6xxOaYG-@HPH9P4TRv5K4_~eR>{mh<+F?ZxHA{kjJVl)T6^5gk;7?(K{mHT=NR-lLmEB)p6HRKJK zs~2m6F1&zWY3bwbelWBiO|AL3mh>TkTCUoClxY3xRCjqnisXf4B;RUOl+}6QtP1x$ z)7A)gi@*c>P5DaJn_8j*?YVNzVmX_f?S=Lvdm*9T%s1N_$@^z6;HclWB6VQ>i@hkw zTgDjgT6G~sR;zpFNbzI>pnFnP0LsOvB|77czvj|B#!Zi{09EK6y$RY6#|f0%`hg`X zfzW&2QLga*hYMkgkiM8zS5U3*z=txv8hmBNX)~fAMjAG+NpNpi9kE$Vv69f9@qLE z+BK}ZXFla(s0(e83I+hH{hl&HHOR7yH>?Pf4yKG`LqlS;GXAw@rg3i)R`N zjd4#$P;{VqT=*L_yNsn6;EdJ^L(#VQTql&tG*|b_}G|I~?I>(yY(I-1ywp_SjG>tN5gK=xj=8*@oTOkAf9uFty@|AL| z81x>Y`xjb`qbrp+(O#kZ7iCvFDW}V~BuDNarg*@7AM*#f!debit>?iTC%M@U4i+%= zPgSwm?hKesQn&u9>{PWZ!icw4O{X@0RP4-}Tvxq8k55DIw1Nrdn>Xg^#}GuJBnLg< zZLBBXs-I{)7di3zV%p>m-3JHy$#}j(5pi!&Q7>pXVeFwP-RhJ@37F4-*)M$iuxYHi zY(JLo<6=u$?@8`V^3LJ)!G!MoWfc1AE_h|gZ~~(wYE5Y@Hg|XtnXc+GvpKfa2OYob z--IUzH>8vMuP&Sh2|wMoK^S2IuNGv&)%K_96Ur)quY79PyXP*I<8Hn6RsmRK+Bt=A zNJa9`a`m?&_p=-Uah0fmctIn$r)yZAmEtZHFe0<1c#n9BHNc^_i6E)*RcB~T2_Rct zG?A36f@1>%A_DalTAr_Joicq2dbmd5ez>)cY77bZTn=n_eaeIBF8MuBcxZ+jCPnHE zr_W`=GRzHi89egDlqORJV7dei9xLmC4P8zZ{v^Y%J^%6%CQ(`v>j926;sMfm4PGy# zF(L_I1wtgi$J`Kz6`tV8QZcu{&S{8e?PUr+dq%|tHBk#8#$gCfh$S_N`+$_l!>}yK z?)ksA4h6mv&W+_;<;Ly`D~lqYx=*dZ*@aZGm;(=wwGNoKiV8vo8wyoMdku%Z0P_lo z<3>4%tGQ?*@}V-Ax9~9uH!zbV-DS3#lHpt0cU9?Av%4tP*C2LyUJe;jds@f{vu?kK zz?KgZ`j!;wb5;Fg%nAfa2h0Fi95%+gbk1w&wGha%VZc}0v_&!QLJuwoXMNVa^ipf# zBft5+jNRbi?~w1cY+nEBus-2cBhpno`+R0Ik9lRo=cBy>Qq@|&^ck?@a^;q{NR&wV zs2@giGHCny{pW3SVF?T~!V0esU6mI>ynl6W-pfTcsGgD_uj0Dtd(@cae&%dYr` zud-0xBKJArw$spyAdfGbY*4hcfCrP)$$SZKfcy{-;ui%t2BQCW-^(&87M@olDxy9O zIQ8x9rc=j5#WT*Mz`1WmUf5+OHJm+8mzS~*BjYilXVWq@n&7TSarwRBE97q!tpZ(w zw#nr2xu3gy4yg3%gyB>`mE{#kKRTFJ6_bNrzJofEmFa#ARh8SJEM5gJdLD_!rXo+G zu~uy+HFr>AcfAfi0c|n|#FZf73Oj}(pT?6*cdCJ)%z{+>;s!FFLtW&)=HjsO*`($L za`m!mcDnNz9mz+AYTQbhn+m8szuVjc&5pg(fKTfH6e>cd@nlWm^bUP`PkQFI5N8ah4x2M5P_|`{svXs)!M@JXaDVJ1eg+r<2AS!k46P#eVK_>vpJ{w%Z~A<+x5MP zGyBayom5Nb|B9+U{#%p9r}#JP;l1KC7fHJ6ugCqH|NpmS^sh_3c;P_fjqlFcx*Th; z)I{$+-9Ozue-n3)kHt;%ilqIC*pxpNk#V^XELmg<;WO{#R)me5M=PJ)`z(Iw(4A*F zd{saD|2BGElKx+6kW1X-e%Z-B!WUhntydcR(; z$MgKKnyer&pGK~A9MCTn^(l7kBy(Xz|Aq146(7b4vod~=iR0o_UST6_V+O8khUDk9<@53}AZilvDZ)IU@4GJ}?S5 zF&bFTT>fvSeI3UVrvA&furo2pqH-bnvl;u!7hCwyQ^k(6q)B~9CN^rw9F}pwKvMaR z73EBPMqMUF#9RxSb`mh@D@I;5nSNAB>%f+(`cLP+e1|{mb_=~B*DJ($-q_{nUm@J- z92t8)wz>Uyaym8qMw=_~Y2TJ|8SX*8DM0yk;!?n#3M=o(dwXneR&eq2wJi&;23e@3 zO--oCR}SmrGVPBmsk5o$lIE-8$uw6%b_AmLFLgn7w}S5r{!dG{tcbzkkwZ$DV5s7Y zILiOB0dAgmU^DNe(v{%$fkPKAQYeRlS@vblwEn)0K&km~?ne9l_uGn%i zAO0p#0|?nNzUcFJa5thjsLdgA^KpeosQDdcsIp*s%UPcSp{#VrY-(oX-Frq;7usDuZlhZPF*F*hD$qmQuQ=>L>QMs`=B*R7Q+hj#A=xk%pF zi&=O=S{rRmNkOYX5B=6VvwE=eV-DxdR8knkE$Nr{W0z5<7RJK;3Lx|uY(7`8WB~%OYGEimqKI4(Xx+00QB67?l_m(r*Lkxk9ox)U@r9WHt z9~^>cgJ`)r>7tP6`80SCQp6uT98GPC|RA zuQJBzaUj?`1vyjRy!1pxC?_N|A9b|OEtWe4CHWhG!0n_Laqd_LGhBmfYnmCw&iznC z1mNBQ!~?aBs2n5F8%}fHQQ7HJg~rccl@7Zyn|F}y88tdPCZEIK%$2kRXnhm8nlawY zCMwq?sJs{XvrU9mmPo(GiwypG{KXBEzK2Ds~5fSdjZ*)pw;Nw#dQ`w?NC zbc_1ADmo=X<(xJo5itf?5!`j`-EqqDAv0#*%tahr zt+4U*Ot@(Tgxe?BT;q&BuXravU-_wM^jYW>x(O7+WLxUN8*I^In3&X_Yj;6(b- zU;N2u`DQATpkWc0)2tqp@^X`R z8Ta^kmb<*;rSKBZhqqP!nd)5#q^ zv{@ioFsJX_)e-iskM7*M_!3yS`+v5cy&S)ZzV(U9vDuP8J-=UBJaYkx%xR^L9c4jQ z%O{o>9GVtfjwGxxq22yUcQa)CB&kN1dI@KIE#C?KLlHCwLE_0pz8%gfE4bO|Mxxs5 zj(Q+lP!*n=KQsawjL2El&br_L30v{d%KkRsXt3}4Abs0Ye{#X$N=5TFz)zz0W6h@5 z-2>(c|6v!!<=jYbQ*001_9*Zd^V!|5l!H!geb;8Y3XeQOXd_NtQCppH*E02s&{2DI z>b}Fh2;lrK+CfEf9fj3rHq30OtXE&68F_BT%wU7s%;>|(M{nqd(8JPvi{&Gbj@~oD zx(OoEMKSB>vN%D!Jk;1dy3Gp6%LKNt!>`<@g_k8SWu6VC{q~E$Y@5^#`2!lD|0ZBs 
z{Zq8YzO_Yf^8ByO)ZvPyIGW!1db9BGt@YKVmFrUnW+{#evK!=QQa)9qn!kTOFl(jt z4@vN^L8YWD|M~x~x$oa>bpOUF{4ZGl|6BMrxOF_k2oe#S{)r|cN3VTQBz*i`}Ke!)^|f3y1nJ{?85VX~Zu!aU{D*>0k$F36vCSm;$`%M41)1^^he%vk%x~Y})qys~ zxGb;n?gA`>z!%Lylj&QaOf=EIPUl$sdQ)J7(!)oz5AThZzsEQIumtX%bDab|))*;5 zU=(molG9jy@0oNp8L&Zw_sW6H-vnyA9NvBf$W)~6dmmOgH1{%TSiQPKwqb{@``UJf zm$*Cnbm4D?oAS9>AztVl@_OMGyU12`Di$Vr9JZl#Lu#$bbH zBg18L@VqH4$|dWVpb|6ZKwQNHk2mfUnHT;XY8=kVwkABf*2Z6KS-CE{SqcT-_{*#2 zUGh-0C_-3`gtaau0E{>_>vL~9r^i{WI&XoY(N*aaroWB9TQW17d47vN#wbYtT|4O{ z))Y*;9)uqfy)x$1z|iAJ|0eEO!$Gotf`-Ke-cCZ!g^Gv*44tIjz}_py7$-N;xC z`!;sYTIy|k?>{}M;?R(qCvjaHS`ScE7Ff#GjbQ0d%fh(4!IAemg~&kx5t!MNx-Xp=_j~~b!>WeP z7CYU$)_vAQMf3POmI(t;tE5isk$stEvu{DGswAmVM0p#!$RKCVV+mFXsPN~C(n9tsQ1perhLo&8FKj z!nHwscr6bfW?dmMSJ66xyRfYh33?3PKe+0QJeLJRTp|aV^=;ozva;6#-eOy`O?*3! zh61V{VOuqqetz0;-**5NpwxqCdOfU=+}(TQKUS!W!JeyjckeaGo6xvdaG`P;9$H?k z`$6%XtRgShMpoWd4c)4+H*immU0Er;mAg z`-0-gA2&ds$n2XPaO5;?SFqc$C-cC~zaL%)8!rQ=R@CzV<5{X3y^Ifkg!=6Xq4jBRw?b=trl})RL8sj|6$l;+Gg!>c5fexBDBPPeqZ#xL36G< z^CWADe=Vy#LOH^5TG5%3W5K0MSQ0!W-flZg3|JNj1} z5S2;ZM9qz=fwye{q_z-Ru3oY{wMpCK(m+>3P$gBjP^byKV^Qkx`IOlK#q+Xzc}qeX zf9T_*=5kPikY(B~ySq2uDhC+g#GWr#{}=5D%H04vKw5e5%P8`1QVRZwt?6*y4?+_o zI2y(lemq;L+mUk6dO8IF`BYog#ffmw%Spg6_oMzEZb`g+IpyU(p<0V)y7q2}L&9#0 zN5b}E-53c8i1+OV4!P9SPF0pei7N0xq?U-aaHRvfu6qwHeiJpTZp`^?u{CRNQ4|~B z;%2$})0$yM-;mw@NN{6Y`mNfsGS-g<)(zCged9=HSd)^n4iY!Y45p}JsUFo&9@qr` z*y(MSoPjSJdl5UR>OE`@1ly-2(g2&9 z&EM(kI|4qg$L?L{w+0ouUH)*&FIXu;8-Y-{qeYw&8tYmEkMU}a0hBP#B$kJxYR$94 z#_BJ`{!~gF!rF82)dczu!G{tFNFrL-uj_Pj1c~%>>XA5)Dqv;;!XuGjd(`PG*K z+JqL|_fSmR#2MkH$84o-SPPQ*bf^0SV;+6R%{9R%a0W~T5uU5*y8PW5PWaQ<^mT14 z&b{CSfy;w6bXQ|(+Ir?@o?8c3TNu8vjH85I1f&F1O!COEa3=QV{9V6g+?Ofm=9@7^ zk!$GgrIYV!0#!99^4r)^cjI~x%OHM}OYFC41GH*aDI7-}B%9=*9P@h6gR!P}a$|9% zjJC1Su7$jjE>iS^rEHD1ZX{ntu-1B2%!q_Eot;5W!RYfe0`$s<2S`Fg#ie# zUaNEknjw5HryW*sDw*MnbJL6RZ7cPJ7qZ7%Jv|$ZA6dAMnirzh_VM4g5IE0aEY?&6iAI)_vF)e5tE18D zy27J?t>~FKWt2^*yXr};cQ=`X5vH#qsM*q_| z#o{E=@{gj|Lxdb$i?6e*XF|=bxA2Or^Dfj@B4^&+cWROE%MbIju<#G6@cX78BA?A)ezg7BI z1N(yk4OBEIn=_K}xO`V0$B#Hngw~`+I0p*sV30|qX;91Sp@M4!O3ll8P7b@)vxSVo zoAsGYmyPRVn=QmM2btntaP*ne&b-)_NH3p?P9%ED9@a#{WGdx2;P0Bu8wa_M)sLOQ z5vGFE%>tGe#sUWe%OM!2TPrIIU1W`YCLmgj$9SGNe+m^nnT?oE)(PAmW8bVqGYDP& z9r!{N_;5$d-82j6YzonYkBOtngf>Jy^6zyQ28c7NL(zwfe~*Rq{9XYK zz#|g>gpU}|XC68fu&*D+NTRD6Xv@hQXDHBRO%N&U=KG313Vmsn8W#;nFxxI)Q<}a# zyUndO&k(2?k_Bte7JtrZ3}3W);}T|pN}NEx4`-etgbDEO)17_i9gIQ;b^A!kuy5LE zs#jx;Sp|_eUeH6dPu5dVruSx?2epXToUv|<=ki}WMhWtkwg5K@Xi0NPgpwh>bb%b{ zmBFS;;I9xzL^V2!U44yUXW#igN8edlZWC|kv<$)QTCQ_&n!@e~TDo)sY1Sn<<;#P@ z$D((YE`Y^JvVdZQA64aVvJ2`bQE^|eNgK7nvo#!x*BKIN+Z44nGAT=fA7qTSix7f@-Di7BGk^TgpYAy zJ4?;?BXa;EzKFa#8q`A^l8Io+5A)7F_9S1YuBAJJ4@V!VM^|NXUI&zGwIFMI-q1z& zK-Rkro=HGl#cg>r)U-@eatdUv=fjNPgQ0grPWE>2g`DWv9wfSblOKmH<3q-v$m`Vg zfV>G_T8`g?zszPH5>*=(H?E?RRbX0>ktdmi`7-BAN}Jd^hJD-RKQ0sd#FN-* z+vb`s1G)qhi4)gA3Xi)*oY9jhjCp-#3A45CUCD)N8B`icb# zCZNDJPyh0O>jyITS?16rpMOUmLVrgJvGr6UN4hN5&Klu)Hh2kBKrByo8}NC@c&&z(C*@=zZ7_}n?% z>~rUEUJ&4ecSM9$dcZ&DpFNhle=e_^;TQPff`zn_^tp3|kwk}2@W9WP9OQMLojZ5A z5&P$St3Ar}+_|Q=524Z;?gmTa7mYMzjxVmQmTrF4y-)Glfa(g>c`8a9)cg04_r_97 zo+%T_TdP?XcZfKI-y(4cIS_~!gd7CgKVQfPy)<|%FFbl||D(&?`q~$}-)~xX0-|r( z7vHMFpkH>47Oi!^9Y)JR z*~R|v2P7&O@$?g`oezWlL3m3^nS_6%oKt9Pv!L#DF)z(@9Mz)P!JJ5D$LPNO$=B#v zGq0}&2O;*%;pK+w6@h^=R#2Mdnt)XWhRP%V`ML64co6A4m0wqfoW#q+LmUrrN2&2o ze0)4BK?vo$XA`Liz2LPoCdR-?qufLlmlR%Gy7X8Msi36Pk{O*6Tedo!%lOJFm38D~ zXH7}FWSiF|TfW39BTc$`hJug-&!dWiwcarM5ZuvH zc9G~=m6HQa@7i6F&4mf+?dBbp8fG#A`_V#rsU9!gh)I`@+j|~RXXCS2GKD~fDC&(p z1no!UOCGV`xkHwHG87gN^vq$oz<;R9mge!B=-0yqOs?k3GOGsq-H 
z|Cc45gsO7d{w7VBCH8qq209kTMmlX;P_w%4=n?Q%D$;kUGDiL#ZLE)G+(Gyth4oW^G zgp}hB-5775W9}6)+Lx#39HnDfnBE!K4!3R|iZl6dv~O@cAk0Ub{OnlbAd8Yd!{_*5 zZ7&WJMOf=M8mj;Criv+aR=tCm^S`$cZ9ySnmQ5=)(=N9nLYc^-)c%|xQ2td&N#^lZ z26B6+aq%$|JA|L`Y^71$Vo`y2Cm`BH&k&{)YWhItzQzEDIGtMDc6s>&2*l(pv+z?X z7>O8yktyPtb5tYy??DjP`Z5O+Tha$sV-alxe}T$WZcd8(>)U1R{RPQjL|ENf&?rL1 z`m>9W91q~fHJV_u8vex6rXmXNhF%!jheBKa#vMQX0!AQh{nq{w2ps>jQG2-i?89-6 zWsVRCn!kgiVyfL7G-R#w-|3*6`raje$k@sQ*(?Pwy>WK3D6&6CBfxBIWfToaLUR>7 z^h$&O-2ya3D;a%HJwIT7!Q^R#K=C7}tnq2eG^K`2QHt-je?rwV4)At3s{N1aK!r8( zBw}n~am6YiAkOl{jhT&*CKNAENoNtb??1C0{4>6Hw;zPa-*duW)9!$PymIy#b0pq7 ze$E9_!LV(msYA987PmM^OBC6@+uvZcp|? zf$wlc==uLFbMyZKX-DeCrtK4Yk&67n!)1Eu~4Rg+CgZoVFY;h zANKH*E)=&dX10%^#)d`84NGS|Ss?uQXJ?6i9jv+>;Je)azs>!1QyvXdSs8^;0W;25 zHEq~A2t13O;I5zlKUgOF9_izssrj*#}Pp!R`MRlw#97l}yf^@M?4Om@{NUAlfvbnu4NI z@MFpU!Z=XRVu4844Xg{U|DK?6JC>?6CHG7an#+E!LT#+o&j+Fo`6vS~0YbkJZLK0x z`SLSyD}a>-qb+wE&$-U-`_(_fO9nI#qoVy=jK^YwslOZ)N-bOkS5}*&82;*h%nIQT zX>Kj+jIiJFzPm|%dpi~P({pD=1NYlRX{n6QJx4y6&E8;jv~lZDkZOY58N-ttPN*#>*=mvf(Q?0A*L zQk#avxyRltk*|&YU7l;Fa^2NSbCH4!CW)WjIz7fuq|)=HX=v2*eM)B$Q+-@dr%cDi zy!+y#lHU72aX$GS;@Cu7tt2FI3Y`+XX4!elEb!2B=BnZn=Yu}YQ^ezfJU%`9^zuM% z;%m{32iM0vL@p2aVJzp+Q8NgI0{G^r51K|?cbrKcyEgHs1*c5lL5@2|FPh#;-FBHV z4Lk5=bW)=@;&pm56AC}6Sl1pXlNym>WCbI<&`@Ja7SdgL6{W8guAwd|Ax+mq&c$1M zPM~I&sT&fv#LRuOi6~#C`CVm7i=04>f9q_pm@HI-sWT@wt`(1+L0-ceo?cKG(3$f% z>Gm@dFj9b{SZw{!-X+mH>JDhuAuu$4^C8kgk3ajOR^8sBPk}ZU{Qa-cic-v?f0O!V zr`2)qeec7)vZ2b8D=SsM9s$;8blhO5nr%+^+08ao#oQ+-FBxuf&@Dk`Vhb0F*^!27 zP-Cyo&813(49CtlW_JDB^z5yIl_5fgt~h`6)3z|w?iDZY&^HNhUS{(!Kfe)A(oayp zYE24QFiXIwEXe|dT?88}Y96Pl4oRWl%VbnspV)h~+u9!I@ng*S;)1#v3VOC?gT-`H zCKV$A^verDIceZ>F&9t4NbJdwaGqR4C#l(}&%m7`Va8P7@P7Ax&oomd{skHf<>DQt zdr0aeS$b9o2k9y1JKrz(&fq^Asxoyaw~UVORy`m()sQvxO{wY!y6D*tWCPq)>9 z%g<^I!s1#~8VwWOkS-k=YOM%FdjLHb?3!HxbiIK@uk(xp<}l^Z4>XLHvgUZXHIBZI zq6^np7SR?f!09&I$u?NB+48zp*eZagNiNT*Dn=1}c^GVUeIGor4j;&LQ0R|2u6r{< z6h?;YL{mp{sA8kj?(Ms|qxBve$!n!nxw0zP;0{^y4{$wthxU)v zAjtOCQ|f@0ur}Z!XRo=`>h{cRQY$+mM+GXG-xd-gl!=Te$tU?oxL{rel_V+V>>qyq z&c?I_D}0B}eOB+)v{vaDA3O$Z<}69TbW3Au>q__CuHC|0d4`YfrAX`y33-WXd(%#? zoB9_VQE;cjHSVQJ9}>eZxYf%|J(LtPda!W&-`%6xv{Ij`InOe#30r@VmH&AE$XaC@ zwmenS78^TPj{3(s&?FrnaOJ(-7|3f5HMV`8{wlg56+;vwZLVt2Ke=-;HT^D=GZ6s3 zKhH|i5!JZ%X%T@W;?-iMl)jMQ$bn_uXG};f#=@}X>VS{ev_uxwSapwD8f9`@P)p|R zvVT+tmN2_GT)?YKoI08=`}19uWbQdhodbovePatF2qf(+_|acrAGMBnpKr7ju9*qO z`iSAv>MjhcN^mal&Mjpx=KdpNusg95!}6ZxTDJa?Vhdp5T_W0ZFSsaH+ejS3qb!vFV7b z8_$&Trd8@EXnu49+U)XpR&|m2U*hLTHtF@&q0tv+Sh15ZOPrDG32!k7rT$CJ)Y2dA z+R)`BiZmoVgNCc>1$Uy_5baB-pKojbM#N2ZWCN6k5;@1Kf$PZ~H*JnMlXRRkYGHSiJY`Y8u5ury^4|_taPQD1C3c)miEQ(!nGu`u! 
ztWWk$hX(U55Jof%zK;^SPsQn8{nUuW%5hTa$13x@=wH5~Hl}0adX1Z;n@j}f%Pp(7 z90^0sTIO=qNY;Jy_Ri68-(#jYFre2?=jW!K4gDbNlWq1Qedp=9qKtORB&u+&e?%^X zXtCQ?{?L^0uB{k~jQ>SvSUa*Phws1DWNh9wfoC86<7Uk?2le85YWoziEsT{RH?2R+W%Xfg8WdiM;<+=Tr1Nk zd)~E@j?F~p)5VrlHW*t50H-tVq!vFskoUr8Sfyy0&LJu%i*W7fNps=P{{o=B>9~XZ zdgl@4glE!kT_L<8WZ}Sn%#=eSm2`#4;{95S__VD@5#2{LG*5&oUM(MebhP&Q?HO;u zj(9}Jo5X;o;MwV28}!nF6(&{)CP8iJ;3|mXT`Xc61W^B{Zi?t|5@b&K!9D%?&`Rh& z3})#nSXy)J|8H?XOQ|}ZjMm^yyrOkT^{JstM{|lQ(A_ND0+V~j#gRpR;-uSw8zovq z-W960s4!IR%hk(z12qhQ7!mYHf*e5kNap6svx^OPeq44>5RCIdU*`Iqn!-r37&MqJ z$^-w$K%2QP?y#%uFiAqIo*c^F5~5=il8QRJk2f1sF0*0r_=%ZK#_>R zGPR5)b)g@90@>?;Sf7^YP-mQ?rSALR#Al60H`%>Q7I7{GFA%`|Jk14tu_$mFw95Ck zK_(dAaCV26sIi(-J82mfHrg{-&P;EE5F;d?2yUI{g%2Oh7!(8_^%}NdK z^G||8_cP{+Ord7WtJeM&^0k!MNW*F=h_;u9^tUR4BA!?3stjjPylf9^AEDPZ{G)Jy zf{wN|Q~k+!!R6Q_*0qfEY&EeDyHN9fLqTj#BR;L&+pcr*#7&@d=w$}S@eRWhJX4jg zLRir>hDlZ~t$!20Db7HbAIp;;K}E}AZtrt`HKolJTr;W%9Z2ZaQs`Jp<8kig#n~X63R^wsAT3@y;ZiX zUsQT9kRnSfKH>PU7DLhft{9hpCwVC%J=EydT|z@yb(fdQ&Q8@?ow4}2Qd?Nn16w1X zmT5?z2B>w@BIG3x2|a+ar@;AWn;Dq!m=}O-s!h{jjUPLwrtf$lhRl zOV;gwnt+DV&_E8fA(Od2t`{19C6J=xt2@o#vKfufCATq$|47DU<$0!(m(U~1hj}95 z>QS_s{~oHL>BqxM%vAv|?+BPp>el+_1V5rqA|vT}9b4D~ri=J=&QKhjPW6K~DY`l3 z(C3jR(Tj>li%MqPk{5Oq{PYC>qBe@KC|~E&=qr~s4C<2B2@&RI@A|JhYK77HDJ
IwtHB}y5oH{Wng(~#Vv)7je|L!cx%s8Q_uh`WHm*y z945&(q1N_ZIJ~uY^tXwKLQQqUC_o_zi@-CiFSL~8@{sE%P`it=LFxI?rSmL0&P{z_ zs1o$0OQGLkF-Tpz6+une{<~pOo$p6p<~q>0r`Fy3bAu7a773kJJ$B0uR)eKKUAHZ{ z4A+C`fX<43QDvZ0;k6?({Z4$!_M6^zu<;FkeZ@i6;Y6sl#A{>k42`bXC zYW(={g_#~D@dEc@x=rz7`U#I;%mt&iL!X2c#uE@XT{9B!$BK2mu!^*=ZiO_F_F%Re zD6r6tP7YG!lZ+P+5~$YL($8f*T>#H;Azugs=NCs)!9sm?A{J!^bJeZA`4`2#*SA#K zI(Y9TYAO|;l`H2%n_ryOD~4y6A<(qOD7NaC5kGkR##chDH|2K=t5XsJf#N6xaRFA0 zgWE!eEMkTe?kZT$v*BoVQqNkx`9jiiyy%v&-9)g@0cqnbnMgm|by1B_#7#-XVsluI z8kIE+4-0fibW6<=yH4f)cF;kXZ;_G)UxPqe;oIj_*22gq0b-V3P6B$OQ^a>j${)%e z&DdCNPvFX%);)qcnxdc9fs_J^GMV5S@;lNoNO^@qQd!8AkovoRayp!aR4xo|yxo2$ z_Dr%SsrCX9_0bEvWZg8cGS_z-TgDaZ-$1$3p2JC|R;B$(3M%dQ3Z2?hqxVuS)kkX! z$FBSb$T`?<7)4Uy!q5!xs#^nzo<49?7KY!8PTg=k-?(`Ab!>tj1By^HJJ6ql z3`U@gdjrJbxow$^=@k!rP-UUWknND)NXWZ>$$hsyZVOO>$Y%_bl%F+pFa&_C@-c+g z#<}!AcpB(I`O^RBDoJ?^1T1H;sHXL z(DY-9B()Is(b;+~XifIJ>XQ4fSh(nH?_&Q=m>k9@Aibe$v0ybh6UOWMFjqY7c*{>q z0+fW!LfNfR6MHyejAPPUhucr;gQEFX()P4HR5#0JB9n*(sqK-Y{JjtlsqN4w0PNFg zPSRDjsFa-;>VLwmO%GN?NIMeRM2~GOs|W#^c&Ec1j)`8!k)^iKWO zvl|f0NfM$}*9r}fjxef!)-&_#68zjjQ>eDE(Us508~cXe8=7d?O~#(h7UtOded?1+ z2GzY~fh}IQ{BR(hu!ZA$4)4liO$>xzd2Q>lNj@eSRG4)dea`~~av<71oBs5#-)czH zJmD21%;d%ku1}Qk)N^Q1yX*Qo9u`7jJ_7_0i>!) zL5Er3LU!e(H$FA@5M}P`M zb?b3Q-{2^aWN*t9c<##c^j5Lt}{*pTR+)Jx%7N>Ebv(@jqHE{aMX5!7hZ3_rxgsF=wF3vUb4FK#~27zWX9fs&)l7ITf1kZ%guUFPQZ+xO0bM-@4 zUsI5&Imf<8lRundyxw^X-LBrEYfC_VCXZK(A`6$}27`;wCsP3b>1@39S*d7yw0g%# zJ{5tuNtazNsFbD%VjvLA$Hd{_!>zc}ZW3Qt0_Fo~mHP&?wrABMseOQM28FVL?0DXK zn+Fs+LD?sPR&P-W8y3*H{k|bH)hUq$fD9nA_yBLqr9|1a)pdL6k>ZrzHasmd!9?HLpFL z2nN|1ul&ZEL-yev44Tav@{G__-eqq2ZyB3>#l|l$AQMJk?zOGS3jM)K{=rjL@TX@M zU%%cz_L*UR;1lmYmy4HW(J=1zoQ{oOAciWu+`vlv))pYnXYIN^C$4uyt&x)r2_+B0 zuf!*HhM?gZ_5WM>iLS0{RMzYNxja*3(s=&P%CCNsK)_kc90i8i`D@>Yp)@-rd;<2( zBtWTgUHw9KP~`U+;ATiGD3ITl;of(#N>4KxrFIHKAUbc&-(z`ImiW>0p1v28FTJOf zE3Y1&eIxNTHW(v|7~gJjdeK{FM$9*+L4d1jrx9vp>%5bNP8Sz&;T)heow64>%#%eZ zbNSn3)n@XiKXwonLh*)P@kj!i8%9$5XsJWyr-(b(4R6tl)%X)Hv8fD>f(hskfZmaT zOdyX=YV2qIf#OK#hM7a^o1KFeqgRqGLj;i0!@a+;N`l660H%XyDKxOSWg5g=!#59m z6YL@z1D*yKB9IG;H}b4PrG*C5yxLs_9lmF&T`PtuVIX2;d&TMXwBMXIE9?L>Hw9(@ zNC3x+zKM!qSXO+qc#A^6t#=;_6b{B@M4YofX%gZ7bwMMEmV4YqBT`qO z8VK<-3BY!Pj?Edae7)OkT~HpfmdYJDo%6wV&b{TK&~1yd8itoYo4)tFZux+%H1PRB z2)g1PTegUxGY_WcW3a4Fv28xUq+79EMVvx1NHY7!v7<%DwXNjTpbp92VEOOTTb-8P zV1%O-1xU(WvNNs$KOrl#g4N7@ss8?%% zZ&M}_jC4jkewBbVZ_&9YM*GR{OQF^fLrll?|v^bJiF*!vVbCk5T+M> z;O9iITgY{co;Km$nbSF>_En-&Ru(?YJJQ`Ii@Ky;W-kp&t1qS2Q*1>ykmLv`sy(D7 zEfy;$f%`lBo!B=AhpE|5#0F*>Bq18^8(c|2pv0oZkCi=%QetxfD~gF|e@$&6YXa64 zOtMcAm%1k57(&;d9WGY#Eh^6N`Tb0F(EU`~G8wHPnFwofi1FvrERe4#U0ZL$xA~^{4#_(ND&f()J(eY+nip(Rq!Z@c;KMP> z4w`}kpsC9MF1ga1n>=Rp#SjX>37z#4Dk?XLV`!B?cMbXNw*_=otsL78G)(~3l3cW3 zI2t9BS_wuA#$2jI|3tS)wKSB6iNhMg5ot;LO&iQ8pJFr?))zXq51g)h3!X>m=!RcW z^Jp_gorq;Q0y|jPV-VngB3~awDepR5_9Ur#xI(~XM!k*Xlm<=gt{IVl;tr^c(VigtDH!%knF3;GBZO9(6z2 zwZcgEuJu|yM;K@FgT_C4kZdggPJl(B631@dMJ>sgS@Ceh^mUFyVgjF8c<22udZ>6V{T2 zI=KF)l1FRHZA6LA*$&b8vQ`@J^OC_nf)RD1KA{8EI4)N3MiQEh9@13NDxFrLEJ#9% z6x#4Y5Yt~~C^&y+e*lgy$JPfYjp&lZetA?kBmBvjz|K|ATueZPj03fYM*eTpekIv$ ze;f>}04h1<`Gb*-;Ld5N$6ecOzaW6*00P6`QJkCz(y%i3(A~QH@x~}D-~0D~ZIhu& zZH8MD=iR8QB>6{=9IIsGTrKx@bTQ{Z%Mx8045g^Zn87Gj$dOBOP zXP!F<1)N50uJEMvLW-vC^wUvkFnM!C-4TSn3s}Db+ItMFBZRT>fo)cf{=C6CkUEQ$ zp!k=*guox6I$W{Hk5%GOV+=$!FHP2W3QU((S)rzPV;Ex&fvp5fdXx(Gv2ft|zrn$y z=Li&J_pY6eS7PaF`dfSaiS3ePv?_Wrw*|4blQGD6g`-8zLINKRqQibP5r>4~0Dds_g0728qykaB`yhv(eWAuk}t&^>X^# zWNLU$od*mnJedtzp_NDEFd=?gG(Ylq$x_iL_aPAK03iIZ1H_1`qh^y`M?6T6WapQU zX(9rRw4{ApjqvMjIbz$#=5Bz81=`u|LTGSo<2q)(;HZWYv~-?-*ARLHs_8a_X9=li 
ztVTW%lJ|F8|AAC99G0iX1_|K}Exu>}@ngNmZ1UQGJ!&TyW%XO8Bdi4uX68vk=R(q+ z!gn~VkhS3E;KVj`y+r}AKi7erWchdNA8aa{w$zthhi@R!O6Cua2IaRbF(eqA?$=9}71%l9>%s?TrDO(d8i?%O zEUZ&2j<4w0Kx=s(6Vm?LQzm!IjIp108SA3~(PFd(oRU;3=c(s_TM|PCfpBoQE~yw9 z3NptwUYxe=eH2cvwnhGy1vdPR-0i&NndJ8*K`2|nPd^AG^DnpRkSTlzURbq)gR_jN z8&KkI!H5Ul^hiRSO*S@N{+mX-oB0!I?Nzhue(wV^Y+#7O=UY*6z1R%k8efAMx znCyVhr{G(Lh-hPyPv zuPeDTPI!V|G8xSJI1Ct=77Rn-6E)?Rfh9|gYlNBQmJxh&)^Z4W`kE~?MsYDU6Os*!UY2Pp+Hxae>b&AG)Zh3HFfR2%= zS{ghMOjvytzv@5wc^$EHvw2IMLNbx#fLJ5A6Iv^Yu0pJsVkNDt*o1$O;JE( zE56hn){?fHt5-}`S+p~KEY~{ScZW#ryOW9B08@XKJ}z}AUN~L9jIv$Lf2TRO)Ir1g zWC6OI?|V(*KGd8F0CdUE%k$r-U!1p?3S@7Au+;kpN*!H}Y|W0i4~C_g0yNH$6wfQ? zos?&`2tmV5G{#w!(MW_2~hPpG*g70Z*60|)P|iX|_4v-Sjpq9T6Whj_JhUT>~) zh6DF|`74Zk2ieSX);adnJaRjEw!PK5;+=b*dX>Z*I&i^I(9X{?X@>wfuv;#;!EpW~o(L zwn21AVbBUwpl)6z@y^Mz6|hL4u&2!%xtN{mQ$cWqAKCnpbpAak@V=HzTIvu13owZ6 zpHO4nTKrhtgDG1KoJiv$&?Cj3>^2moHT$rX=eMs%TqIL}9vw|?wro+dU;>4%YyLk| zW+X+B&zJk6=03!rhYfH*8Okq!#!E(C&XI;46=5xgh`+qx&KCH!a|)A1g9m7dr;LG` zus66FDHC5$tL7(;db@`ejTEZ^CHndDb!t%47yJ4e<2&A$FJhS{F4P(Fdw;V8Rpbsc z%#*_`@-v=R5kMIU8GkO7upYkf%Xil!9V_pPAD|7VyKL~?%2Kd*WK%!fOa3SetAMux zDyLg!6#}SSV-Wl+hp>*A!y18zHUy$xoV%|yup+QukMVnLL(v#BH@x6|GG6*H{0i9~o|*jY<5QGq+ra5#1OMrd4ECS&S;p7-dT^Ll@%9rA{Hypo3 z7Ihv=+k1ZL!&Kb1zHR`IFG8`|bN4nmdlf#mR835|0QxQq1Y~LjmczYgtWW?P)V8!V z*%esk;^i(t2WS9N-qTz|5a%>Q%JTimkS&jp6II+qD%GN7z<9oJ6OdoC6Oea9d1~qM zBL$VzckeyU=TxO`6{K2DbfKF%eaVIFw?^p!@)3c;e0BIIfpMUYT?( z5V2S9J5Pl&H6R+7xaISJa>L;6Mmyyu{axw@ymt}-_eAagE;pOYRXfGyE=$M(6xvF| z{eVsZbp5d4D#fiI2vmSrahy>2W(CNkFgcL!qBf7+hh2B>Qq+8;1U547c2~U-t_shpVf=k zMRdqZ>VPDzkvyb%jC1`u(y|TXkm?%?Fo}acRg1Hp_OYK-6)?7fmbu1((N+=Bf$?;X zWBGJKXh`PyP^Ure#(r+VvNxbfxzY-C9{ zz;Yx@E`QlILZSeX8o0viqWlD!9`*iXV7N)0DqEr?cOe@u zP}$jL^nUnTm6cU>KK6AEJ&ZKTQc7fIs?JNPv8<~Ot&CSO!fXpe^(TKC0JPFVhu9fKeVz2-7Z!f3j? zumIk*6LIXrYBAl4mk1->UC-=@D*8cF&O?iqy_qmI4>#XP*!@1=i`Ei4Q`1)^V1Ct! zwS;8~Quco}!^)z5dB8WOXwmGinX$t0PmZ@S7?o@^yInW zTR~nKV$z+az2+;-<2`*z(Q}VuDmrk8$|dZk@X=^3899oun&0rvsxv*esQZCMsrfta%LT#$7luEnLxUEOY^v2yn7RZ_- zcMldwHSjDxC`7=-kHajJ2-SgJ2TZwtB;h5MEJEFNIc0ph(N7uN6x?riO#J%!kVD@Y z9nqSottbBaW(Ssst7s~#zI|LYOxU{Qo=5_wqe!EJp=Qs!+Uk7#_FWrSOp|WV=jlzB z4$RHKQN4y|R--KeXcHB)_W?q==&x?gX zj-F?^NQW-;owu7JW&AsYh}Hy{g!Yl@ma$g81rb)5y{fK)Yd3sXV7n_uv!}USXR*L( zz*JG+M}s~q_?V{R_ImS|t2<-vT#KX3%g6|_X7gR8Oq6C_0sZIt>D$|WqnioAmKe28 z76Ku=mMa*J#K<`PO!$XRO4LZHeaoB3%EpTd^6xort8(iEbPD$8CFXdZKS_2kv?6O( zIZ+q^XOc1$RVhK!oOr57MG}1Km;~wQQAyWn7gL;RpII|^{G;sszXHC&K*oJ{AY=PO zeW+7;Y{DwftLHQc{(d%E#Uir*uJ%~?V@9As$`^Sr*EwVPo*mUpUk>wphBVm@d-2--4NqnuqX(E6Piu-=}-Q>kmJ+5CnW+X#RI z8DsCz;cp|Lyr6(UV4kb#b?E*V>)eogt~31=5$~}u$#_)kv2i@A* zX9G5!ghm0c=L^ha>62gM%vlBxrBRdXk)cLgpJ)I>1Ua9Nr2OlzrUjQo)8#U+)%w?x zKoVHrdo4QqKvWWAcpLvBD1>4jTu5f4Vi_r~C8!6+Z4+#Ps_qkH;3WLFsRiHUv?07r z*2|~uW>Fva?q=pOIpD^QEI54j%K1KQz*C>M0_?`;GKP(^g{6T@-2!b=I2khhAoR)@c2`WsB~XEenHnBbkY^~pVB^&SxgI7NML-2C z#lR1LY6JsZ<86qwS_YaaOshCYMh&P!Sph>lXl*j9Y}sR0E=(;^~MS zV*~OLQadC`{@uRhk^=RYnzARaa=>N2Z=km_;M@WqPrXmZAx1Dgv@8(QLM>JEg~6sJ zIRoGGZn!sqVxeC~STo`>C7G5F@-UhZX(rd93&@8F_{h@!Xr&enF`X-Kiia8D+2IE! zLQ_uleYxylV zY7Ikb=@JDWpg4$v370x>Qd_4qQuEa*Gyv*#p{adGeLC%Gdyy2UVfbU6;o1|rOF;LF zp}$3@CN#UX_Z_b8k*kqQru&D)6`Q;K%3myS7!a+XC_q_s9;aAq{(8C5`3|pqYcmo? 
zdbQ7oW5Q?20+Ys{HFzaZWa7MiaV*fsP;{^B&9q|$f1EZJis>EVF6U~H%0E(sTIBLt zAt}DR;VnaJ*5!x{w!5|e+67_^1na`0WC*NO*V_GNjRG@<3EGWb-O$ls)IE8(2oFYZ zyu0OwyCSr9oahHp>mkWR=MB$PAk7l#d(n9ThKY6iM0?owUhz(mkd4)-PmKXe3UN-@u#grOLe1R1T+c-f|gt$x^T(1(pJn^IIItf|)Yn57AtHY3O z;I+e=8GpWfGXD9}#6F3t;CH%muPsdg8s_A0$zmMM?_$*tto@n&sjsj3T)*X#WLP^ zH;Tn9j>)mS9c*Nc#je`dv(Ol`-)S9bDg9FWJ@#msm@lQ~cyA_rsV4e0K%6)u5wVQn z=|DRs?laZQtXe0~|8{fS~;JTVAYHR1`z1irkE%k63A$45to|992I(S7*oRnIg_Y zcR1X22t%NL1Mgi(^b=Co0Jio!N7Y@d8lfBsb8AoH2^|asWC1)UYai08fh=qoy!aSw z@dC|JY!w7rDb&{gFxa?hr4C9p-Ge!BaBZ=^Ix;7rI4ndM+4K2cM5?RfGw*j2gu!2g zhciP=MoVddk_99sqP&h*SnQ;TlA@>U0V+RnpH0DaP~gc_a5^Nze*vG))JmPkG`^cV zjYz7Lektjac*rw;!v}fWRTR<-G zRci(5xwcj?cmc1*LoE#AA#Qj~o?nK?jDk7(d5PSv&8vP`z#H?Ln+J1IKc34TVpoN` z(wzyqDoBcCnNa@5qO`e{Kb5YRvo%c-eZfx(*hi|+VlYCc=N=Rba|0~IkyFvxR$z?F5H26T!& zfq|rrBo?}C*!V~zs^yS%e@mQ~saLuLXN-rJr^}rtP9*+0XF@IS=!Vq3I%E2`qN5U~ zfiVX65m#uh%mdJTAfpO>0~8#vrkjBthYcOP4rEfK)Wz2L7CV>x72i5 zmw6J~(1TPdxh{Q}c!u8@?;mI!ZHnZr9!DBCGc=FqP=P*Qqfa z^a>u-H7WPFy6<22-ek(L*22kpnn9oeVB5f5Q!uv14;%0CR$W&4{vy+#)WzGzpUbSZ zD-5cd-FY384>x<(`>t&(m(mC{=64qc0?JIZusz9EimHkJ`QoM_EjNHX=hPK(^U$Is ze=c_+9Umd+whvgd|=AIDS=zFPdJ*Q2~fABcu z!4?N~X87r53e>$#_NMHNHcrxzv$4eFK}VgZO1ra zOWXbnu$)l4NT%0JysA`^JxmmjAJL;UGps8WFJ00I3>CQVH@WUQTlbX3e_=RFcYWMN zrLiC!@YyAn8tiGN;n4A zB?PGem$=;#MtX%^q(eDoc~*X*>bwMv{T^@P?&>Qi>jRM2$6wafJP zabdNjDJM!Lp#B$#Vh&e%0vv8wlP{)y+CVxm(ITYgAgMy18fFOrI&sDb`+{aCNH7%T#?hZ)9sly8#@kIK

#x~4rcmt2U%|komUzih^@q2{#=?Y z_Nefxr!GzBun)yIE3xlFMpAkr=n1GkN--4$!WW@+!rMZ5mod<&QbS1ps2 z={Uybt!j<&=u7$yU(?`BG(!#0HqrKfc+1QsZ?)^08nZ$R0T6=V?-H-jn zD7b+176%Qli!pZVS^8(g&o>t5xxD#i6ePN_MxLC+&8IJjCYe9FAP^kP0@+j#YeCv$6Q2*Lpi@rz=%Nta5**K3jr7>3J(Czkj}u1C!Q^HE(~=QKGroll2qG zljnjdWWN3kIOz0+zfYicObodhmwZ<``vJ+*os@09{bkM?g5DM#w8iP!d%%hEbI3W# z%v!LM4mK^oXanW+he}soFTm`XYYQKNn%KSPK88cWQ72_7*MISK>_F4S*ep$y{Upu# zXLH5h=U71In3kycu2jVFNV-O$)5~PZ+NFH1_Qd~7U+tUa zG`v9f>8-4*&j$N;fsJ6zox@L-mMoqo0LGsrN_M%+n{@C~!V9l`zgH}@tJ7IzlTZn1 zO*fhDZ>dcp`I6WCctuk9Nd}$wRgVp7k~R15^}$}{!+LG}a$sT`;$G*9Ru$tHr<_CeziN zH`CC2)J*S=_m{h^lFcOi+`mUOH*`g&X zad5VZJTp3RWy+~6p0WkTt6Vz%gy5%bVEY=Rn?iD3CA%8UV{dqnKU9MoRA@DRBdpx% z8iVVRUeTYuej;<${qK34oHphFce;C9JDtP2`=tvSb~Lv0SPDaUh5h%3GFP=~6LC#V z=iaa`w2Mu3zK#EnWb15$x?OH9wB}sFcP8oHaFc1Q_{T9hrZ1AMSywwLng0#f4Q$i; zkZkW=uk$$f)A`gmk57PqH<_TFdUc0hsxnkiCi97kT>0lVJ3v3Jm6PLgyz zJI!NQ#AKyAacvvMe^^K^Zy4NAo|OFj z?M3(R0tT^jhRdG|Y!7jPqa28H*~ux02NlYgw1o)pB^g)u+!qFQ!np-1o;?(ntn|zy z76>q$iYc4SQ86J-xvLrx)3mr%ucHCnz2ABcSs)xF_}Nsbq5#aH9!inQ=rkL+hi$9*MS8<~+dLPeU{g|N@{#21h(2qV3S@+D+cRLpiD^d`+pMPuLW19p` zWeCJrG<<}aJQS~E(vG-eMIT57*|H|{Tn;Ag*$$dG#b{wW; z)~clJq&;&lfV&T=M_0UK)yD90rUfNJmNq%PYIP=WT)q3t8v}aidP(>;RpqgoVhN8{zmkuqXBX;k!i%c2zr5#Qeu@zbqRD7l#sZ=e4-S9MjQQm-V8#elwCp{+e%zf`y6e&EH@dMDBrUWK)x z^Pk)w09c!m zF_ps=-+wiD>OcdVU2YU^?^L5@)^D$-yOh_ix9-|YooJxjVcJuVbLA^b830ZMZBwH( zKOPOQSFY&$+C}B4Je&v+U*rzzB1bTcw-UVT;MjE{0PTZEBdMnOnwvS6n~w)(L51+7 zz>55c$1)54q5U%v_pdh}EC_avV>kl_+9h?^Gv8L3KdpdG2|Z5A%VW6IoBvDf@P|6qR__bG1_X8H^9u%} z=e!(4yXC}AR5qChrwkubyp^ORE)P9;{$-7P;HP?ira>cwn!r8R9WWGAnd$r|=} z4bEC*0i0Qa7GzmKkF1qVmKNOFgmr`lbj~|%YDM# z=F$&Ecu;imuL3y_k-bAN@tMN|G4LHX`=jsU+r(#@UNQ$+`!B3d8j_#Lo#gfqi<0NK zKdqm0Bw5NCEvq{|aH7-~Q&hgl)PKw^XsL%Q5F7Eh3{iWe<54~rMtjv)MT@wyGLbH~ zChwOxo~{T}gS9}=)LVge-!470XE}XAg~rbKb2qN;mau8#OQE~7yx8xl5m_C)8mBT1 z92BPD3$LgUgF3msO>ZYH?T+8`jwIQRxfq7~{x7cHGaRlyYWu!IkPt+)=m`M z5z)dZqb5Y}(Q6RVdlJ2iVe~ROqxa6}y)$|lj56jp@ksM28{h+BOPS!9xN7ViXQg zu%Rcl4l>eLeE3cx1XKb4;QS`y+t=^UzK<1K_-(rChbZzcsxjT8G?R%;jH%Zp7Pg}$ zaD{{>`g;xqR+J)3hUp@?tx#z|8K)N)8|B$5z4z; zOD|jlSz|>EVpUrvhIa+rwQ;w7Lyy0;0v=X`-^JBQ^;Wz-xSk(b1U(0QNy=1}T3X=xV zVDg524Q}QVMa}_0PFS@UTEf?uuD{2f2qz?tNXho7JE$n=a@HK`@zQS!L%Jg2gY6*S z0?~NZ#M4vm1k^D;Vl2X^hUFL^pJ)58AtJb#(PxvqOl)0*%FdkoJr?}b*YJ067}8D- zusIx77B%#plC9O+cl3~gRN_-Mnpu1!G#gb7mGq;`_HP8&_HGZ#Z3Cui%j=f*%l=JM zf?nA+&-(UujwVKw5jX3A;f=xHbg*r_l~q~5MQT31Z|KD2e3s-!>@;VcaYx)SK?5<(Zv(iL@g-tOd%Fylg7`Obd;UquqZOLT&AK!6thU}&eo1?1mA5X6g( zp=)*6JDX{HRT#85;3~UB$!k4b3aW{C#_MW_al+~3tkFX)`hKo0EXW@4rH^)wMMR9i zv@-HnAGp+7Ev33v^*JzIuMT|s8l98^cGO%(g_-7Qz7XPem6mxCpm?;AYJdeq;c$0J z-I2V&IjWXm*Q9UJ#^>7t$V_^Aww6{OQ8m2EE7{4u&wM)xMi*k78}xtOvY_hJOWXdI zG;%`QEs*QWN58Jc&W~luNn&y^0;eSscdoioy7R@K|C$z+R#FsofpWEu@Dr$Dhs~mB zIRP%e7$PWc-Z$j1JQ2xIV6a~6CexuDPlwpWt_f{72*0;t7A61DMJ`pkJk22odt;+2 z#^!VTJOudDPdBu*fRp@(Og-?Bq)xTFziaS=Oae($Kpshd@xPw;B$>EBF#9%aNXtF3 zCm<5jLJs#mLr|t&WVUE;Gy80o%Z&+w;KF@E6M_HxTd67xn3$eWKXueMLl6Xa;VQ1c z{AS6i^UyYWO)9VtX6Cc!Jg-+db&e~W7j9Qw-L^%Em6eAfSXTlAc_Dn<)|nol$^1s3 z^l7F<+J(Cj0asPi&LD$f=rDJ72VgspMKiu0b&1)d&kQQJa4%dsn?NwJfB<;9*k@r1ek3Q9U7XBCi)kJexmF}_{@cg7w z%WZ~1{z%^7I)bd^w0GLh-Njf>z~;N)8=&V-&=Cj$0eTSo_*k`f0T|I%h6T!8Jd6$C z&j#9?`<~1=&@ibPe_cKZMLdJlR8HitUG0`t9j#wnSC@-oP3oM_<~Q2Uoa5VUk5(e( z%fDu@1y3>_;cJ`_i%RuU7r?OPu6?DwZccBx=Kp-5x*gfVEBa=Kl+k~6iUakJ`RjeM=iCr_?jqYCC^t+^dUpRQBcE;I{Ht{ohsxJI9tu%@hax z_S|{R5rfG!`%)4-7FQ3(ZKM5NAVRHpI!q&;-3rGpUjoASm7%oy$sj|3hHx zeI#^~m%0aH>7K3T)+~zE_t0Vi6vQW-b>p&arUI|OZa&b>f`61t0Y+ z4_*kCtmFp3z0S^oiSv$h%w#4&U4`bF$*pPuHCDh~m4J)`>XG6aSePY9@gCwH2TpXK2&&f;)QyG)eS_SYIrjtWZl@>jrI%0~P^Z`Ce- 
z_2%yad8G@vq`d6v&3bcntt}yN@U8)F`?RpC{N?~X^jub$#fCIVgvc1qq7;Ee44*cH zt#s+OnyLctgv<7A^@duS5+Kp&?jtlYKn;}Q;P9_fKI%}4l7n9>f;NZpeqNoO&;fr( zr((h3bwV@sz`Sl`_dk5pBsu%HtED@#)iyWJIWU%D#a1-kb)j@YH|Mp!W|BVX)rwV7 ze}{s%tgJot)tWxq>u9GaKLU%>&+5a*AH23*czjBuS2vK@yRybd4RlN-RGdzG7Q1); z(5Hn-{qbUU?a-|HuS+vo()t3H^m}LljUv^tGz4GfvZVJ{nB_J_1V$YeS)?;N zc;)^l75cDh#eL+DoXYNqq)r>UTrlu|(K!`>tcd29O|CHfdpVzeJ7MPTsV(+5ZZ`r? z*qfA(im{%rPmi%>FKrQ{0;Mm2qtWV?zP2_ddTp%4kj8rX7u?=Rt7%8p!*~)@_1hOJ;16;RT|qca$POB-tDTTd1O9`;yYg>;HJSHb{+UcL#(@NRCHslVm+lTYsg2GA4t`KxmQ z+`giaMx)@AVrU8q9)XEu-!qu^!kIaXNTZ7U{XoMzQE9p1!m-9PJzI`_QX+BeXez(r z^T5ecn7kHL4RA47uSm?f0V5M2GUP8Cd!qrPK@YX2UgYnHhT|q%;6B)r$fWsr)>sr_ ziIbyaU322ccL+2?)vn0v|Hq=8t97bNAJny965|uy;&yzIdR_5d>%HsahGfVNM9_eKgAJ6kgr9gOlRW zqu}N`0D|J>8M+u)Q%;47Z* zu?iHU|3B1exABL;b+u7(vmi}a3&-qz--6U-%Exf@i`#f{AR&m*R5PU4{L6$uLZzpS zL;ltqffVN(>K9F7%2pZ?tY8Qs`z5b90C)zoB~sXhivM^ZCM)``(zMj4&b7z9{JIyRX45^X-Lu)XqHy zn>vaR))8#F63UtvCv=E0l^nzUCK8OuI>Fi=`#7EerrP^eA^1(T`G#r3X?36_+0&z_ z{ihyUMmC!nxpr-PMB>VrRKchEL)Jf%lXl|2lk5m8bkxy#f>h3BQ4xnVjZY^y>?J0{ z{|Fq-s7M1tLT{R=IBtq-pD70#Abbfm>EWhq3z6&FEL%P)wN(4Fb8j_XKLl54%-*51 z{x~pnktJD;H zP^0k}87HC)bBdqTj?xGts4g7yZ-46Qvb6Hiror`ui2K{fOsxd z#(pBjAcl9A1j!m}?zH@s_1XU4(5;R~Ej#rAn|)(H+yW`B=FxMH^o0JU5T63^pEHZte$t4E)4D-i`XO?-1Lwl0basg>%uX4Z*v68TwE9$B0m{)3(?(iiv$< z{fYy69d4^o2Wcz>$M_7?6Kv+lnr{Cfkb|`?8cOM&Iq@xkBPjQEay&BYW~}lxn>|#D zEoISM2b!nveRZ0?(QY(}p>v{VI37bG;<(r)`^VUQD@8 zxe<+R-7YErw>5Lau7CkqKh~H9#itoNLH|ip9^wlCFtSKRd!y8GZ0>FQvN*j3fEDkW zU7qP670fL;_}2Bciv%AfCqoVXp7qRBHo@@iQNi%{vG0GY)3p7!?FxZgu89)X%5;zt z1gN?BF{;E(lMIcx0_qa;P5SON#a9{#0&-4m8_5Qq4ezIkwR_$cigQ9u41;~N3Lwmoz-PuSU@-i_<$Zea{2-V#9u{_+Ffh~vf^BIF+l>E91)Qbh zHn0q~Sl%T#@;-vxi=QUpaNE}rm-x(U;I<;mLG${7{{KeP0oLvRvgy9eqC53$ebyc! z@RiDPC`b>gRL=vLe0$Umzl<1U0v zH=zHar$dq7-YWY#?a#lDz||XD#onF&9Y$$t5L#@u<(F9B9S}5xexg4#)E>-I-{&l@ z1xx3E)GF&ZM>Iqnw&5io_TETk#ce{GXuHVtG9` zX+?pu>63-{P+eSALcfhR{O#d)lR#tM(W%D*$=?m%{J8>1cYSr*Gzv^m2zXoFIyV_% z)2E27|2y9pJ<)MQsM+P!3k0cz15c60-&%cxaLZG_M;d_w1QlN~WiR={`Y6ET&ux z{d`{D^n6uX2XA(#Fv>lj^4HTm&95SO`0FzUKtc8a$Y^A39hkDZBiaB@-tXzZRHP=g z1sJoM!XwXeeG;xdj!U1o^*~!D0hLGdu=4;GUrf) zIMOZlXog@Giiiy3o-ccUTP){-ek`4=@@ua(8$Bck6u%^pCp;EhwlH(x0)fcg-#t+k zr*Amt;3X+ElcY0yW&_dZ@pd8f|Abh;>l#vCZMzcgF>YX=oQs_{DSF@#dS zZa|nIfOJyh%ZkjSJ0m8Lx7E8=tJ|brqze27P0rNra&mIH^j2N!dO^Av7>&VNAWmV= z7k%@D#-{EJ&HJGE1uBUvcku$>nTSr7NsUI;%_z$Puqa6>x-UAY4k#Bmkwj36VF5Ke z-{jeQhMV`Bs%g?hi!P4E$d`wk{A(H$5BmuTG}vRS{UL$LvOcz#i$(yQPk#i_z8;9W zGx7u4IhmH?rd?Nac(uJn!MVLc*u(P1<@3fJ^GmnN?`OdstYKp-A!x~x8vEMP?Dxc2 zk`7un(ZFuAwI0M zUgwyhrV>?R9`_V|Rmw8;dg~_&$cJ(@I83~zFW%v$3gA6EVA0Hny@P=9D!`Xq*JY1- zy>umUH%%H?ku98y>hgf-uY~cW6+XMXx`mJ3>HZp*z<4`RT>SHQ%fKCP1p;@lr%6uJ z0^d|BrO!`KUP#}k1g$!2B(+tb<7y<03Ey+^CHjH})VAiA*ZORt~gE2q>SrW0g3wwUwn3;S8v4#m;g z^sH6sXW2Cx97;iVU0ClXFs4!CWD8@TX(TSYWlyMR4xD>3AxBE`JN`nEX`!7Qc2Pk( zGl?9%bH4!TeQYBFRB_Uw!$^PAr*1Z4)2M>6XE?sTNZRX*byI;SJw?jy#IVGNX17P! 
z69orS%wMYK4FZx@2Vm%R!>pbyH5FJ^GJ3}jO8$EJd{_X|+8yNatkK0tYq|W%TX(M0 zX5pTl)^1Ta6BZVq9`aIdsnmcTx znxspXq|P3e7*Kf6Th&m_mayMaXWJno^-oZ%X@Anq&&j>)>flQI1>mm7U)8qieGNl3 zgx~c7#~n2LtI$=?@DQDW6*H=4GzO{giOEBi9xcXk0*ET&2@G2Mm<=fgAX3O3H`wi;AuKdjt?->tb+>_1k+J#jqfa0wq+_TXWJgX^|G2UaVWXP&5E(zAAaYv_Xzwgqlbi%qK3mV zFf7w>-9Vr+Bab(-!ko}37g+uN`_*Hqf4le0=oF(&pfqc_@pX|lR)$+S_uUHfT1#EUr~y#IqXmkYl6 zZM4%^`$WOA(n)z=gK|^Pr+f$J<81%p$DriXvvd0QAFJ-% z8eYLdsb{HPcc`>f@tr(YX6_q&;hN`9+wai2%_edfR&9Q->r$(-qpbv`)i@d^NT_}B z%*rI6yKH4_(CJ*@Mmz1)zp=(jyW9Jtrvr1Y1Z8cSlib8v!uYx|f}q>svQ$zYJ~mnY zvrjY$R8rVpa&YT6?RjQxR@x;=^@FRC?=|B$$M$oUaPL))6n7m01iMb<;$?@$Y_$3m zeQjDZ|DrAgB|Y2pwLW*JyNcnNRIjM~k!Q#muMox+iPews3fRH%O82g}RFj4NJNTW` z?&0`9@nFS;_oe)TTr&Y$+#HurjK3m^fww5;n)Y8$Lw{jX`|&}xUGB*lOUyk53vo%S zXiJKg_SxtFs?mhhSHr|JdH`@b~Q-R?8z+&e6T+Ynvy8sfk`h7AG`7$Lx z^4DPT?<){{>;tVJ%%(;osVpjLx?FOB)dex9pJ(;(Bf?p`@Nmy5-@s`MV~3L)eyfny zenW3{!C9_n0mak9pb~js`#MbiF8#BLDQ^@z)?goPDfB{w!VN#TjRgQtF!9Z94!n## zKx_K=ob#(F{fP!WW7gBx+5#aJc-5bZrpUBvFPxKLPzl2FjeViey_i! z8)kzubFZ(ot}p7+ev|n%EE;XR=bqYi9kVqDM*n-0qjmEOGB z)WwQls6!pG(lG-UgwNsGB>^sil7Gf<1=WlGad6KF|AVgFG1fU#qOi50y+}%zX#7Ao zGJo|EfLNT5V00RN*TqO4fi;m|p3zoBA>H>A9xyYK-bvoYjKrV3GT)|mhx!~kP4i~W zPHx5Pu@?C{UFpG`G&#}r1*?y0QbvUSJZUL9y!jEK&8weYm-3}7n+R+2IV@M3hfjA= zW~=v-GJDY%JKnR_3-wI=YTpt;P?NLmdEwyn?2WC|%g0Xo^RJlK=H|O&vv7_zb=nsS zBj_GJXP*8dYgyDhZWJ{eG+Wusozmv=n1Dzx;=Z9#&}79Y%S`D{Y%fy;6;WE=jRbsY z*ZNgArjlh?in(;YGpMMODoa0yXDKcgdtO?n&V?<tOoH@ z@j51o-AC5BzY>h3G6l8jw)>;X?98?_kOiV2|6mvZYR+nA(WiDp!>U&R^5s=0K-%9a z$?cS+6M#G`_2)pXXtee?Q(c4o*AxfqT#2EK;o_F~+Pf~F2n}uxF@eCddEeXc07(dk zh$Q7M*kqH^vGwroL#+`C#K1wMF zyTucJ>g0||1bDHhWOWnOvZl_+sXgI?NSE@PbtLk2mc>?Ni?`L5POq#8`m4Ez$WCTd z0_fcrn2tfjU!BM6QTst6+PZDXvkEG_*0BUDBse+&GF5`-=XfC6spG zOto{k2u6H)dey6zI2o0$?MK5a*(#Mh5bX_5XR!sa40_HdvlZ`&GtRGL0;C916Y6Kf z($A;t5A$v{Tz?EiIsPimbw28&Iqdnyesmkx_x0z@!5`d;vXWP?30)zxA$$Ap_?1D} zZ^f0lQ!X>dLOQ53+Q8rGtxfuf?N8rt?X}M=$I@DzFfSkOZ;3q{JL0@_EC70PkP0Iu z+rri_D`l7{>dstp_aU!Wx7!sh=gcz=m+R_JSBVrRBuHRIwl`~N!Dk|}OYqJ}&%Zei zq03XJb8~kY1{``wKMT7%j_>5ATJ}A2-ly@+_$kmkSt)0@$Tr16k$5i+Gl**~LOs>J z9X%9CN6+%4_s}W*q*wH#uS1}9J*Qu@M`w%VMRsZJ&OM<9H_^d7s)SZrj%+ji6baGO zV^R6a1Lh>Wq4c80SE_VMU&{4ijH%x`dZ6U`CtS`z1v2mo>8qmAlkaD-(2{nu`vqt+ zpv4@=@C%>^?Yw)IQyL48@%o{5gX%caOO zJaamaUNlrVP&=4TD*)efZ5U=*32ar0N+(L*o095s<~|1~p`lwJRx>BobZhFr;7r~m zNCCj7IIjn2i6Odo&Bb?~4C(?Mc1zX`&x&CtzT@kzhi1cEu-}=_76Ui!F!VOcHs60kGT9MrpcGFV>vL|=5Kf@lGB#oD`uH@e{gnu5U+Z)j)NMju>zZ5%MEgYyhsg*2c z(OmF!1tK9FUd7iFC*uHZKhN<|Va&=KOUE9*;19-at@Ccn{+H-WnG`PLLNTK5cKx?@ z?|_N(OMr#x4mJXH=Oz2^V*iDwS$y@}U8&VRx7wnf?c}st_Lb5P-7LPQ@V$OIMFv%y z!hOF|%{q}QcQQA~ZsJm#V+2$DaB5NNnQE(dd2C_st%;o@@UfMWyQHJvOw;1t_GfDs zNS-dvoQI)`*ZDC6lyWCzPdX~GkqlE$IFyp(ac`^wJNbib8)e(Sn8m-oA7ahPKESy4 zb6;4^oYjh?a6y6{ElCDgts+`1aiF_K&U-`CZrjI!P`nxrgNAhHh@Ic_K$iqkJpYV( zufv^KK|VBhTI9nucDl7l<_e#qJa@j;F#U*}dN-*nS%4E0mv)I0XK*VU@z0sn-_~#1 zh6JKDKIV@v_atvzUC6k|o|TTMhW(;`it1)fxU^s6H(U-*s$<_5@S2IeNK1Ml=X>w4 zV`Lusw|=?B!2Ig=?jC}vQ)UZn!ON}%+IwE*s6eq#lch3CAvZTqHz_rxeirU* zfE0>Jr<_W#-WWv==TSlEwOm0w+TO_N^rKtFKEQenujXp5ji>c&6x;46DIC;loN}g( zzRT-=r0#yQkoRtHs25Npae{_I@rEAgc~)|?VuRj>7ok~chUT6E@}?ohZnIKqE;$}_ zxok!$k>Mu*r8Ko&zOT?`0t(bQv>;h4DNu~+Ms65&p|;{_AHGZ*|5JLvsILcM4!x8= z2b!WEX7O1kqpZBDHodB zQrLLrdW^W;o>V~WXrn<(Dbg}5TmXu4)JrForu>L*~cR zUX^6C^{S~D$uAl)zCzXKb$aOh&|&}wKb+R0jHfZcw+4@2Z4A@-+~7)m`!FZ?`q<-x z&*P@FTZkpqD#kWsQ4DqD@(mLOVjq`?_bvu^is-eDUw_l3W>|}>6~{y!0Pj;KA>Y4X zC&t4NAsZOx+APlT90vG-?f}K*-jGEnV^_7FbI>TMsapU)0^dAA!?Bq;*wv;_S}z-P zAJQUwxTs6OsZD-G6B44aO;IrSR{Q+{#Z4@iPhzBAe~$&r?6jKTTYS`7WagHag*pQJ z_N?pgmPpp$osRwCrdVwEsC&qa0CIXx5z;LeSzq;xa0i%1Fss&IV2xb7xJr|KiB^m 
z5wnxAwq41rPYRo@O@xxOUJNXnf}3~Zvqn?nC4KShFcsNz_ofVacSXt{*fBh~CK5}h zx^%W3NTL2z>~+RZRT&zapn2@9)|YLYt$_%2S|}BSehm)-CLQ3IT_R&l4!^A$KsxDU z@3QGnKY5ZZC2#9lQw>r2^E;GVUMc>u&_**)JTz>80{*g)9&x;2N;-lf`YyHVT*NXP*}alQsH;7j0^lVDBOt05+O2H0m54dFzT< zfS{oiiHXAGJ61|}aApZmdbQ4fXOAX{FTT9+W#_q?7OG}t}gQJ1+wOk)2uJV*Mi8>o=z9k$L4hb~vUcEm@O z#H>0O5ea)6j4K^wsy&G|CH5BGxA!*)rE0OpNUGPVHzAzsVY9N@~9y!S{Ff@)eXR1)+v75i;U`r~pQEmEG zurmVuR6xml92p}OKbB>OD`11nGd9ib+gaMrt~vT;QGT-B1Noq#g)Zb~WixQk6gr>NZNG zP{zFl@a19_bcRdgmfb&}a&`a$ih;m&b^7h%)-;e}tJ2T)WV6H(B}Ql-er5Iz$n^rY zh9Lkdv1KPZ{9J>5sRJi|C5NcXpg@OO$TgkIm6oDT%<^|a?Bd3R$|0k+zHht#b;f$c zRfFuYxG;~W%=pKzrcj){Y?jYFV!Fh*dtiI;EOb+o6jlY|Z+#I`GeIhRm@MXm*RSRr zZ*)EqSC)DBLI6we!pJPeRqK@1Q8Cx9rQP$-4$Hx7JTGO=2Sll8anxT)VOO`^_+Yoa zrgUH=J3R19GzEycA8dGLOCtirC|lY~uiyEc`yUrd^1VWv2Ct7;ym?8VpnrT!48TI| z7R@iqTog|8^UhIG-jw5v5D0KCe>lk@p+YRZL1fBmaa#A7Tc~udDw$>dWahm}_|0(R z9S1+gZpoNZUSxpC5@_IH=*1qo7{zg`9K6zxb2X|iz@;GLS7oY;1>PXxpE*O}rZtCe zKu=v=Pg4{7*u+)ICvF;O0$bofa!wWymtoln28Lb1fEK}#)(C=qIhk*dsCmAcy|Uu$ zS?0d(-+HRDaj z#c7&aKdY-Zv5D(Hb$At$`#G8VV&G2zPT*C{I;)JNp1JvCMFBfO(h4#q((yy-()+4a zU?gO6rHvc8mjS-#E-5jq8_PO`2?Z=crXqWYZ?ZhMRyFs-aH=wdY&()`ifpz4?lI*hcDuLsxLCvO zGj)3lL-r&br0cUqstbCW>2HT#V^J_sC(+XSG`gW%0hy{WeDQ7r!(*HiZqGb|PBZ;L zlsAoqRJK+z$hmLK-HG(J;!&L81?bEvD*PHBjwuEFd7P}UH47sf<*>QAycYqyi=Q~> z$ErDpu_oX^hXdB!3~gOud+Z9V07u zN)qiLE5PBmw9Fqh_>8IeRcJ(SzZKg;bGp)X9u(68LxK^*ri)Fx9rL`{mrs75rs`yS z{d}N}SIyBXu!kiD(Y;E_y8o^34KMGih)JKhwJW56{`E?gt(!CAG1arhL=n34>LfqN zP%6u;UF4(G&g90+2f5-$VRNcm#rbB&?@$76?(3@Jyf#embCm=N=6 zV#On4QEA?(-)ctCP781q2-t$8VUc^&Ws5-14zat^1UlI6a2ZLB6gpWKjR_vq%}m-H zZOP5KSiwK?&cd7U2EYEHEh8|Iz$(+DdA~yw;V2kar60_%28?;x*ru$MkvqMvJ<`p7cC0q!K;~yK1QFAZu_K zimJ)IB%6-f@LkmZIq1!&L7X=!tmGqchLkMMt{vf90y1%3c4XANT7zMLmF6r_8AunZ znWrCzCPvcfc&b8N2t^k1-s-Qd;i75)zYTc+D6YoCS}4(*?CoE8Fkbb~gQ@t=SKSV) zY5m(ZCEF~=Cjns(RA6H<4f_5ap%r76D=C%W@{}~{?oarPMoO?h*G^B6IyY9}z~M2& zS7yJqVav&^slJZIkHGT!hBP+Z-R(EnW(7>5BzdN_@8De%3J-aw#E`9~htRO{(4`Sf z<7L=`?^;8Bt%v;z$AT>>I9y(+9ZLkZq^FC(4r6UizbA@VWIw%%mP?1ABm2@T#fLTB zK1zk=1$^`1*tSC#u9q*T$WlbX9R@hl8QJ#E)D?B{NXNV&gDFw!^_8*%)cIg3`$6UR z!8R<$j9K=8*7v2OdIU26>oK1~xc;wju$T1fy}(a zr)D2yUCx3h38Ok#!X5;-g$R_4%(<$Z3e>W@&b&r0`dRc6?#f5!1nwNr3KJS`{MbMA zY}>Qp*$ih*aJ}X4mq21qOYTHm`^*nFRm)hlKb$!@$3Y*=fVNsv;wO4tN|SkWR|*P~ z0fp(^LrkCd;dsf+q@9a9*M{>&@6& z$KUv6)VXH?eKeQr4dKmoj*O79#nQW|pJ0o}+7<5a2~18J=&9P#5L(Fihvpx<1tYh9 z731&UYy{dI%}%17^fMKYUBoAyM4!Ms^YXn<#Wu4v6OP90tug$^pI$TfNa!64pmqdC zgi_FqA2O(|0c6y%_g8dE(z7dOR#s9P_9FrLg%TCT;@MXqf-F0^RCG_d3Si=6S~w@O z^XYvu$K`71H9nw!$agzaPAYZXfM23=L8e=6_P_ff{xyuvG>{hQb~Y_Dk;J6SUNMnf zu**0&1A+mhz|p0Hoy}==hJqo_;7#)W*^#fI9)Nt%Q0%o_6j1J%F)F2T^R~ST7ATMm z!;{D|(F|>kBvC^Jc9WNTs^rs7-}F?p=~8AscIaY48fy0-;b@etk#sH6 z!F$Ba93alNX5CV}T9&HqyR@~LxYcy`^e3FLh;|9C)8Klpq@zm05$>wV)j+4|#@t}vnDyT7A$lAi?vUbB+?t2jMF8rr;<%@ja?7!);A z%FS{r`{wO0!aUT)JHs6Q3S6%FIEu=fu?DRyJZQ@VM{lPS=bJN{+a=a5PgzC87-2o; zSDv3`djIen(AFfsV_{wWa>JeB!v1qmwbJ=FgAwB|KroZum?v*aV401-oD5kn%MfP8 zi#8oRzsD1-BLl!?d5m}ObxQgRdfPC(=|6KT3kj_(n~niVeXecX zW#~M~^}I4>oj)IooQi2AbbmveY)N(fIaXw7l_I(MznkaGo6#v|&6%@lACP4FK^~{& z-(0T`55#pg&ESny$Wz9%?<2D!?~}<~dS4A(E?vBJbS`(WF=BHq{Vf&0pHzwHSlHDA z2G|+vZh#CJisztAKBsH~cC*6qBmFeb3- zlz_lPupagAi_-|d=%(=`oY-{a~h0S@DK(OIW* zpZ2Ed$35v;=$~4cwU!pF|G9_=I{AePoU%3-@Fcq|Z>DJjnK!?qfNID|tg*~85N$iF z4X}=jbXwg?z8~;+h-y!lIN-0)>L!gWcqcBNS^Hl+L$hC^_@}*14t@aWs(=&`#?3>1DU%(_S}aF0+(EQ;A#MC~?z~C(L@iBS4AEhuMbML>ffddymn+00oIy^RHP>)yg1H6JAVQ! 
z;us+L>8{aK{ko6K5xgWLp5SiZdM~+QVJmu8^z$hbWLqeB#V&CS5TUaW{uDh#HIKWsAYVm2T>1NB`7@KR#`k9#nsaq* z-fOkwT{K|rnUTHqjG^F%XrGc-RyVj(xD<*8za@5xtv*S%$9$S1Eqr>G-mM>cGbc;$ z1vJaUuG$1C;|hyZwc-o)duG?SmoC<4>)puFs&`7`dFek`v>r|Pa5-ncCe}wWPjJoy zq_DD?$x?nIWu5^7X3`>9cr8L8B<18$3%DpdR2HpUQdp6PQ@R1s(dMm1sVZ;g2&eS! zd}LM>rmH+i(7Y1koO9 zB*35Au{)d0^w*~Q#c&YAIeG=pvQS{_G9s>v?Zd+JY)CW=W&_GoqLCw|vE3rL#QPpq4bFwmg=))q!Nn)t})} zp?w#+lm)sn0HX2x!q#gET*|0Qu3SDI?FWsnD0zH|mu+Xw`~wf#n#4SP7;JT_;b3-Z z7paSJ|CtZ4m*?)Bhn@Y;?~d5k;BV=|joUEzuZog=V2?Dg$4=?XhE>euzIa;T!43J@ z!)N|IbwoFTPW}TyF5r|k_}bS>#l{>kcKxlXg`Z{3(szNLCvJXeg-MK_$m(xbB-VzM zwE;&~o?I=V{<-$LryO=&GjHP2%Hk6yFH=7IB|LC$*CL}Dw`Xi)Q*QFD!!I4tcSO0) zGWZ=V1BI&_YPaO@%53)N{p8SBj9ae6tr*Js;rxyB^ycj0-W3gq+o-b*-g9-0iXcol zYsa59IBqd@<{wRRA93v&SfEHeLd}+Me7_?+w?kaNuV49Op(rtKs&IZ_iyg3{4e^4j zmZvoLlis)S9kJVo8{q|KFh{{XR95htVtq?2QX8-A8i&(3tT% zKA&dWm#|-4Ik^t?_aC%}6%vhCh^$4n>5g}H(C>MEtk4as<^pd^{!mKOyIAbnOK2eU zwYmtjX0bF>D7`E+Pa~=<$z05J;QVbth?cxRLYn?Omm(~7^P6!=^*@0faXeKy&OemLl_evcG2wIR#DUg1WE^Vq2(;bVHez|hq6I4KM{IrllFJgq^@nRA z(hh!0uxZj1X=f&bo9+Xiqy5rBDF$Ny6qXhvhNWA{P~&16x)b;$%H6cD8`(Eu5zPpB z86+mQ>MVX%eR**?Yg3aqFnd?hB2q-T(Tf$RbRMRiQF<-F(?W7LiL6U!nq#bD2!Oq(1`#yLOBHN$wt_G|Dqp=M~W2&c3HGw<#;^${b4vHE+ zoMEAbs7k<@6a%d8XuvQH!k!8Fn9|oy11;yuH)mk%^bbS59Y^E+8@DD9@(^Q?3uZp4 zX}B;n^9C0!PB5Ud&cDChinWzLPl{X_6=ppg7hWM!#KQ#=nICdm)s0l|`Apjxh0S%8 z)=C^Za;?mnsSL^|%Vi=t82&?4%JrX;$+##ebgI;2omaS(;8avkm(A+FP%Iw^WsJI& zASS8{jVf@9b;$G2*luQ|+)L-$C7vOs=ZHS*AjacTQO}rSCnyz{4^)zm8d9 z-1g;{@|k=tJr7I4;;kl{Y_YXIM|VXQu%(~n+I@|s=-G%tLZ*O5A>z~a5*&|8_fLe} zKVeR*l}QhQJ$wx?&vu%`v$++vcC-4_3wx~?4)<6155av*rUN4UNV%g@zY5pPXNeV=9gK^(xDZd!+P!e5N7|>njwR$Y=Wc zEw-oF=7Wq=>xi<3<3K)&n+tZg@Sznr2=~46qbvhI6cnjcle=)tm*D(;z{*GzQJ$j4 z=7@X?#eCu$36|EhI_1!h$ccrF?mOc|o+dQi6}QsKtds+{J4_Cce|c3!^*0_)QgrpdrRnkNOb%dJSUytHDg z^p7dOaxvo0MJ_MyD^1#%xOSIcq()pUhi$bis2MJ~qLK zgkg|DsHyd;1r#L8y;$`gw{Y124oIn;t)vG;GGm&VQIieI++~XQ2|VjVno>uoi%F0KIWnb}Xh__xZ2RB+D)Q+h8sLJa zp-w5hK~UpVDsKi)S^}Ty+5g!6c~)Z7r2)IMfgNaIxeU)Hfy`Fgz(i(ebY&7SDCvf! z0QHj$AdFYf;s!B2R=rI$CIqrnW@-BUX5F8vPvY@bg;kH^KkWapKC52F%}qiW$N9e8 zTq**!nxn(G$+>xvWyk`=uaUvo8(b!cyiUfc5o7ujF8-GH1VI0Ih`XG5(qGP5lovnP ze;4&AtId~T^J^0o&qN-`XonxDHJT>)m0@)%z^!@Lva$}zEOsCm3MZ_o6Eu;VU#EQ;StUxRosYfGi;=2Ejzc%_v>%MW!X!K-a! 
zqV%S&BN7b$&N>rOgho#ot(d6{W6fP>^c`QHcBVf?Y+-XG)Tp6OzG25#IpFR|a#WVd zD|=JedS8A_RNIrt`U`}o;j+XxC?48{y@L=g-hMgAi?6sentcznuvw&GvU@8uo&cQw zZy>=h25(F2Qcn#2e1wbd@Jso3F@HDHA@F9v16pgL;Rz zLD+}%t3?yzzqZC8=t*s+J_A=Jfvj{~JGU~*!6xz3SgsZ#k9Q@3JMmDQECsd?iU_PJ zxU>CpHD-O}^M{20UwQ8t)@0MQjmCls2;72*QbnamReG<|d+#7Z5D2}58iJyrNUzeR z_fA45BGP*ap@$9u0#ZT`?07%#v-h#TAK(7|@BB%QD|5}PX=~24X4W}e{s7Jbxv;d; zMLxo=rB4(desN%+#d) zDnt2jS*3q0`3^B&>a&AM?*3jh&cPI~R2PdbeE#7&fIwXN<*z>)ePkLGp__cr@Na+q zL6)gR`0<_N1VT)(hI0svGyiuUGcrt}5FGc*lVF(Vn<6$n~S01BOmT^aEpDeHhENw|=fW z$Qo;U@&Gw53M;}7WOKhFSS4X|NR!B<;V!0=WyE`EzJC+y4)mK5-16%=_fc_6^p!8O z<*6q-WxQOW#$7r$1rU-1c~+*#r`)=BHo^e$aUJelOxm;VIz(~l=o5+i0& z7m<((3dZd}Pb6HpgD2X;7BNjh;8LOt$`GChx$~HIc9e+&$IwHR8c@^0rrAg^To9&!mPEW>^6Cjt7l3qW?1sKYS^4Ai4@9Z~xTH z(3RY($TWN53GNo-1xBtVNsyk*^v-0K)c(ScFDPOkiTS&eHiPZtp6-XlkLS0ai;$Z8 zZtt@^{=WZc?wu?j!Lyq*_jZ&`$mHSm(cq)!sxPCzssFt|CP#@+A1hFl_3_PA%d>q-V#!Xe?PD^<&W<(Evc(uLE{g1r;0l5+kvw#_&7F6Q^wzaEX8#Y3)Uvb zXW>=dy6n-!kvpQNM!9FoTvMObL6o#J49YN*5s}f*c&Fae(+^fiZrEe}3otoIc9Tcz zfkzW9-+by_%OyT?$YM!{^r40v3os2h~2a@~N zn@O(>HV+^49kzTM0s+=zh?SO?pHime!}$Ae)&jTPI(u*} zI0}53yR9m+aU6%*CV}ucepOQpwF_3W*+`n=YbOu}Fcb?Gx?d>$#~-!?JFC+k2$O8P zT&@{5`Vn=r(kCU~bmXQ2QY0$fhKoW&Lz<*;{DZs_gYWf;*nt!kavamu)bHkapt%FS z3$D|EPd&e>*`7^Q_AU%r1gTkD} zeV~?gOyab#(+fdX&x9k0Vu`55FT*{!+R<8Vs4!YX@iAD<0#(ZEh(}eSX*Elx>NAo# z4aUx+Syv%@+b@LeRVRZOb?Odb_w_q0(skgNIz#RpXb>u~XjI!OB;de=%odSzxdQVf zNY5~wnNJ{NuoYS!czKY0E~p!FMZgCHiot)mD&AdW_jsp%oN{8Hij~9X#Xo{48Qiul zA1f=u+=~c+Pdb!l{mn}@?B(0NMi`(MM%R0a;UBV@@$S~=ai2n)CVqH40dJMbW@jf$T z5tJLQ&1FIQi=@@^q5WEsq0p-TqPy9M5VJ{A*YT&K538nH#-u zp}QiJZI<8CB0tJ7T?`j5A4CDY(Ata|AasEgA3uk5m*uy=x2OI+oysP7uZ)81q~pxh zx}$Y`G#hh6Fo!cvFYp-_+&I!?$!t0~79#XF#;w;vxc7BC!deG%@n8;yBM{&rOOj|L zw=v!I>sw3qZIAv*&Vvhjc@~7++u_(@Ycs1Zbyc}+KcK$e4K2-*Fu>hxX&?0`7UEej zDyWa+H{C>>EM8-1rH;T1utB(bs7^$kZ=7x zV3dQ8ot9Z*vbfo<9rs+|6=n0L;&UNF1&V7m=RfIeTzw^Na#czenl|1ZYNtU625X43 z_ig&Ct@0qC4~2}fia|;56hcDp{+U01V*;nI>Cb>hukpI_kpKA6knk|!kv-aCklkcR zEa`J3k3w6?)?O^~-V;!LV#_Vtirh zRAUVLIg(A}oP+P18DX&2a?Y;g<{lf8QZX!@%SKs!^malR!0O^Nhd)N6@;lZ^=hi=j z9#%a=5PN8Uazg9SA2(|A_J$eu5{sC>?QDvSoZ!AiTc{wt_I0GL?oEvOY+P`B8Kx*B zF2YM>q|5Xc?$=}jW8YZeedzHyZ9{tY32)7hJxRDaF8D5ihbBxb^=qMKMhGwCxul~V z>;zW+>itP%!%1T8{$koj-?2$@`ll3YJ-_j~JvQ0^;s;fOI%6;F2?+6ks`=v1HyD9o zoDluR8oNT`bhI+UXn37y*r}-8J{CVG(u$K6d63e0bhnJK6VDcE5Ixn!?efVe37O;^ zOFvk*;dtWI?iq0B9Z*(p3far0AnvQHd-GUaEr(GNFJ)XylE0 zc&Fg-aSZmVLsJp0X4ZE+U}V%UW^AIhOB3NzNYG4g_MZ;@^0yOMwYpE>BmP&L#(iwI zPjW6VHXZdYF27q7MzYg0L@J@sL^xuNk8F#Z){u55QL8g{w=1`$msI>HfZmWzs{bcm z2y{|PR46zWUNvm=WsVYjLTx2`yR+T#=?(94V!rPW#feB+urIjUDy5;*r9M&FdERBW zT%AJxhFp1^J)c+oPD_8*D5`VQHQNCtMu;eM*4xx<{a3(Y&6zWue?!;3x`BjSUTiEZi42e_lcTMnNFGB2|B(=o4&;qRA~FK=SfP&>eYMR z(TpMEhHmsEP|Nd~Cj)2w?*-zw+o^aZu%eJ`zQyXjW%fT-``9Edf2={)Bs5Y3=+ z^Yjy%A|9Je-Al@4r*z&!yxhP$| zG-i`0`x@cGSWp7(A!~i-rwL<|*BdPy(u#I0Cb^npnk97Gwh;Mp36XO_;YlhwW1RVP z^e-5EJcgD|ABjXiVgEcoqXPe0;zvBYah_u`wpZrj!oJXn{#&E6^!tr=%NQ5-t*<3~ zXR>RX4dj5}$$!2k1@%9_9)$NglNR2RA$edp{zgMSmkd&!D33iB5K=~wvkC~=^$oR- zR)Otw#@9>Ur@3Qma})cduzai6I6p(F3Qjj03Da_h)Y;xly0j-GN~x z@$W4fZaWI8?=VeJ=0a`Fey%!MK|@1jCaUX#Mp(8c2VLq_i$)H}MO;3pF)(5^G~P(H zIdi(Vg{gZuU?BRg8o0TjT{;EiA}POaF#i~J0}!JMXQd@?HiL@n!#|JDOWb%$eXrC( zF0IFL(=jXd31%i`sE}|a%q?ApLs}N;W}TtEX5KMK*XpQ-xX0Q`s?#ho?aUinXEy0g z&%C!Qxe)Lpd!sbz;TveE0xx@AA&@S6T= zWn-iN2vt#c1G#6K<5_uJK>-LpdOIwR?Ash|$cK|W{Y((!neg*7T(GCP0w3Er>4z*) zm*n&r&t&WgwmjPPnc4S7`5?q0$MmVnU18Rer^BUzE;Fd7pQn41HVtH33z{|7QXMkX zm2uFIise%b_m*5~~BnoQK};^=cO!Eygau>-VsSJhT&YT)4DJTlmhQ zm^b$81=cmYYb!ODne_I+ zZl}LK8#=HBrqEG};=e&wi?{eHm~H@+IfJ7CW zbor%6(@+fnGLkA?D9jtHhjJ*`*!r`9i 
zHcP8DZJ>J7nt2n*ygPR-Sw&K1E{4uEYka+r3N4P2z`la=vsg!43maqjq+s9V3!1h0 z7IA;Uc_%84( zm1HV9lm2IBSZZ+k7>LdQYkyVv-qCUJA}QrF6V&?&G0mf>@xrHPuH{`@|BTZc8wr{u z;xd6oJ|O@g92hWJ|P807hp};ogreR zrj1qTbjS2GZY*EE{9zO48F*;MZWw=g7W!@V8tiNB?kcYwwT&N@MaQY0?<9}4;rMn= zO&jyjQW)E$@$?na?L3fO67dh0Hy>?ju3E3EnGZtxMm?_kmJUKRa-F3C0Fm445L8>% zc6r-fruJ8WJW4|CAHqS({=ivge2%oYi;KP zC{JlaayL^0gA0Mp*08C8cLw92LFv4WCe|}hM+J%m*fH6D$}-DAKXGT4)kbB>bZQ=s;zP~*s?y*M(M)1wL!N4Q-`Sr zuk+}BPh-wdFVwtJwW6z$A565m0r<&0x)<}DdhGkITj+mOXpV2NkII52Yuzz_HqLka z%;@GW&vw4mCS>~gXVw9sz7#yTPq1SlewZ4Y_-)u6w3llLnNr&VdFGW8=MZ6O2c27w zc;lx?zc@#0pKqO3y9lfwiEUawM&~;pT9wYQPN{lC+S3J+q zQSM^?aV&Y<^VY!?Z*KUfliqGhNDD0#w)vQ8MmxXiprZo=kAo+YbJ{J|lF;>Y31JOdE}QFGpb@QQvq#$Htb*rU z%m%Lkx~852axr|cMc7@jf?z9{J#fNWp7PwDL3>IAVMM}jxr{F~dWXJRCUB%w9tHlw z`332D$8XD*WI)GqYYJJ<#n_=M=sfg$v1aBvgA7s+;B3VyLe#!lw^3RgKAd#f;w0?l zn{)-t4aARm2z#xD-rr`EhF~~^Zv)D|^=;`?D_LF`B${9SX+4;TuAol2Xmaw zhksb^7$iE;o1(pwhr$VkuKFS3W+2?+lYk$<7EcY_L&kt@pDpjv7`_DXv%PbCX6>4ks;?kVgQ?q74QclS& zO{*`&oe{lpW}x`Jg9>8m=9#Xlog&byVRkMip&hWzii7ur>twT7iyrO3tXYojN?;b5 zchX5mRzr4?oWsU>Y6%5fmQ_&Cg^*Zv$yT%bGR-PtH~P9aP0#cF3o@up+iUIJ%l%Fl z_C}@!ys+QS2f|OwiWdk~k09A7uJhleEsQ-^GybCaEBA#T?p<~B#E%LKmXGX~7sr!| zUC)tq1~_0N$6Ae!NJ;D3q_Df7<8PrFFrNk~K$e|gz;=`z&^u%<@F8a@OUHSy4KJCG zJYb}$sEm;X7CYHC9XETamB`3Xc7Chqb=$R0F;noBfA@uV3RmSbl$UgFAUCgph5H!vs}Z84kX3WwW(#0!ngw6DWe=V)0n((+-)C{^XAI0w3nmw%+Ay>ZcjJVe8KAK zc#M|NG;b4;n8V7L8}j~5%)>0LL3Rrf^D+_JY(cUW2plP)eGT@UV$*m7hR8ZEmJPBJ zQC6oTWC{@MP`d~2GzmEed*a3ioqlD>hw~U#5kRdb9~$cI{Ti0yXpRpwd3`+yjc@Br zub)YBA9D0}INT^}Ym@DQxL07tPlzEQ;x)6`9q0YSMP)aYA8?c^DP)^pu8H}1bXBg! zh6S-grLW(KPHQ-D7m{IsV7u{ozrA2}jAet8*s5=~3EH0|u7zUP#Xg3i;9LI5;PBD< zG+T!_o1o!__7T0&^-Z-Dl}To+IK;lAwt?g?jkLmDQa@(V(Y$W8wX*&wA9)=bs|`U< zLxv8G;a2?IkCoMlcEUWty9^Fi@NzsPVGt-PiN6Dm=MVEoZX5Hq32VAQx%{Y>lkOM@ z9;a#W9(eUzYzPp{ok6`j&22@qSt4$pc6CvROV5_=Z%W!kZUPvZs(tFQE<2YK#V zA_<^a7KpQVEQ&XEzOzG;Q0^!A799%Rjj@_*)#mF+@i?iqs$2PG#~^iyc~=awVo%(| z#!1eC@+)3*HJ`i>SNt^-s(3h=r}0%`i;gbe)2_fa?WduI6`S{Z<0tj2Iz(=2;!C>Y zop>=%L>y^!a{VKx1oAUc-Ix>>rky$FjDXeNMbC@AUipBYh`lRh=)4+_l*)pM@kE8^^|E&XF7?W-^o3eTkkm%X`hqC9ElGW~7(tzX@H#Dx|GuZ?rwLt1+m4S9Yl&ae05<~?ufsb&HLp=6Cu@ZxoD38(X9qVC;C%SEDH0Z z;}Q5ax!o+tnQSFIZNSEdH?EdV6rHAM*;xsbMv%NC=(ry` zZ11;ui8c~e27?RkxBN(^3;8kC=3Uuxb->YB;}nd4ciWjM5qUH=$yo}$?!VKB8QT~P zaO=6Ff#+O7s!+I|rd#9JHh3dn*#Lgv9vDVv>whvp)`9|x&-2ba8Ap%*Sc|k)Nsp6W z@VDm48@KN3N-A{z*2I5#k9c^vys>mY%TonQT^j{fB<6ps^&1`a%Dfxq308fGUN&x@>=2zt>R>L2Xw_ST6GMN3IFHcx~Z?0{A8}&_!HJ@_WuaC4YgF5D2-_H$5>P7nGnT@ zF&`>@LCAIYOlH7z%}|S>rdq=l*!-<{Ntb(=AD*%mPO}Evp(B&k|I>#aP_R=L_nF_c z%RPV$keVP7;2)8{jZ0|s#ed5M06xh7Z7=}fHo5ixBrqpCeLT|!LTrO9GDtjh;ZF2) zXMQHSwe<^R+2uCS0-EFZ{wix%14AV4f^Pv|WdHmAU@)PR{$>ai9Gu{RDz*t9GEB#L z=FGjaPB%ASW&J^B!mR~)9~C&lf+okcd8N0a?lbmEVpPu-xLL-;$2ApW z1oitaMN3y(ws~S)z`nIDm$_LzmxA89ZbKrC(1EXGn#aGfvnqDzV{7ysy$;5+i}6a; zT{5FS8S!V+AXp5d!}%AZ-%6~*!9<@}tR`C@tiA!Sk1vsUg{?f0g|ywjIIdqiABJm5 zkpo7=1j?RSl#yHyOICgo2JK~jUb=ub2(%1GDa{@P`JQW`eegpiqDv~gP^W;C)-&VN ztNaUR>5oo&r^uA(u3_|I&l zi+&A?0~$6P?2YsVOt8oy?&QVcHcn`@IU z&oMX+W;X~HZ8wo%h^sTqBTt z?hNv;eGoqWy*HcgWQ~IB2o3sr3d1dNieVPiG#6S-z#yo6e%W*?ZzK5T3_ZR}n%_)Qq%GNL+Kg@f9GA?^wD+gL zz@RTlVvk<5j=NQQTm|lgI~R7IdB$*<*V~5w1et}uXfebiI^d~&=p~uba$lV`>H7FN z|L#{lWWfsujXY+yYPrX*Ng8PyA{t_Z?lx`sshc`|bdlBboCzG~Vun)f)`?$T*E?;6 z3%yrpoRfey-`_D|<4j)bAEba7+*x-@I^z!_9Bq5lmt=6}ft5t1y6;}yD^m5tent0R z<|mF_lv#V=HCV<1_DK9zvr%u(zm(1Srp`f7L}RVY*61Mgj+qqM-4RB=4dJze0VcyQ z>E3PT;^%67Zjd;XBO_r{gi;^;`SafPnwm}J136ey&*il5WnAqg>Hsfo?_|N5b_XKQ z*wD~C!AelE_6(npUUdjfNB&xtWX^E71FU)bMZNrG*xXF39li8p^g42vx|V4^xTDRv 
zIj+_4Onfd>@H?bXBgp2&qQ)H%Y!mfY69WME=!Z=Hmzn?nsT%$_31mzImevq`*KMt1 zWH4vYq(R?`*7YR8odC!aCMdqeFPk(sXBZ-a9RB2HNpBF$HUN1<70IHWGrA2gB;$;( z?vHe6d}=7>4e@FvN5(pZmGVfV!FSWTX*8uxt`4_z=e7HFbnXBa#|bORV$}t}vuuk5Dn=OsW^7{%Kf*Bkwi+`e2g-4oqB`mC1J&ztG5 z6Lj#c(Y;@j70iIgX~Mbd#PM=_LFbRDo25D`uQMkmc3>;1ToMHDi*!P*8IKA@D;ePb3R*iI}M%% zIs1grU@5Nv^q~|e7n1A!VYOcYCzGLU^B2ax7&E3Rcf={~8~FS(*|I+4pnDrqD0Rsx z9{9BSa2gIN%s?z``9fNTlrVhI)7tmW2u;+L^DMZNTOK95hoa9&hNSzosKOV55JNtb zbJ7ayG~#%fBdO}`T#IG+7f5CwRNZrXJbel%D69pYZo0laV{r*v|Lx+L6?K@Uwp$#<{7K7h{B?c&#KE; zF?@BY^+(vm8qX7+fNRTc@7fbl{lW4RV!VHEetrn>Fv%G%y1leDUK(Z*8b1H%{Bpsx ztxt*&KcE%#8s##~ZRXSCkR?63u%V5SfTgC0Xw3($bVtTWBdXOEE!?x4efl?rq~HH> zaM@gZcVbi)2-(dLCm{c}8Pl`3NU`R5Z-}?_=D}ab6qPmgPiu_6@l(kr?m7S7gh zEpJbMm!TDIDkIty_zsoO@jjN&F))StmEzcl$1B>mVAPHVt()NKyyt8~4_{``4o|J7 z1s2aKJ+r*)ZJ%o9>YRDrO{A9B>^Gz|_Rydsndf<|6oS`6!Z_)2pC%RvV&9$2*Is}? zVa=%QQ9nz&o8V;$!9Zy-&rEZ#g{LVOvxtU(`Un@#`Hk+6L9gADOb{rmz!x=b{yba& zz^T`>vYR4KetwO|h9TQuJ0W>K!@IHJ>%e^Zyt&)V zg(rXb7AK(xg(O|td?w@o2{Nsw_n00$4!Sz4p~>_NO}HL1W6J3MFl1t2Iz4%Tfsgzt zJfshF@XF*gDvu9f^#A3O%WLTiC+j$;r>d9tyez31(h|R(GeVB}2`Di^JAoU0-=Wt* zwpU>_88-kj{=^PuZ&4J04?mcl+v%=jDlL3I(~ZM1ou5>P#m9wM2!K?kpMY!fA`H<- z>Ep(d(c7%`*C^Zpl&M0x#2VHJ#2;mwMXP?9AhD_8s1SNNd8im|XN-I$eT863>f1Uk z7!r=eA6VlB)StET?QzEkJ?%FN-0bq@VKRK?136*s1^H|aHWG9iIrV?hQhM$GXw$#w z4gh%S_Al}S1poXOn*sng!~Uyo|NoI7U3+ zF2TV)djIj?@BafN`oCFj{@*!#`W3pZRXpp`Jc#?ZjHvv(Oo1afBBLL_9L=WQ{!KiR zq>(D1QlOEx9{TZ{-Os;|{x^Melisitc)WRG8++zMpsdhk2zXKySqrr7QRiQIr19is zt#H%+-70%kWyPW5<2wT@N_J=j|kZ-M{-J#bK~gYf?7N~alIdf0^S zl8wHZp@4DnHDO=|YgI0WB0Kr>on(p=8rnr5U8;Vkvv{cQi3(Y*l(es!VrJoXsLbmnkMo0A-I?y4ToB)jqwpSmVHJlzbR49u zEA|~+CWbjyKkVJ;96eHfoD2pnSvNG1-`5#@o@>O^o@7-Sb_o-w_sv!22zWU0+|Sr1 z(x9RY7-D*90KSxaA^;8y91*Cb@zig=G5jz-4uAG0dY|9v`%zbN|CE%A*GC0C#-w*X zh|jnBcA7#31Bck7%)&%z)oFPDNm*KNw9XsI66u!s_8a>Lem_IboPcFy`iX?fc>IAW z15K|T^aTgP)~6`is;M~(L! 
z;|=2l%HD8V=)nd+ES0PVZ+6qP2WJ{&)-(J|6WU(u`bTB3`J~=v1qW{ z)+Z-OLJRK;c6>ExzjyfEK5;O>vT3)qukU&PAy>==KHPKQII_I!tlwAwm@ptTD*!f6 zxI1yJx(=p9t4-hebq&E=YBj`K~a9SzJA zVk+t;@D>ForvK@<7&HYbe>05#wIpr!(_e zQXpeZz<|xA=1IS-`z<_+Zon1AEk6k|el-h$)d%%*jEZZu|y#=%COd|Buc~ODQd*k>Vtx!@CN<4EbnZ>;?BD^p< z_MP}(?h^PwyT0tZj8V0JO#;q)#ilLhOKr8>f=4T1Lo8%q(2jrDxJmQb{vch+hI(>+ zMU!%_Up&^WaF(3WGg^r|-3({G_@?Zb8u6Ba^r}==1v`;uj#H&AVIT8V%#!+ozej$8&te#!WvzjhXOO zsN758pmL_NwNZ9Z*VCm^T#rt04@rZ+uL!cSj9VkGW||CRloM3xsQQk5^_yFc6S7ZT z!QR}ugC79XSIEBTJq%os(4rF?1iQiy%GHA>N1QJNZBGq49~{(OH$)Q8_E&1AYPT)& z)DBi)F|6#o_mgnmi6d=5zF`T9*B2^%{@wbvtumfNf}@^t-gKMrfiKfZO+4pw1?rEf zB*CHG0=Y;#SVx*@Pr(w5+QZs5w!R)*KAv|ao?X(iPVtAigcZf975G*fP(=Zcb`$r^f6ttI1Wc0{G@;pRzhX18~00*HH$=*sa|E%eUWGPp;ew3 z#es_Oz@I41v>^_&#E1yt56nVluurA;8ECTjpT%*bDh}_*?yt)Sj{O)__e4E^4b%}> zd?7jfnvz4^C&t;&RviGoF#sNpOngS=wjm^>is5Vu-1~NSUNMqiF4j5E%S=cyv!i`j z&&A{)z7IR1(9D%QV|hc@T7Il3(&aL;^WZr6-OHMGi5b%$n-WwJFkRYa!r|8)@Z8Op zNM{Brx!lAqkI_55ohh}zbBkrcHKFK}v{*Kwez;Xj!<8{hazmvC<%w0^%SxMHd6W9h zv#O2>hOV6qA7*S(aDgv@vev&{!qPNjfK3c@FF?x0_xWbuj?z)<23meiAg3udeg}`2 zG@Ac}Yoq2Hl?+;H-F*#F1?G{#zS(|Ick<^VxCNg&-{vjj$$Uu&0dP~`T#$GP?Vte8 zIPnEeA(QO+)%XjolD>X_>X#~npfA<O&C!qOzw3RcHa4S`zyDnR~$Wg@eIhX_O|kpie{FvUcp$U59q{(M(Ah-Gkal!oh?2 zB;wu$sd}T!&jA`BZi{I7`*Jh%CcO97jjAN=n6%hy2V0!=W;XhPAIzl9L{a#Enz2rv z-c)0(_O)Q=vAKxOlU{9SR;b~CjmC!XbFKT!$Hr2x;RVGm1}ljOba&DWEOE~U0X2!av)`3gH1eL|-P#KdGXCE@*2h5{Fm>m`oAgGtOH$M^jcjqraM_?CtwDh;?Iu!0XC4&Fd@N(?6n$!2oAE6pev_nwd83S@Dr{C<(BZpj|f z=O3_wB}%Fbeaays{S~UD z=HyFW-PjX{Jv;~zm9@QSBD{50gV~%UccC^mKL6+w zIy*RlbQKw__pXPb+|WP+I%8YUZD{Zp*xx4lui5oM?ESy)YPTH>?&C+ESO0kqVEVtE z(Es&H|M!LeV_V{X^^0q=J2hM-m>!(u6HadtK5krOQWsn(8uJgte|`J}YuoJbGyti+zr0**9sZDV|S^N8@ih|~w3c0r*|1X%i-8KLK literal 0 HcmV?d00001 diff --git a/tools/speech_data_explorer/data_explorer.py b/tools/speech_data_explorer/data_explorer.py index 23ea375fa608..de2b342a1028 100755 --- a/tools/speech_data_explorer/data_explorer.py +++ b/tools/speech_data_explorer/data_explorer.py @@ -59,6 +59,7 @@ 'contains ': 'contains', } comparison_mode = False + # parse table filter queries def split_filter_part(filter_part): for op in filter_operators: @@ -125,6 +126,7 @@ def parse_args(): # automaticly going in comparison mode, if there is names_compared argument if args.names_compared is not None: comparison_mode = True + logging.error("comparison mod set to true") else: comparison_mode = False @@ -549,6 +551,7 @@ def absolute_audio_filepath(audio_filepath, audio_base_path): name_1, name_2 = args.names_compared print(name_1, name_2) + print('Loading data...') if not comparison_mode: data, wer, cer, wmr, mwa, num_hours, vocabulary, alphabet, metrics_available = load_data( @@ -930,8 +933,7 @@ def update_wordstable(page_current, sort_by, filter_query): wordstable_columns_tool = [{'name': 'Word', 'id': 'word'}, {'name': 'Count', 'id': 'count'}] wordstable_columns_tool.append({'name': 'Accuracy_1, %', 'id': 'accuracy_1'}) wordstable_columns_tool.append({'name': 'Accuracy_2, %', 'id': 'accuracy_2'}) -# wordstable_columns_tool.append({'name': 'Accuracy_' + name_1 + ', %', 'id': 'accuracy_1'}) -# wordstable_columns_tool.append({'name': 'Accuracy_' + name_2 + ', %', 'id': 'accuracy_2'}) + if comparison_mode: model_name_1, model_name_2 = name_1, name_2 @@ -939,6 +941,47 @@ def update_wordstable(page_current, sort_by, filter_query): for i in range(len(vocabulary_1)): vocabulary_1[i].update(vocabulary_2[i]) + def _wer_(grnd, pred): + grnd_words = grnd.split() + pred_words = pred.split() + edit_distance = editdistance.eval(grnd_words, pred_words) + wer = 
edit_distance / len(grnd_words) + return wer + + def metric(a, b, met=None): + cer = editdistance.distance(a, b) / len(a) + wer = _wer_(a, b) + return round(float(wer) * 100, 2), round(float(cer) * 100, 2) + + def write_metrics(data, Ox, Oy): + da = pd.DataFrame.from_records(data) + gt = da['text'] + tt_1 = da[Ox] + tt_2 = da[Oy] + + wer_tt1_c, cer_tt1_c = [], [] + wer_tt2_c, cer_tt2_c = [], [] + + for j in range(len(gt)): + wer_tt1, cer_tt1 = metric(gt[j], tt_1[j]) # first model + wer_tt2, cer_tt2 = metric(gt[j], tt_2[j]) # second model + wer_tt1_c.append(wer_tt1) + cer_tt1_c.append(cer_tt1) + wer_tt2_c.append(wer_tt2) + cer_tt2_c.append(cer_tt2) + + da['wer_' + Ox] = pd.Series(wer_tt1_c, index=da.index) + da['wer_' + Oy] = pd.Series(wer_tt2_c, index=da.index) + da['cer_' + Ox] = pd.Series(cer_tt1_c, index=da.index) + da['cer_' + Oy] = pd.Series(cer_tt2_c, index=da.index) + return da.to_dict('records') + + data_with_metrics = write_metrics(data, model_name_1, model_name_2) + if args.show_statistics is not None: + textdiffstyle = {'border': 'none', 'width': '100%', 'height': '100%'} + else: + textdiffstyle = {'border': 'none', 'width': '1%', 'height': '1%', 'display': 'none'} + def prepare_data(df, name1=model_name_1, name2=model_name_2): res = pd.DataFrame() tmp = df['word'] @@ -1050,6 +1093,33 @@ def read_query(query): return "No filter query" return dcc.Markdown('`filter_query = "{}"`'.format(query)) + ############ + @app.callback( + Output('filter-query-input-2', 'style'), + Output('filter-query-output-2', 'style'), + Input('filter-query-read-write', 'value'), + ) + def query_input_output(val): + input_style = {'width': '100%'} + output_style = {} + input_style.update(display='inline-block') + output_style.update(display='none') + return input_style, output_style + + @app.callback(Output('datatable-advanced-filtering-2', 'filter_query'), Input('filter-query-input-2', 'value')) + def write_query(query): + if query is None: + return '' + return query + + @app.callback(Output('filter-query-output-2', 'children'), Input('datatable-advanced-filtering-2', 'filter_query')) + def read_query(query): + if query is None: + return "No filter query" + return dcc.Markdown('`filter_query = "{}"`'.format(query)) + + ############ + def display_query(query): if query is None: return '' @@ -1068,50 +1138,325 @@ def display_query(query): ] ) - comparison_layout = [ - html.Div( - [dcc.Markdown("model 1:" + ' ' + model_name_1[10:]), dcc.Markdown("model 2:" + ' ' + model_name_2[10:])] - ), - html.Hr(), + +comparison_layout = [ + html.Div( + [ + dcc.Markdown("model 1:" + ' ' + model_name_1[10:]), + dcc.Markdown("model 2:" + ' ' + model_name_2[10:]), + dcc.Dropdown( + ['word level', 'utterance level'], 'word level', placeholder="choose comparison lvl", id='lvl_choose' + ), + ] + ), + html.Hr(), + html.Div( + [ + html.Div( + [ + dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_1, id='xaxis-column'), + dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_2, id='yaxis-column'), + dcc.Dropdown( + for_col_names.select_dtypes(include='number').columns[::], + placeholder='Select what will encode color of points', + id='color-column', + ), + dcc.Dropdown( + for_col_names.select_dtypes(include='number').columns[::], + placeholder='Select what will encode size of points', + id='size-column', + ), + dcc.Dropdown( + ['yes', 'no'], + placeholder='if you want to enable dot spacing', + id='dot_spacing', + style={'width': '200%'}, + ), + dcc.Input(id='radius', placeholder='Enter radius 
of spacing (std is 0.01)'), + html.Hr(), + dcc.Input(id='filter-query-input', placeholder='Enter filter query',), + ], + style={'width': '200%', 'display': 'inline-block', 'float': 'middle'}, + ), + html.Hr(), + html.Div(id='filter-query-output'), + dash_table.DataTable( + id='datatable-advanced-filtering', + columns=wordstable_columns_tool, + data=vocabulary_1, + editable=False, + page_action='native', + page_size=5, + filter_action="native", + ), + html.Hr(), + html.Div(id='datatable-query-structure', style={'whitespace': 'pre'}), + html.Hr(), + dbc.Row(dbc.Col(dcc.Graph(id='voc_graph'),),), + html.Hr(), + ], + id='wrd_lvl', + style={'display': 'block'}, + ), + html.Div( + [ + html.Div( + [ + dcc.Dropdown(['WER', 'CER'], 'WER', placeholder="Choose metric", id="choose_metric"), + dbc.Row(dbc.Col(html.H5('Data'), class_name='text-secondary'), class_name='mt-3'), + html.Hr(), + html.Hr(), + dcc.Input(id='filter-query-input-2', placeholder='Enter filter query', style={'width': '100%'}), + html.Div(id='filter-query-output-2'), + dbc.Row( + dbc.Col( + [ + dash_table.DataTable( + id='datatable-advanced-filtering-2', + columns=[ + {'name': k.replace('_', ' '), 'id': k, 'hideable': True} + for k in data_with_metrics[0] + ], + data=data_with_metrics, + editable=False, + page_action='native', + page_size=5, + row_selectable='single', + selected_rows=[0], + page_current=0, + filter_action="native", + style_cell={ + 'overflow': 'hidden', + 'textOverflow': 'ellipsis', + 'maxWidth': 0, + 'textAlign': 'center', + }, + style_header={ + 'color': 'text-primary', + 'text_align': 'center', + 'height': 'auto', + 'whiteSpace': 'normal', + }, + css=[ + { + 'selector': '.dash-spreadsheet-menu', + 'rule': 'position:absolute; bottom: 8px', + }, + {'selector': '.dash-filter--case', 'rule': 'display: none'}, + {'selector': '.column-header--hide', 'rule': 'display: none'}, + ], + ), + dbc.Row(dbc.Col(html.Audio(id='player-1', controls=True),), class_name='mt-3'), + ] + ) + ), + ] + + [ + dbc.Row( + [ + dbc.Col( + html.Div(children=k.replace('_', '-')), + width=2, + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + dbc.Col( + html.Div(id='__' + k), + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + ] + ) + for k in data_with_metrics[0] + ] + ), + ], + id='unt_lvl', + ), +] + +if args.show_statistics is not None: + comparison_layout += [ html.Div( [ - dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_1, id='xaxis-column'), - dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_2, id='yaxis-column'), - dcc.Dropdown( - for_col_names.select_dtypes(include='number').columns[::], - placeholder='Select what will encode color of points', - id='color-column', - ), - dcc.Dropdown( - for_col_names.select_dtypes(include='number').columns[::], - placeholder='Select what will encode size of points', - id='size-column', - ), - dcc.Dropdown(['yes', 'no'], placeholder='if you want to enable dot spacing', id='dot_spacing'), - dcc.Input(id='radius', placeholder='Enter radius of spacing (std is 0.01)'), - html.Hr(), - dcc.Input(id='filter-query-input', placeholder='Enter filter query'), + dbc.Row( + [ + dbc.Col( + html.Div(children='text diff'), + width=2, + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + dbc.Col( + html.Iframe( + id='__diff', + sandbox='', + srcDoc='', + style=textdiffstyle, + className='bg-light font-monospace text-break small', + ), + class_name='mt-1 bg-light font-monospace 
text-break small rounded border', + ), + ], + id="text_diff_div", + ) ], - style={'width': '50%', 'display': 'inline-block', 'float': 'middle'}, - ), - html.Hr(), - html.Div(id='filter-query-output'), - dash_table.DataTable( - id='datatable-advanced-filtering', - columns=wordstable_columns_tool, - data=vocabulary_1, - editable=False, - page_action='native', - page_size=5, - filter_action="native", + id='mid_thing', + style={'display': 'block'}, ), - html.Hr(), - html.Div(id='datatable-query-structure', style={'whitespace': 'pre'}), - html.Hr(), - dbc.Row(dbc.Col(dcc.Graph(id='voc_graph'),),), - html.Hr(), ] + @app.callback( + [ + Output(component_id='wrd_lvl', component_property='style'), + Output(component_id='unt_lvl', component_property='style'), + Output(component_id='mid_thing', component_property='style'), + Output(component_id='down_thing', component_property='style'), + Input(component_id='lvl_choose', component_property='value'), + ] + ) + def show_hide_element(visibility_state): + if visibility_state == 'word level': + return ( + {'width': '50%', 'display': 'inline-block', 'float': 'middle'}, + {'width': '50%', 'display': 'none', 'float': 'middle'}, + {'display': 'none'}, + {'display': 'none'}, + ) + else: + return ( + {'width': '100%', 'display': 'none', 'float': 'middle'}, + {'width': '100%', 'display': 'inline-block', 'float': 'middle'}, + {'display': 'block'}, + {'display': 'block'}, + ) + + +comparison_layout += [ + html.Div( + [ + html.Div( + [ + dbc.Row(dbc.Col(dcc.Graph(id='utt_graph'),),), + html.Hr(), + dcc.Input(id='clicked_aidopath', style={'width': '100%'}), + html.Hr(), + dcc.Input(id='my-output-1', style={'display': 'none'}), # we do need this + ] + ), + html.Div([dbc.Row(dbc.Col(dcc.Graph(id='signal-graph-1')), class_name='mt-3'),]), + ], + id='down_thing', + style={'display': 'block'}, + ) +] + + +if args.show_statistics is None: + + @app.callback( + [ + Output(component_id='wrd_lvl', component_property='style'), + Output(component_id='unt_lvl', component_property='style'), + Output(component_id='down_thing', component_property='style'), + Input(component_id='lvl_choose', component_property='value'), + ] + ) + def show_hide_element(visibility_state): + if args.show_statistics is not None: + a = {'border': 'none', 'width': '100%', 'height': '100%', 'display': 'block'} + else: + a = {'border': 'none', 'width': '100%', 'height': '100%', 'display': 'none'} + if visibility_state == 'word level': + return ( + {'width': '50%', 'display': 'inline-block', 'float': 'middle'}, + {'width': '50%', 'display': 'none', 'float': 'middle'}, + {'display': 'none'}, + ) + else: + return ( + {'width': '100%', 'display': 'none', 'float': 'middle'}, + {'width': '100%', 'display': 'inline-block', 'float': 'middle'}, + {'display': 'block'}, + ) + + +store = [] + + +@app.callback( + [Output('datatable-advanced-filtering-2', 'page_current'), Output('my-output-1', 'value')], + [Input('utt_graph', 'clickData'),], +) +def real_select_click(hoverData): + if hoverData is not None: + path = str(hoverData['points'][0]['customdata'][-1]) + for t in range(len(data_with_metrics)): + if data_with_metrics[t]['audio_filepath'] == path: + ind = t + s = t #% 5 + sel = s + pg = math.ceil(ind // 5) + return pg, sel + else: + return 0, 0 + + +@app.callback( + [Output('datatable-advanced-filtering-2', 'selected_rows')], [Input('my-output-1', 'value')], +) +def real_select_click(num): + s = num + return [[s]] + + +CALCULATED_METRIC = [False, False] + + +@app.callback( + [ + Output('utt_graph', 'figure'), + 
Output('clicked_aidopath', 'value'), + Input('choose_metric', 'value'), + Input('utt_graph', 'clickData'), + Input('datatable-advanced-filtering-2', 'derived_virtual_data'), + ], +) +def draw_table_with_metrics(met, hoverData, data_virt): + Ox = name_1 + Oy = name_2 + if met == "WER": + cerower = 'wer_' + else: + cerower = 'cer_' + da = pd.DataFrame.from_records(data_virt) + + c = da + fig = px.scatter( + c, + x=cerower + Ox, + y=cerower + Oy, + width=1000, + height=900, + color='num_words', + hover_data={ + 'text': True, + Ox: True, + Oy: True, + 'wer_' + Ox: True, + 'wer_' + Oy: True, + 'cer_' + Ox: True, + 'cer_' + Oy: True, + 'audio_filepath': True, + }, + ) #'numwords': True, + fig.add_shape(type="line", x0=0, y0=0, x1=100, y1=100, line=dict(color="Red", width=1, dash="dot",)) + fig.update_layout(clickmode='event+select') + fig.update_traces(marker_size=10) + path = None + + if hoverData is not None: + path = str(hoverData['points'][0]['customdata'][-1]) + + return fig, path + @app.callback( [Output('datatable', 'data'), Output('datatable', 'page_count')], @@ -1219,6 +1564,18 @@ def show_item(idx, data): return [data[idx[0]][k] for k in data[0]] +if comparison_mode: + + @app.callback( + [Output('__' + k, 'children') for k in data_with_metrics[0]], + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data')], + ) + def show_item(idx, data): + if len(idx) == 0: + raise PreventUpdate + return [data[idx[0]][k] for k in data_with_metrics[0]] + + @app.callback(Output('_diff', 'srcDoc'), [Input('datatable', 'selected_rows'), Input('datatable', 'data'),]) def show_diff( idx, data, @@ -1245,6 +1602,35 @@ def show_diff( return diff_html +@app.callback( + Output('__diff', 'srcDoc'), + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data'),], +) +def show_diff( + idx, data, +): + if len(idx) == 0: + raise PreventUpdate + orig_words = data[idx[0]]['text'] + orig_words = '\n'.join(orig_words.split()) + '\n' + + pred_words = data[idx[0]][fld_nm] + pred_words = '\n'.join(pred_words.split()) + '\n' + + diff = diff_match_patch.diff_match_patch() + diff.Diff_Timeout = 0 + orig_enc, pred_enc, enc = diff.diff_linesToChars(orig_words, pred_words) + diffs = diff.diff_main(orig_enc, pred_enc, False) + diff.diff_charsToLines(diffs, enc) + diffs_post = [] + for d in diffs: + diffs_post.append((d[0], d[1].replace('\n', ' '))) + + diff_html = diff.diff_prettyHtml(diffs_post) + + return diff_html + + @app.callback(Output('signal-graph', 'figure'), [Input('datatable', 'selected_rows'), Input('datatable', 'data')]) def plot_signal(idx, data): if len(idx) == 0: @@ -1298,6 +1684,62 @@ def plot_signal(idx, data): return figs +@app.callback( + Output('signal-graph-1', 'figure'), + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data')], +) +def plot_signal(idx, data): + if len(idx) == 0: + raise PreventUpdate + figs = make_subplots(rows=2, cols=1, subplot_titles=('Waveform', 'Spectrogram')) + try: + filename = absolute_audio_filepath(data[idx[0]]['audio_filepath'], args.audio_base_path) + audio, fs = librosa.load(path=filename, sr=None) + if 'offset' in data[idx[0]]: + audio = audio[ + int(data[idx[0]]['offset'] * fs) : int((data[idx[0]]['offset'] + data[idx[0]]['duration']) * fs) + ] + time_stride = 0.01 + hop_length = int(fs * time_stride) + n_fft = 512 + # linear scale spectrogram + s = librosa.stft(y=audio, n_fft=n_fft, hop_length=hop_length) + s_db = 
librosa.power_to_db(S=np.abs(s) ** 2, ref=np.max, top_db=100) + figs.add_trace( + go.Scatter( + x=np.arange(audio.shape[0]) / fs, + y=audio, + line={'color': 'green'}, + name='Waveform', + hovertemplate='Time: %{x:.2f} s
Amplitude: %{y:.2f}
', + ), + row=1, + col=1, + ) + figs.add_trace( + go.Heatmap( + z=s_db, + colorscale=[[0, 'rgb(30,62,62)'], [0.5, 'rgb(30,128,128)'], [1, 'rgb(30,255,30)'],], + colorbar=dict(yanchor='middle', lenmode='fraction', y=0.2, len=0.5, ticksuffix=' dB'), + dx=time_stride, + dy=fs / n_fft / 1000, + name='Spectrogram', + hovertemplate='Time: %{x:.2f} s
Frequency: %{y:.2f} kHz
Magnitude: %{z:.2f} dB', + ), + row=2, + col=1, + ) + figs.update_layout({'margin': dict(l=0, r=0, t=20, b=0, pad=0), 'height': 500}) + figs.update_xaxes(title_text='Time, s', row=1, col=1) + figs.update_yaxes(title_text='Amplitude', row=1, col=1) + figs.update_xaxes(title_text='Time, s', row=2, col=1) + figs.update_yaxes(title_text='Frequency, kHz', row=2, col=1) + except Exception as ex: + app.logger.error(f'ERROR in plot signal: {ex}') + + return figs + + @app.callback(Output('player', 'src'), [Input('datatable', 'selected_rows'), Input('datatable', 'data')]) def update_player(idx, data): if len(idx) == 0: @@ -1320,5 +1762,30 @@ def update_player(idx, data): return '' +@app.callback( + Output('player-1', 'src'), + [Input('datatable-advanced-filtering-2', 'selected_rows'), Input('datatable-advanced-filtering-2', 'data')], +) +def update_player(idx, data): + if len(idx) == 0: + raise PreventUpdate + try: + filename = absolute_audio_filepath(data[idx[0]]['audio_filepath'], args.audio_base_path) + signal, sr = librosa.load(path=filename, sr=None) + if 'offset' in data[idx[0]]: + signal = signal[ + int(data[idx[0]]['offset'] * sr) : int((data[idx[0]]['offset'] + data[idx[0]]['duration']) * sr) + ] + with io.BytesIO() as buf: + # convert to PCM .wav + sf.write(buf, signal, sr, format='WAV') + buf.seek(0) + encoded = base64.b64encode(buf.read()) + return 'data:audio/wav;base64,{}'.format(encoded.decode()) + except Exception as ex: + app.logger.error(f'ERROR in audio player: {ex}') + return '' + + if __name__ == '__main__': app.run_server(host='0.0.0.0', port=args.port, debug=args.debug) From 5c3ed943d8b23ae32719f8fd0e51fca5801596d3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 3 Jun 2023 22:52:03 -0700 Subject: [PATCH 013/123] Debug Transformer Engine FP8 support with Megatron-core infrastructure (#6791) * Construct FP8 amax reduction group Signed-off-by: Tim Moon * Update Megatron-core version in CI Signed-off-by: Tim Moon --------- Signed-off-by: Tim Moon Co-authored-by: Tim Moon Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> --- Jenkinsfile | 9 +++++++++ nemo/collections/nlp/parts/nlp_overrides.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 79c696a48600..bc991d195710 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,6 +57,15 @@ pipeline { } } + stage('Megatron Core installation') { + steps { + sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ + cd Megatron-LM && \ + git checkout cd2537d444792b487b1ab5a6fa685e09c9957409 && \ + pip install -e .' 
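For context on the audio playback path added in the speech-data-explorer comparison patch above, the `update_player` callbacks serve audio to the browser as a base64 WAV data URI. A minimal, self-contained sketch of that approach (outside NeMo; the 440 Hz tone and 16 kHz rate are illustrative inputs only, and soundfile/numpy are assumed to be installed):

import base64
import io

import numpy as np
import soundfile as sf


def wav_data_uri(signal: np.ndarray, sample_rate: int) -> str:
    # Serialize the signal to an in-memory WAV file and wrap it in a data URI
    # that an HTML <audio> element (e.g. dash html.Audio) can play directly.
    with io.BytesIO() as buf:
        sf.write(buf, signal, sample_rate, format='WAV')
        buf.seek(0)
        encoded = base64.b64encode(buf.read())
    return 'data:audio/wav;base64,{}'.format(encoded.decode())


# Illustrative input only: one second of a 440 Hz tone at 16 kHz.
tone = (0.1 * np.sin(2 * np.pi * 440.0 * np.arange(16000) / 16000.0)).astype(np.float32)
print(wav_data_uri(tone, 16000)[:48])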
+ } + } + stage('PyTorch Lightning version') { steps { sh 'python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"' diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index 5a0f028ddbe9..c390ba995843 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -168,6 +168,7 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None: pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, virtual_pipeline_model_parallel_size=app_state.virtual_pipeline_model_parallel_size, pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + use_fp8=app_state.use_fp8, ) # assert that fake tp and pp rank match after model parallel init @@ -406,7 +407,7 @@ class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFT model. This is required when no .nemo is available (yet) such as during resumed training. peft_model_ckpt_name: The filename of the ckpt file inside the peft_model_ckpt_path folder - If both are provided the peft_model_ckpt_path takes precedence. + If both are provided the peft_model_ckpt_path takes precedence. If neither are provided, PEFT params are initialized at random (not loaded from any external source). """ From ae004147b50a9d963e4cbe5da6ba75407b0aef5f Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Sun, 4 Jun 2023 21:17:19 -0700 Subject: [PATCH 014/123] Lora/PEFT training script CI test (#6664) * new lora test Signed-off-by: arendu * updates Signed-off-by: arendu * check for chat Signed-off-by: arendu * update Signed-off-by: arendu * update Signed-off-by: arendu * small train set Signed-off-by: arendu * update Signed-off-by: arendu * precision change Signed-off-by: arendu * fixed typo in paths Signed-off-by: arendu * full data with limit val batches Signed-off-by: arendu * tp2 instead of pp2 Signed-off-by: arendu * tp2 instead of pp2 Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Adi Renduchintala --- Jenkinsfile | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index bc991d195710..83223d5c8669 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3381,6 +3381,41 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/gpt_sft_results" } } + stage('L2: Megatron GPT PEFT Lora TP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results" + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ + trainer.devices=2 \ + trainer.log_every_n_steps=1 \ + trainer.max_epochs=9999 \ + trainer.max_steps=3 \ + trainer.val_check_interval=3 \ + ++trainer.limit_val_batches=2 \ + trainer.precision=16 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_peft_lora_results \ + model.pipeline_model_parallel_size=1 \ + model.tensor_model_parallel_size=2 \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \ + model.peft.peft_scheme='lora' \ + model.answer_only_loss=True \ + model.micro_batch_size=1 \ + model.global_batch_size=4 \ + model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + 
model.data.train_ds.concat_sampling_probabilities=[1.0] \ + model.data.train_ds.num_workers=0 \ + model.data.validation_ds.num_workers=0 \ + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" + sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results" + } + } stage('L2: Megatron GPT Eval') { when { anyOf { From 76fc488a6584077c85f9a06d81bfd9f73a65e73a Mon Sep 17 00:00:00 2001 From: bene-ges Date: Mon, 5 Jun 2023 08:29:09 +0300 Subject: [PATCH 015/123] change branch to main, small fix (#6803) Signed-off-by: Alexandra Antonova --- .../nlp/SpellMapper_English_ASR_Customization.ipynb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb index 189ac958d377..e11025aeb1d3 100644 --- a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb +++ b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb @@ -83,8 +83,8 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "GITHUB_ACCOUNT = \"bene-ges\"\n", - "BRANCH = \"spellchecking_asr_customization_double_bert\"\n", + "GITHUB_ACCOUNT = \"NVIDIA\"\n", + "BRANCH = \"main\"\n", "!python -m pip install git+https://github.com/{GITHUB_ACCOUNT}/NeMo.git@{BRANCH}#egg=nemo_toolkit[all]\n", "\n", "# Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n", @@ -974,7 +974,8 @@ "metadata": { "id": "ZwEpAOCaRH7s" }, - "outputs": [] + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -1387,9 +1388,9 @@ "accelerator": "GPU", "colab": { "toc_visible": true, - "provenance": [] + "provenance": [], + "gpuType": "T4" }, - "gpuClass": "standard", "kernelspec": { "display_name": "Python 3", "name": "python3" From aa1986f05960ccd7260f262185b0353811105b3c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 10:19:28 -0700 Subject: [PATCH 016/123] add call to p2p overlap (#6779) (#6786) * add call to p2p overlap * update Jenkins for test --------- Signed-off-by: Abhinav Khattar Signed-off-by: Eric Harper Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper --- Jenkinsfile | 2 +- .../nlp/models/language_modeling/megatron_gpt_model.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 83223d5c8669..fdd311ba4a59 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -61,7 +61,7 @@ pipeline { steps { sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout cd2537d444792b487b1ab5a6fa685e09c9957409 && \ + git checkout e6d7e09845590d0a36bc7f29eb28db974fb8da4e && \ pip install -e .' 
} } diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index fd1382e668cf..96f40b99bdd0 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -468,6 +468,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): no_sync_func=no_sync_func, grad_sync_func=grad_sync_func, param_sync_func=param_sync_func, + overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False), + batch_p2p_comm=self.cfg.get('batch_p2p_comm', True), ) # only the last stages of the pipeline return losses From aa21e8aa91fe893b5c460200b74c928ed918e28a Mon Sep 17 00:00:00 2001 From: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Date: Mon, 5 Jun 2023 11:44:09 -0700 Subject: [PATCH 017/123] fixed decor to show messages only when the wrapped object is called. (#6793) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- nemo/utils/decorators/experimental.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/nemo/utils/decorators/experimental.py b/nemo/utils/decorators/experimental.py index 35b26fb8690d..de62dbaf9ffb 100644 --- a/nemo/utils/decorators/experimental.py +++ b/nemo/utils/decorators/experimental.py @@ -15,19 +15,13 @@ __all__ = ['experimental'] -from nemo.utils import logging - -def experimental(cls): - """ Decorator which indicates that module is experimental. - Use it to mark experimental or research modules. - """ +import wrapt - def wrapped(cls): - logging.warning( - f'Module {cls} is experimental, not ready for production and is not fully supported. Use at your own risk.' - ) +from nemo.utils import logging - return cls - return wrapped(cls=cls) +@wrapt.decorator +def experimental(wrapped, instance, args, kwargs): + logging.warning(f"`{wrapped}` is experimental and not ready for production yet. 
Use at your own risk.") + return wrapped(*args, **kwargs) From f9bb1b0f8272f42cf5f64174ec1e51ebfcc1fe32 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 5 Jun 2023 12:31:14 -0700 Subject: [PATCH 018/123] Bug fix for reset_sequence_parallel_args (#6802) (#6805) Signed-off-by: Markel Sanz Ausin Co-authored-by: Markel Sanz Ausin --- .../nlp/models/language_modeling/megatron_gpt_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 96f40b99bdd0..9aadb6853190 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1282,7 +1282,7 @@ def _reset_sequence_parallelism_args(self): for module in self.get_gpt_module_list(): for mod in module.modules(): if hasattr(mod, "sequence_parallel"): - mod.sequence_parallel = self.last_sequence_parallel + mod.sequence_parallel = False def _restore_sequence_parallelism_args(self): """ Restores the sequence parallelism parameters using the values saved by From 3063e3251bb4dbbf81278084cce132c3e56b4c52 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Tue, 6 Jun 2023 01:52:45 -0400 Subject: [PATCH 019/123] text_generation_utils memory reduction if no logprob needed (#6773) * repro for gpt eval mp mem issue Signed-off-by: Yang Zhang * add print statements for memory allocation Signed-off-by: Yang Zhang * adjusted hot fix that prevents softmax on the entire output embedding,now memory bottlenecked by attention softmax which needs to be solved with FA or long attention Signed-off-by: Yang Zhang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * using compute_logprob to configure inference Signed-off-by: Yang Zhang * enable compute logprob for peft Signed-off-by: Yang Zhang * remove print statements Signed-off-by: Yang Zhang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix ci Signed-off-by: Yang Zhang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added docstrings Signed-off-by: Yang Zhang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add missing config Signed-off-by: Yang Zhang * remove truncate prompt length feature Signed-off-by: Yang Zhang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tensor before all gather needs to be contiguous Signed-off-by: Yang Zhang --------- Signed-off-by: Yang Zhang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: Sandeep Subramanian --- .../tuning/megatron_gpt_peft_eval.py | 22 ++-- .../language_modeling/megatron_gpt_model.py | 2 - .../megatron_gpt_sft_model.py | 4 +- .../megatron_retrieval_model.py | 2 - .../common/text_generation_strategy.py | 1 - .../modules/common/text_generation_utils.py | 110 +++++++++++------- 6 files changed, 83 insertions(+), 58 deletions(-) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index a5bf1ee552cb..fc427a60d172 100644 --- 
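As a usage note for the reworked `experimental` decorator above: with `wrapt`, the warning is emitted when the wrapped object is called rather than when it is defined or imported. A small sketch (the import path is assumed to mirror the module shown in that diff):

from nemo.utils.decorators import experimental  # assumed re-export of the module above


@experimental
def toy_feature(x):
    # Plain function standing in for an experimental API.
    return x * 2


# Nothing is logged at import/decoration time; the "experimental" warning fires here.
result = toy_feature(3)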
a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -155,7 +155,7 @@ def main(cfg) -> None: if os.path.isdir(cfg.model.restore_from_path): save_restore_connector.model_extracted_dir = cfg.model.restore_from_path - model = NLPModel.restore_from( + model = MegatronGPTSFTModel.restore_from( restore_path=cfg.model.restore_from_path, trainer=trainer, override_config_path=peft_model_cfg, @@ -180,15 +180,17 @@ def main(cfg) -> None: for batch in response: batch_sentences = [s for s in batch['sentences']] batch_tokens = [s for s in batch['tokens']] - batch_logprob = [s.tolist() for s in batch['logprob']] - for s, t, l in zip(batch_sentences, batch_tokens, batch_logprob): - if cfg.inference.get("verbose", False): - d = { - 'sentence': s, - 'tokens_with_logprobs': ', '.join([f"{_t} {_l:.4f}" for _t, _l in zip(t, l)]), - } - f.write(json.dumps(d, sort_keys=True, indent=2) + '\n') - else: + if cfg.inference.compute_logprob: + batch_logprob = [s.tolist() for s in batch['logprob']] + for s, t, l in zip(batch_sentences, batch_tokens, batch_logprob): + if cfg.inference.get("verbose", False): + d = { + 'sentence': s, + 'tokens_with_logprobs': ', '.join([f"{_t} {_l:.4f}" for _t, _l in zip(t, l)]), + } + f.write(json.dumps(d, sort_keys=True, indent=2) + '\n') + else: + for s in batch_sentences: d = {'sentence': s} f.write(json.dumps(d) + '\n') print("predictions saved to {}".format(cfg.inference.outfile_path)) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 9aadb6853190..3530ffcfc371 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1111,7 +1111,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - del inference_config['compute_logprob'] inference_config['inputs'] = batch inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True @@ -1121,7 +1120,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) return compute_prob_response else: - del inference_config['compute_logprob'] inference_config['inputs'] = batch return generate(self, **inference_config) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 1dc335b86609..9507a01d01f0 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -35,6 +35,7 @@ LengthParam, SamplingParam, generate, + get_computeprob_response, megatron_gpt_generate, ) from nemo.utils import AppState, logging @@ -539,7 +540,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - del inference_config['compute_logprob'] inference_config['inputs'] = batch inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True @@ -549,8 +549,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] 
compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) return compute_prob_response else: - del inference_config['compute_logprob'] - # for megatron_gpt_eval.py if isinstance(batch, list): inference_config['inputs'] = batch diff --git a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py index afd8ad54d150..5900513f3547 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_retrieval_model.py @@ -464,7 +464,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] inference_config = inference_config.copy() compute_logprob = inference_config['compute_logprob'] if compute_logprob: - del inference_config['compute_logprob'] inference_config['inputs'] = batch inference_config['tokens_to_generate'] = 1 inference_config['all_probs'] = True @@ -474,7 +473,6 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] compute_prob_response = get_computeprob_response(self.tokenizer, response, batch) return compute_prob_response else: - del inference_config['compute_logprob'] inference_config['inputs'] = batch return generate(self, **inference_config, strategy=self.inference_strategy) diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 27ae3b2606d3..310065fc3523 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -53,7 +53,6 @@ def __init__(self, model): def forward_step(self, batch, tensor_shape): fwd_bwd_function = get_forward_backward_func() - output_tensor = fwd_bwd_function( forward_step_func=self.model.get_forward_output_only_func(), data_iterator=iter([batch,]), diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 3a07a807b11a..a56304970bdc 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -97,6 +97,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para inputs=inputs, tokens_to_generate=length_params['max_length'], all_probs=sampling_params['all_probs'], + compute_logprob=sampling_params['compute_logprob'], temperature=sampling_params['temperature'], add_BOS=sampling_params['add_BOS'], top_k=sampling_params['top_k'], @@ -116,6 +117,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para inputs=inputs, tokens_to_generate=length_params['max_length'], all_probs=sampling_params['all_probs'], + compute_logprob=sampling_params['compute_logprob'], temperature=sampling_params['temperature'], add_BOS=sampling_params['add_BOS'], top_k=sampling_params['top_k'], @@ -269,6 +271,7 @@ def send_generate_info( context_length_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -288,6 +291,7 @@ def send_generate_info( context_tokens_tensor.size(1), # seq_len tokens_to_generate, all_probs, + compute_logprob, # whether to compute log probabilities matrix temperature, top_k, top_p, @@ -317,18 +321,19 @@ def receive_generate_info(): """ model_parallel_group = parallel_state.get_model_parallel_group() src = get_model_parallel_src_rank() - input_info_tensor = torch.empty(10, 
dtype=torch.float32, device=torch.cuda.current_device()) + input_info_tensor = torch.empty(11, dtype=torch.float32, device=torch.cuda.current_device()) torch.distributed.broadcast(input_info_tensor, src, model_parallel_group) batch_size = int(input_info_tensor[0].item()) seq_len = int(input_info_tensor[1].item()) tokens_to_generate = int(input_info_tensor[2].item()) all_probs = bool(input_info_tensor[3].item()) - temperature = float(input_info_tensor[4].item()) - top_k = int(input_info_tensor[5].item()) - top_p = float(input_info_tensor[6].item()) - greedy = bool(input_info_tensor[7].item()) - repetition_penalty = float(input_info_tensor[8].item()) - min_tokens_to_generate = int(input_info_tensor[9].item()) + compute_logprob = bool(input_info_tensor[4].item()) # whether to compute log probabilities matrix + temperature = float(input_info_tensor[5].item()) + top_k = int(input_info_tensor[6].item()) + top_p = float(input_info_tensor[7].item()) + greedy = bool(input_info_tensor[8].item()) + repetition_penalty = float(input_info_tensor[9].item()) + min_tokens_to_generate = int(input_info_tensor[10].item()) context_length_tensor = torch.empty(batch_size, dtype=torch.int64, device=torch.cuda.current_device()) context_tokens_tensor = torch.empty(batch_size, seq_len, dtype=torch.int64, device=torch.cuda.current_device()) @@ -349,6 +354,7 @@ def receive_generate_info(): context_tokens_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -370,6 +376,7 @@ def synced_generate( top_k=0, top_p=0.0, greedy=False, + compute_logprob=False, repetition_penalty=1.2, min_tokens_to_generate=0, end_strings=[], @@ -394,6 +401,7 @@ def synced_generate( context_length_tensor, tokens_to_generate, all_probs, + compute_logprob=compute_logprob, temperature=temperature, end_strings=end_strings, extra={ @@ -411,7 +419,8 @@ def synced_generate( if parallel_state.is_pipeline_last_stage(): src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() - torch.distributed.broadcast(output_logits, src, group) + if compute_logprob: + torch.distributed.broadcast(output_logits, src, group) if all_probs: src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() @@ -422,15 +431,18 @@ def synced_generate( src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() - precision = model._trainer.precision - if precision in [16, "16"]: - dtype = torch.float16 - elif precision == "bf16": - dtype = torch.bfloat16 - else: - dtype = torch.float32 - output_logits = torch.empty(tokens.size(0), context_length - 1, dtype=dtype, device=torch.device("cuda")) - torch.distributed.broadcast(output_logits, src, group) + if compute_logprob: + precision = model._trainer.precision + if precision in [16, "16"]: + dtype = torch.float16 + elif precision == "bf16": + dtype = torch.bfloat16 + else: + dtype = torch.float32 + output_logits = torch.empty( + tokens.size(0), context_length - 1, dtype=dtype, device=torch.device("cuda") + ) + torch.distributed.broadcast(output_logits, src, group) if all_probs: src = parallel_state.get_pipeline_model_parallel_last_rank() @@ -457,6 +469,7 @@ def generate( top_k=0, top_p=0.0, greedy=False, + compute_logprob=False, repetition_penalty=1.0, min_tokens_to_generate=0, end_strings=['<|endoftext|>'], @@ -504,6 +517,7 @@ def generate( context_length_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -518,6 +532,7 @@ def 
generate( context_tokens_tensor, tokens_to_generate, all_probs, + compute_logprob, temperature, top_k, top_p, @@ -535,6 +550,7 @@ def generate( tokens_to_generate, all_probs, temperature, + compute_logprob=compute_logprob, top_k=top_k, top_p=top_p, greedy=greedy, @@ -619,6 +635,7 @@ def sample_sequence_batch( context_lengths, tokens_to_generate, all_probs=False, + compute_logprob=False, type_ids=None, temperature=None, end_strings=['<|endoftext|>'], @@ -673,11 +690,18 @@ def sample_sequence_batch( output = inference_strategy.forward_step(batch, tensor_shape) if parallel_state.is_pipeline_last_stage(): - output = output[0]['logits'] - output = tensor_parallel.gather_from_tensor_model_parallel_region(output) - assert output is not None - logits = output[:, -1].view(batch_size, -1).contiguous() + if compute_logprob: + output = output[0]['logits'] + output = tensor_parallel.gather_from_tensor_model_parallel_region(output) + assert output is not None + logits = output[:, -1].view(batch_size, -1).contiguous() + + else: + logits = output[0]['logits'][:, -1].contiguous() + logits = tensor_parallel.gather_from_tensor_model_parallel_region(logits) + assert logits is not None + logits = logits.view(batch_size, -1) # make sure it will generate at least min_length min_length = extra.get('min_tokens_to_generate', 0) @@ -689,6 +713,7 @@ def sample_sequence_batch( logits[:, tokenizer.vocab_size :] = -float('Inf') # started indicates whether the current token step passes the context_length, so we make sure not to overwrite the context tokens + started = context_lengths <= context_length if extra.get('greedy', False): prev = torch.argmax(logits, dim=-1).view(-1) @@ -716,23 +741,25 @@ def sample_sequence_batch( # Insert either new predicted or next prompt token tokens[:, context_length] = new_tokens - if output_logits is None: - output = F.log_softmax(output[:, :context_length, :], 2) - indices = torch.unsqueeze(tokens[:, 1 : context_length + 1], 2) - output_logits = torch.gather(output, 2, indices).squeeze(2) - all_generated_indices = indices[:, :, 0] - if all_probs: - full_logits = output - else: - output = F.log_softmax(output, 2) - indices = torch.unsqueeze(new_tokens, 1).unsqueeze(2) - new_output_logits = torch.gather(output, 2, indices).squeeze(2) + if compute_logprob: + if output_logits is None: + output = F.log_softmax(output[:, :context_length, :], 2) - # TODO(rprenger) we're copying output_logits every time. Should pre-allocate - output_logits = torch.cat([output_logits, new_output_logits], 1) - all_generated_indices = torch.cat([all_generated_indices, indices[:, :, 0]], 1) - if all_probs: - full_logits = torch.cat([full_logits, output], 1) + indices = torch.unsqueeze(tokens[:, 1 : context_length + 1], 2) + output_logits = torch.gather(output, 2, indices).squeeze(2) + all_generated_indices = indices[:, :, 0] + if all_probs: + full_logits = output + else: + output = F.log_softmax(output, 2) + indices = torch.unsqueeze(new_tokens, 1).unsqueeze(2) + new_output_logits = torch.gather(output, 2, indices).squeeze(2) + + # TODO(rprenger) we're copying output_logits every time. 
Should pre-allocate + output_logits = torch.cat([output_logits, new_output_logits], 1) + all_generated_indices = torch.cat([all_generated_indices, indices[:, :, 0]], 1) + if all_probs: + full_logits = torch.cat([full_logits, output], 1) src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_embedding_group() @@ -752,10 +779,13 @@ def sample_sequence_batch( src = parallel_state.get_pipeline_model_parallel_last_rank() group = parallel_state.get_pipeline_model_parallel_group() torch.distributed.broadcast(done, src, group) - if all_probs: - yield tokens, lengths, output_logits, full_logits + if compute_logprob: + if all_probs: + yield tokens, lengths, output_logits, full_logits + else: + yield tokens, lengths, output_logits, None else: - yield tokens, lengths, output_logits, None + yield tokens, lengths, None, None else: if parallel_state.is_pipeline_first_stage(): From 010a0e68675d16abe5d4670c7c0b2d093934bfb1 Mon Sep 17 00:00:00 2001 From: tbartley94 <90423858+tbartley94@users.noreply.github.com> Date: Tue, 6 Jun 2023 02:28:31 -0400 Subject: [PATCH 020/123] Fixed bug in MaskedSpecAug that overestimates samples. (#6775) Signed-off-by: tbartley94 --- nemo/collections/asr/modules/audio_preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/asr/modules/audio_preprocessing.py b/nemo/collections/asr/modules/audio_preprocessing.py index fbd05cb1809b..91c0c10b9604 100644 --- a/nemo/collections/asr/modules/audio_preprocessing.py +++ b/nemo/collections/asr/modules/audio_preprocessing.py @@ -608,7 +608,7 @@ def forward(self, input_spec, length): for idx in range(input_spec.shape[0]): cur_len = length[idx] - patches = range(cur_len // self.patch_size - 1) + patches = range(cur_len // self.patch_size) masked_patches = random.sample(patches, mask_patches) for mp in masked_patches: From 8c26464620cfcb0424b576f4102c3abb0130afa2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 09:50:03 -0700 Subject: [PATCH 021/123] update core version (#6817) (#6819) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index fdd311ba4a59..f3afda26d2bf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -61,7 +61,7 @@ pipeline { steps { sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout e6d7e09845590d0a36bc7f29eb28db974fb8da4e && \ + git checkout d2891b4ad3a00e3c4223f89491afd9e1b812f9b5 && \ pip install -e .' 
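To make the memory argument behind the `compute_logprob` changes above concrete, here is a rough standalone sketch (tensor sizes are illustrative and much smaller than a real GPT vocabulary and sequence length): keeping per-position log-probabilities materializes a full [batch, seq_len, vocab] tensor at every step, whereas plain generation only needs the logits of the last position.

import torch
import torch.nn.functional as F

batch, seq_len, vocab = 2, 256, 32000            # illustrative sizes only
logits = torch.randn(batch, seq_len, vocab)      # stand-in for the model output

# compute_logprob=True path: a full [batch, seq_len, vocab] log-softmax is kept.
full_logprobs = F.log_softmax(logits, dim=-1)

# compute_logprob=False path: only the last position is needed to pick the next token.
last_logits = logits[:, -1]                      # [batch, vocab]
next_token = torch.argmax(last_logits, dim=-1)   # greedy step; no full-sequence softmax kept

print(full_logprobs.numel() // last_logits.numel())  # the full tensor is seq_len times larger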
} } From acf50f42712aa393a9a550a6af7e426f2e4372bf Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Tue, 6 Jun 2023 10:16:50 -0700 Subject: [PATCH 022/123] lora pp2 (#6818) Signed-off-by: arendu --- Jenkinsfile | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index f3afda26d2bf..27537b53a557 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3381,6 +3381,41 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/gpt_sft_results" } } + stage('L2: Megatron GPT PEFT Lora PP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results_pp2" + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ + trainer.devices=2 \ + trainer.log_every_n_steps=1 \ + trainer.max_epochs=9999 \ + trainer.max_steps=3 \ + trainer.val_check_interval=3 \ + ++trainer.limit_val_batches=2 \ + trainer.precision=16 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_peft_lora_results_pp2 \ + model.pipeline_model_parallel_size=2 \ + model.tensor_model_parallel_size=1 \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \ + model.peft.peft_scheme='lora' \ + model.answer_only_loss=True \ + model.micro_batch_size=1 \ + model.global_batch_size=4 \ + model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.train_ds.concat_sampling_probabilities=[1.0] \ + model.data.train_ds.num_workers=0 \ + model.data.validation_ds.num_workers=0 \ + model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ + model.data.validation_ds.names=[quarel]" + sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results_pp2" + } + } stage('L2: Megatron GPT PEFT Lora TP=2') { when { anyOf { From 04628ca8d38ebdf365b29cd5c12479068a9aafd2 Mon Sep 17 00:00:00 2001 From: Greg Heinrich Date: Tue, 6 Jun 2023 20:20:50 +0200 Subject: [PATCH 023/123] Add optional index mapping dir in mmap text datasets (#6683) If datasets are stored on a read-only medium, index files cannot be created into adjacent files and an alternative directory must be specified for index mapping files. This commit adds an optional `index_mapping_dir` to the constructors. Unit tests are also added. [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Update path formatting for relative paths Signed-off-by: Greg Heinrich --- .../megatron/gpt_sft_dataset.py | 6 +- .../language_modeling/text_memmap_dataset.py | 146 +++++++++++++++--- tests/collections/nlp/test_mem_map_dataset.py | 113 ++++++++++++++ 3 files changed, 238 insertions(+), 27 deletions(-) create mode 100644 tests/collections/nlp/test_mem_map_dataset.py diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index f9ef6c8470c2..94c4b3c54c63 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -51,7 +51,7 @@ def __init__( file_path: Path to a JSONL GPT supervised fine-tuning dataset. Data is formatted as multiple JSON lines with each line formatted as follows. {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... 
Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} tokenizer: Tokenizer for the dataset. Instance of a class that inherits TokenizerSpec (ex: YTTM, SentencePiece). max_seq_length (int): maximum sequence length for each dataset examples. Examples will either be truncated to fit this length or dropped if they cannot be truncated. - min_seq_length (int): min length of each data example in the dataset. Data examples will be dropped if they do not meet the min length requirements. + min_seq_length (int): min length of each data example in the dataset. Data examples will be dropped if they do not meet the min length requirements. add_bos (bool): Whether to add a beginning of sentence token to each data example add_eos (bool): Whether to add an end of sentence token to each data example add_sep (bool): Whether to add a separation token to each data example (goes between prompt and answer) @@ -93,7 +93,9 @@ def __init__( self.prompt_template = self.prompt_template.encode('utf-8').decode('unicode_escape') assert self.truncation_field in ["answer", "context"] - self.indexed_dataset = JSONLMemMapDataset(dataset_paths=[file_path], tokenizer=None, header_lines=0) + self.indexed_dataset = JSONLMemMapDataset( + dataset_paths=[file_path], tokenizer=None, header_lines=0, index_mapping_dir=index_mapping_dir + ) # Will be None after this call if `max_num_samples` is None self._build_samples_mapping() diff --git a/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py b/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py index b26f213282bb..e1a30a3aafb7 100644 --- a/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py @@ -19,6 +19,7 @@ import pickle import time from functools import partial +from typing import Callable, List, Optional, Type import numpy as np import torch @@ -35,7 +36,7 @@ def _build_index_from_memdata(fn, newline_int): """ Build index of delimiter positions between samples in memmap. Can be provided externally. - + Returns a 1D array of ints. """ # use memmap to read file @@ -68,17 +69,28 @@ class TextMemMapDataset(Dataset): def __init__( self, - dataset_paths, - newline_int=10, - header_lines=0, - workers=None, - tokenizer=None, - sort_dataset_paths=True, - build_index_fn=_build_index_from_memdata, + dataset_paths: List[str], + newline_int: Optional[int] = 10, + header_lines: Optional[int] = 0, + workers: Optional[int] = None, + tokenizer: Optional[Type["TokenizerSpec"]] = None, + build_index_fn: Optional[Callable[[str, Optional[int]], bool]] = _build_index_from_memdata, + sort_dataset_paths: Optional[bool] = True, + index_mapping_dir: Optional[str] = None, ): """ - build_index_fn - a callable build_index_fn(fn, newline_int) -> midx [np.array] that returns the index of newlines in a file fn - must be pickleable (to be used in multiprocessing.Pool.map) + Args: + dataset_paths: list of JSONL file paths. + newline_int: ASCII code to use to interpret newlines in file. + header_lines: number of header lines in JSON files. + workers: number of workers to use for creating index files. + tokenizer: tokenizer to use to convert text to tokens. + build_index_fn: a callable build_index_fn(fn, newline_int) -> midx [np.array] + that returns the index of newlines in a file fn must be pickleable + (to be used in multiprocessing.Pool.map). + sort_dataset_paths: whether to sort datasets by paths. 
+ index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. """ super().__init__() self.mdata_midx_list = [] @@ -106,14 +118,20 @@ def __init__( is_ditributed = torch.distributed.is_available() and torch.distributed.is_initialized() if not is_ditributed or (is_ditributed and torch.distributed.get_rank() == 0): - build_index_files(dataset_paths, newline_int, workers=self._worker, build_index_fn=build_index_fn) + build_index_files( + dataset_paths, + newline_int, + workers=self._worker, + build_index_fn=build_index_fn, + index_mapping_dir=index_mapping_dir, + ) if is_ditributed: torch.distributed.barrier() logging.info(f"Loading data files") start_time = time.time() - mdata_midx_list = [self.load_file(fn) for fn in self._files_list] + mdata_midx_list = [self.load_file(fn, index_mapping_dir) for fn in self._files_list] logging.info( f'Time loading {len(mdata_midx_list)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}' ) @@ -193,7 +211,7 @@ def _build_data_from_text(self, text): return data - def load_file(self, fn): + def load_file(self, fn, index_mapping_dir: Optional[str] = None): """ Loads a text file as np.int8. @@ -203,7 +221,7 @@ def load_file(self, fn): size - number of lines in file """ logging.info(f"Loading {fn}") - idx_fn = f"{fn}.{__idx_suffix__}" + idx_fn = _index_fn(fn, index_mapping_dir) # create data map mdata = np.memmap(fn, dtype=np.uint8, mode='r') @@ -246,15 +264,29 @@ class CSVMemMapDataset(TextMemMapDataset): def __init__( self, - dataset_paths, - newline_int=10, - header_lines=1, - workers=None, - tokenizer=None, - sort_dataset_paths=True, + dataset_paths: List[str], + newline_int: Optional[int] = 10, + header_lines: Optional[int] = 0, + workers: Optional[int] = None, + tokenizer: Optional[Type["TokenizerSpec"]] = None, + sort_dataset_paths: Optional[bool] = True, data_col=1, data_sep=',', + index_mapping_dir: Optional[str] = None, ): + """ + Args: + dataset_paths: list of JSONL file paths. + newline_int: ASCII code to use to interpret newlines in file. + header_lines: number of header lines in JSON files. + workers: number of workers to use for creating index files. + tokenizer: tokenizer to use to convert text to tokens. + sort_dataset_paths: whether to sort datasets by paths. + data_col: index of data column. + data_sep: data separator. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. + """ super().__init__( dataset_paths=dataset_paths, newline_int=newline_int, @@ -262,6 +294,7 @@ def __init__( workers=workers, tokenizer=tokenizer, sort_dataset_paths=sort_dataset_paths, + index_mapping_dir=index_mapping_dir, ) self._data_col = data_col self._data_sep = data_sep @@ -280,8 +313,26 @@ class JSONLMemMapDataset(TextMemMapDataset): """ def __init__( - self, dataset_paths, newline_int=10, header_lines=1, workers=None, tokenizer=None, sort_dataset_paths=True, + self, + dataset_paths: List[str], + newline_int: Optional[int] = 10, + header_lines: Optional[int] = 0, + workers: Optional[int] = None, + tokenizer: Optional[Type["TokenizerSpec"]] = None, + sort_dataset_paths: Optional[bool] = True, + index_mapping_dir: Optional[str] = None, ): + """ + Args: + dataset_paths: list of JSONL file paths. + newline_int: ASCII code to use to interpret newlines in file. + header_lines: number of header lines in JSON files. + workers: number of workers to use for creating index files. + tokenizer: tokenizer to use to convert text to tokens. 
+ sort_dataset_paths: whether to sort datasets by paths. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. + """ super().__init__( dataset_paths=dataset_paths, newline_int=newline_int, @@ -289,6 +340,7 @@ def __init__( workers=workers, tokenizer=tokenizer, sort_dataset_paths=sort_dataset_paths, + index_mapping_dir=index_mapping_dir, ) def _build_data_from_text(self, text): @@ -304,9 +356,48 @@ def _index_file_exists(idx_fn): return False -def _build_memmap_index_files(newline_int, build_index_fn, fn): +def _index_fn(fn: str, index_mapping_dir: str) -> str: + """Return base file name of index files. + + This returns the base file name associated with specified index + files. This base name is the base on top of which suffixes + like .npy or .info are added. + + The parent directory is created if it does not already exist. + + fn may be specified in multiple ways: + 1. file name: data.jsonl, + 2. relative path to a file: relative/path/to/data.jsonl, + 3. absolute path to a file: /absolute/path/to/data.jsonl. + + This function returns paths in the pattern of: + 1. /path/to/input_mapping_dir/data.jsonl.idx + 2. /path/to/input_mapping_dir/relative/path/to/data.jsonl.idx + 3. /path/to/input_mapping_dir/absolute/path/to/data.jsonl.idx + + Args: + fn: filename to get base name for. + index_mapping_dir: directory to save the index mapping to. + If None, will write to the same folder as the dataset. + """ + if index_mapping_dir: + # Remove leading "/" and "..". + while fn.startswith(("/", "..")): + if fn.startswith(".."): + fn = fn.lstrip("..") + if fn.startswith("/"): + fn = fn.lstrip("/") + idx_fn = f"{os.path.join(index_mapping_dir, fn)}.{__idx_suffix__}" + # Create parent directory if needed. + os.makedirs(os.path.dirname(idx_fn), exist_ok=True) + else: + idx_fn = f"{fn}.{__idx_suffix__}" + return idx_fn + + +def _build_memmap_index_files(newline_int, build_index_fn, fn, index_mapping_dir: str): """Helper function to build an index file""" - idx_fn = f"{fn}.{__idx_suffix__}" + idx_fn = _index_fn(fn, index_mapping_dir) # create data map if _index_file_exists(idx_fn): @@ -332,7 +423,9 @@ def _build_memmap_index_files(newline_int, build_index_fn, fn): return True -def build_index_files(dataset_paths, newline_int, workers=None, build_index_fn=_build_index_from_memdata): +def build_index_files( + dataset_paths, newline_int, workers=None, build_index_fn=_build_index_from_memdata, index_mapping_dir: str = None +): """Auxiliary method to build multiple index files""" if len(dataset_paths) < 1: raise ValueError("files_list must contain at leat one file name") @@ -344,7 +437,10 @@ def build_index_files(dataset_paths, newline_int, workers=None, build_index_fn=_ # load all files into memmap start_time = time.time() with mp.Pool(workers) as p: - build_status = p.map(partial(_build_memmap_index_files, newline_int, build_index_fn), dataset_paths) + build_status = p.map( + partial(_build_memmap_index_files, newline_int, build_index_fn, index_mapping_dir=index_mapping_dir), + dataset_paths, + ) logging.info( f'Time building {sum(build_status)} / {len(build_status)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}' diff --git a/tests/collections/nlp/test_mem_map_dataset.py b/tests/collections/nlp/test_mem_map_dataset.py new file mode 100644 index 000000000000..b60636022e05 --- /dev/null +++ b/tests/collections/nlp/test_mem_map_dataset.py @@ -0,0 +1,113 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import csv +import json +import os + +import pytest + +from nemo.collections.nlp.data.language_modeling import text_memmap_dataset + + +@pytest.fixture +def jsonl_file(tmp_path): + # Create a temporary file path + file_path = tmp_path / "data.jsonl" + + # Generate data to write to the JSONL file + data = [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Bob", "age": 35}] + + # Write data to the JSONL file + with open(file_path, mode="w") as file: + for item in data: + json.dump(item, file) + file.write('\n') + + # Provide the file path to the test function + yield str(file_path) + + # Optional: Clean up the temporary file after the test + file_path.unlink() + + +@pytest.fixture +def csv_file(tmp_path): + # Create a temporary file path + file_path = tmp_path / "data.csv" + + # Generate data to write to the CSV file + data = [["ID", "Name"], [1, "John"], [2, "Jane"], [3, "Bob"]] + + # Write data to the CSV file + with open(file_path, mode="w", newline="") as file: + writer = csv.writer(file) + writer.writerows(data) + + # Provide the file path to the test function + yield str(file_path) + + # Optional: Clean up the temporary file after the test + file_path.unlink() + + +def test_jsonl_mem_map_dataset(jsonl_file): + """Test for JSONL memory-mapped datasets.""" + + indexed_dataset = text_memmap_dataset.JSONLMemMapDataset(dataset_paths=[jsonl_file], header_lines=0) + assert indexed_dataset[0] == {"name": "John", "age": 30} + assert indexed_dataset[1] == {"name": "Jane", "age": 25} + assert indexed_dataset[2] == {"name": "Bob", "age": 35} + + +def test_csv_mem_map_dataset(csv_file): + """Test for CSV memory-mapped datasets.""" + + indexed_dataset = text_memmap_dataset.CSVMemMapDataset(dataset_paths=[csv_file], data_col=1, header_lines=1) + assert indexed_dataset[0].strip() == "John" + assert indexed_dataset[1].strip() == "Jane" + assert indexed_dataset[2].strip() == "Bob" + + +@pytest.mark.parametrize( + "dataset_class", [text_memmap_dataset.JSONLMemMapDataset, text_memmap_dataset.CSVMemMapDataset] +) +@pytest.mark.parametrize("use_alternative_index_mapping_dir", [True, False]) +@pytest.mark.parametrize("relative_index_fn", [True, False]) +def test_mem_map_dataset_index_mapping_dir( + tmp_path, dataset_class, jsonl_file, use_alternative_index_mapping_dir, relative_index_fn +): + """Test for index_mapping_dir.""" + if relative_index_fn: + jsonl_file = os.path.relpath(jsonl_file) + else: + jsonl_file = os.path.abspath(jsonl_file) + + if use_alternative_index_mapping_dir: + index_mapping_dir = tmp_path / "subdir" + dataset_class(dataset_paths=[jsonl_file], header_lines=0, index_mapping_dir=str(index_mapping_dir)) + # Index files should not be created in default location. + assert not os.path.isfile(f"{jsonl_file}.idx.npy") + assert not os.path.isfile(f"{jsonl_file}.idx.info") + if relative_index_fn: + # Remove leading ".." sequences. 
+ while jsonl_file.startswith(("../")): + jsonl_file = jsonl_file.lstrip("../") + idx_fn = f"{str(index_mapping_dir)}/{jsonl_file}.idx" + assert os.path.isfile(f"{idx_fn}.npy") + assert os.path.isfile(f"{idx_fn}.info") + else: + text_memmap_dataset.JSONLMemMapDataset(dataset_paths=[jsonl_file], header_lines=0) + assert os.path.isfile(f"{jsonl_file}.idx.npy") + assert os.path.isfile(f"{jsonl_file}.idx.info") From bf270794267e0240d8a8b2f2514c80c6929c76f1 Mon Sep 17 00:00:00 2001 From: Yen-Shi Wang <6960565+yen-shi@users.noreply.github.com> Date: Tue, 6 Jun 2023 11:30:40 -0700 Subject: [PATCH 024/123] Add inference kv cache support for transformer TE path (#6627) * Add kv cache support for transformer TE path Signed-off-by: Yen-Shi Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Mark get_data_parallel_group as WAR Signed-off-by: Yen-Shi Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Initialize process group for FP8 training Signed-off-by: Tim Moon * Update Megatron GPT eval script for non-FP8 path Signed-off-by: Yen-Shi Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Yen-Shi Wang Signed-off-by: Tim Moon Signed-off-by: Yen-Shi Wang <6960565+yen-shi@users.noreply.github.com> Co-authored-by: Yen-Shi Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tim Moon Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: Eric Harper --- .../language_modeling/megatron_gpt_eval.py | 23 +++++++++++++++- .../modules/common/megatron/transformer.py | 26 +++++++++++++++---- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index d7319fb72a01..b33cdefc6df2 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -154,6 +154,15 @@ def __getitem__(self, idx): return self.sentences[idx] +def remove_padded_prompts(response, nb_paddings): + result = {} + for k, v in response.items(): + if v != None and (type(v) is list or type(v) is torch.Tensor): + v = v[:-nb_paddings] + result[k] = v + return result + + @hydra_runner(config_path="conf", config_name="megatron_gpt_inference") def main(cfg) -> None: @@ -254,22 +263,34 @@ def main(cfg) -> None: "compute_logprob": cfg.inference.compute_logprob, } + fp8_enabled = hasattr(model.cfg, "fp8") and (model.cfg.fp8 == True) + if fp8_enabled: + nb_paddings = 0 + while len(cfg.prompts) % 8 != 0: + cfg.prompts.append("") + nb_paddings += 1 + # First method of running text generation, call model.generate method response = model.generate( inputs=OmegaConf.to_container(cfg.prompts), length_params=length_params, sampling_params=sampling_params ) + if fp8_enabled: + response = remove_padded_prompts(response, nb_paddings) print("***************************") print(response) print("***************************") # Second method of running text generation, call trainer.predict [recommended] + bs = 8 if fp8_enabled else 2 ds = RequestDataSet(OmegaConf.to_container(cfg.prompts)) - request_dl = DataLoader(dataset=ds, batch_size=2) + request_dl = DataLoader(dataset=ds, batch_size=bs) config = OmegaConf.to_container(cfg.inference) model.set_inference_config(config) response = trainer.predict(model, request_dl) + if 
fp8_enabled: + response[-1] = remove_padded_prompts(response[-1], nb_paddings) print("***************************") print(response) print("***************************") diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 9a09a9f9aa0b..f5dfbcabcd0e 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -945,6 +945,9 @@ def __init__( self.position_embedding_type = position_embedding_type self.multi_query_attention = multi_query_attention + self.inference_current_sequence_len = 0 + self.inference_params = None + self.activations_checkpoint_method = activations_checkpoint_method self.activations_checkpoint_num_layers = activations_checkpoint_num_layers self.activations_checkpoint_granularity = activations_checkpoint_granularity @@ -1451,6 +1454,20 @@ def forward( if get_key_value: presents = [] + if self.transformer_engine: + # Pass key value information to TE through inference_params to pre-allocate memory + if set_inference_key_value_memory: + self.inference_params = type('', (), {})() + self.inference_params.max_sequence_len = inference_max_sequence_len + self.inference_params.max_batch_size = hidden_states.size(1) + self.inference_params.batch_size_offset = 0 + self.inference_params.key_value_memory_dict = {} + self.inference_params.sequence_len_offset = 0 + self.inference_current_sequence_len = 0 + + if self.inference_params != None: + self.inference_params.sequence_len_offset = self.inference_current_sequence_len + for index in range(self.num_layers): layer = self._get_layer(index) past = None @@ -1479,19 +1496,15 @@ def forward( checkpoint_core_attention = False if self.transformer_engine: - - inference_params = None - hidden_states = layer( hidden_states, attention_mask, encoder_output=encoder_output, enc_dec_attn_mask=enc_dec_attn_mask, - inference_params=inference_params, + inference_params=self.inference_params, is_first_microbatch=self.is_first_microbatch, checkpoint_core_attention=checkpoint_core_attention, ) - else: hidden_states = layer( hidden_states, @@ -1507,6 +1520,9 @@ def forward( cross_attention_relative_position_bias=cross_attention_relative_position_bias, checkpoint_core_attention=checkpoint_core_attention, ) + # Update current sequence length outside of the loops + if self.transformer_engine: + self.inference_current_sequence_len += hidden_states.size(0) # Skip counter update for eval and activation checkpointing if torch.is_grad_enabled() and self.training: From 336372c8f398a93701999677f83e924edc31f613 Mon Sep 17 00:00:00 2001 From: Dima Rekesh Date: Wed, 7 Jun 2023 14:09:54 -0700 Subject: [PATCH 025/123] Support large inputs to Conformer and Fast Conformer (#6556) * initial commit Signed-off-by: Dima Rekesh * typos Signed-off-by: Dima Rekesh * tweaks to padding Signed-off-by: Dima Rekesh * comments Signed-off-by: Dima Rekesh * attempt at first working version Signed-off-by: Dima Rekesh * typos and fixed p calculation Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing merge artifacts Signed-off-by: Dima Rekesh * typo Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing unnessary imports Signed-off-by: Dima Rekesh * if batch split succeeded no need to conv again Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci * adding channel wise split Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adding reference to pytorch issue 80020 Signed-off-by: Dima Rekesh * removing time chunking methods Signed-off-by: Dima Rekesh * accounting for the actual self._stride value Signed-off-by: Dima Rekesh * limiting the fix to dw_striding subsampling Signed-off-by: Dima Rekesh * renamed methods Signed-off-by: Dima Rekesh * one more accounting for the actual self._stride value Signed-off-by: Dima Rekesh * support for causal convs Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * option to set conv chunking size manually * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing imports * subsampling test Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename variable Signed-off-by: Dima Rekesh * imports in test Signed-off-by: Dima Rekesh * more runtime checks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * a more careful test Signed-off-by: Dima Rekesh * bug in causal Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix in causal Signed-off-by: Dima Rekesh * change_conv_chunking_factor methods Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * renamed methods Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disabling chunking by default Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * typo Signed-off-by: Dima Rekesh * changing default chunking to auto Signed-off-by: Dima Rekesh * only split if needed Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * only split if needed Signed-off-by: Dima Rekesh --------- Signed-off-by: Dima Rekesh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../asr/modules/conformer_encoder.py | 24 +++ nemo/collections/asr/parts/mixins/mixins.py | 28 ++++ .../asr/parts/submodules/subsampling.py | 151 +++++++++++++++++- tests/collections/asr/test_asr_subsampling.py | 61 +++++++ 4 files changed, 262 insertions(+), 2 deletions(-) create mode 100644 tests/collections/asr/test_asr_subsampling.py diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index df5b8f5c69ed..7c786f9c9720 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -67,6 +67,8 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin): Defaults to striding. subsampling_factor (int): the subsampling factor which should be power of 2 Defaults to 4. 
+ subsampling_conv_chunking_factor(int): optionally, force chunk inputs (helpful for large inputs) + Should be power of 2, 1 (auto-chunking, default), or -1 (no chunking) subsampling_conv_channels (int): the size of the convolutions in the subsampling module Defaults to -1 which would set it to d_model. reduction (str, Optional): the method of reduction, choices=['pooling', 'striding']. If no value @@ -245,6 +247,7 @@ def __init__( causal_downsampling=False, subsampling='striding', subsampling_factor=4, + subsampling_conv_chunking_factor=1, subsampling_conv_channels=-1, reduction=None, reduction_position=None, @@ -279,6 +282,7 @@ def __init__( self.scale = math.sqrt(self.d_model) self.att_context_style = att_context_style self.subsampling_factor = subsampling_factor + self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor self.self_attention_model = self_attention_model self.global_tokens = global_tokens @@ -355,6 +359,7 @@ def __init__( feat_in=feat_in, feat_out=d_model, conv_channels=subsampling_conv_channels, + subsampling_conv_chunking_factor=subsampling_conv_chunking_factor, activation=nn.ReLU(True), is_causal=causal_downsampling, ) @@ -977,6 +982,25 @@ def change_attention_model( self._cfg.self_attention_model = self_attention_model self._cfg.att_context_size = att_context_size + def change_subsampling_conv_chunking_factor(self, subsampling_conv_chunking_factor: int): + """ + Update the conv_chunking_factor (int) + Default is 1 (auto) + Set it to -1 (disabled) or to a specific value (power of 2) if you OOM in the conv subsampling layers + + + Args: + subsampling_conv_chunking_factor (int) + """ + + if not hasattr(self.pre_encode, "change_subsampling_conv_chunking_factor"): + logging.info("Model pre_encoder doesn't have a change_subsampling_conv_chunking_factor method ") + return + + self.pre_encode.change_subsampling_conv_chunking_factor( + subsampling_conv_chunking_factor=subsampling_conv_chunking_factor + ) + class ConformerEncoderAdapter(ConformerEncoder, adapter_mixins.AdapterModuleMixin): diff --git a/nemo/collections/asr/parts/mixins/mixins.py b/nemo/collections/asr/parts/mixins/mixins.py index eba896d0478d..4c43960ac9d2 100644 --- a/nemo/collections/asr/parts/mixins/mixins.py +++ b/nemo/collections/asr/parts/mixins/mixins.py @@ -432,6 +432,34 @@ def change_attention_model( self.cfg.encoder.self_attention_model = self_attention_model self.cfg.encoder.att_context_size = att_context_size + def change_subsampling_conv_chunking_factor( + self, subsampling_conv_chunking_factor: int, update_config: bool = True + ): + """ + Update the conv_chunking_factor (int) if function is available in encoder. + Default is 1 (auto) + Set it to -1 (disabled) or to a specific value (power of 2) if you OOM in the conv subsampling layers + + Args: + conv_chunking_factor (int) + """ + + if not hasattr(self, 'encoder'): + logging.info( + "Could not call the change_subsampling_conv_chunking_factor method in encoder " + "since the model provided does not contain an `encoder` module in its config." 
+ ) + return + + if not hasattr(self.encoder, "change_subsampling_conv_chunking_factor"): + logging.info("Model encoder doesn't have a change_subsampling_conv_chunking_factor method ") + return + + self.encoder.change_subsampling_conv_chunking_factor(subsampling_conv_chunking_factor) + if update_config: + with open_dict(self.cfg): + self.cfg.encoder.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor + def conformer_stream_step( self, processed_signal: torch.Tensor, diff --git a/nemo/collections/asr/parts/submodules/subsampling.py b/nemo/collections/asr/parts/submodules/subsampling.py index 4358d09977fe..23bd625108c7 100644 --- a/nemo/collections/asr/parts/submodules/subsampling.py +++ b/nemo/collections/asr/parts/submodules/subsampling.py @@ -19,6 +19,7 @@ from torch.nn import LayerNorm from nemo.collections.asr.parts.submodules.causal_convs import CausalConv2D +from nemo.utils import logging class StackingSubsampling(torch.nn.Module): @@ -65,6 +66,8 @@ class ConvSubsampling(torch.nn.Module): Args: subsampling (str): The subsampling technique from {"vggnet", "striding"} subsampling_factor (int): The subsampling factor which should be a power of 2 + subsampling_conv_chunking_factor (int): Input chunking factor which can be -1 (no chunking) + 1 (auto) or a power of 2. Default is 1 feat_in (int): size of the input features feat_out (int): size of the output features conv_channels (int): Number of channels for the convolution layers. @@ -72,7 +75,15 @@ class ConvSubsampling(torch.nn.Module): """ def __init__( - self, subsampling, subsampling_factor, feat_in, feat_out, conv_channels, activation=nn.ReLU(), is_causal=False + self, + subsampling, + subsampling_factor, + feat_in, + feat_out, + conv_channels, + subsampling_conv_chunking_factor=1, + activation=nn.ReLU(), + is_causal=False, ): super(ConvSubsampling, self).__init__() self._subsampling = subsampling @@ -86,6 +97,14 @@ def __init__( self.subsampling_factor = subsampling_factor self.is_causal = is_causal + if ( + subsampling_conv_chunking_factor != -1 + and subsampling_conv_chunking_factor != 1 + and subsampling_conv_chunking_factor % 2 != 0 + ): + raise ValueError("subsampling_conv_chunking_factor should be -1, 1, or a power of 2") + self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor + in_channels = 1 layers = [] @@ -264,7 +283,32 @@ def forward(self, x, lengths): ) x = x.unsqueeze(1) - x = self.conv(x) + # split inputs if chunking_factor is set + if self.subsampling_conv_chunking_factor != -1: + if self.subsampling_conv_chunking_factor == 1: + # if subsampling_conv_chunking_factor is 1, we split only if needed + # avoiding a bug / feature limiting indexing of tensors to 2**31 + # see https://github.com/pytorch/pytorch/issues/80020 + x_ceil = 2 ** 31 / self._conv_channels * self._stride * self._stride + if torch.numel(x) > x_ceil: + need_to_split = True + else: + need_to_split = False + else: + # if subsampling_conv_chunking_factor > 1 we always split + need_to_split = True + + if need_to_split: + x, success = self.conv_split_by_batch(x) + if not success: # if unable to split by batch, try by channel + if self._subsampling == 'dw_striding': + x = self.conv_split_by_channel(x) + else: + x = self.conv(x) # try anyway + else: + x = self.conv(x) + else: + x = self.conv(x) b, c, t, f = x.size() x = self.out(x.transpose(1, 2).reshape(b, t, -1)) @@ -293,6 +337,109 @@ def reset_parameters(self): torch.nn.init.uniform_(self.out.weight, -fc_scale, fc_scale) torch.nn.init.uniform_(self.out.bias, -fc_scale, 
fc_scale) + def conv_split_by_batch(self, x): + """ Tries to split input by batch, run conv and concat results """ + b, _, _, _ = x.size() + if b == 1: # can't split if batch size is 1 + return x, False + + if self.subsampling_conv_chunking_factor > 1: + cf = self.subsampling_conv_chunking_factor + logging.debug(f'using manually set chunking factor: {cf}') + else: + # avoiding a bug / feature limiting indexing of tensors to 2**31 + # see https://github.com/pytorch/pytorch/issues/80020 + x_ceil = 2 ** 31 / self._conv_channels * self._stride * self._stride + p = math.ceil(math.log(torch.numel(x) / x_ceil, 2)) + cf = 2 ** p + logging.debug(f'using auto set chunking factor: {cf}') + + new_batch_size = b // cf + if new_batch_size == 0: # input is too big + return x, False + + logging.debug(f'conv subsampling: using split batch size {new_batch_size}') + return torch.cat([self.conv(chunk) for chunk in torch.split(x, new_batch_size, 0)]), True + + def conv_split_by_channel(self, x): + """ For dw convs, tries to split input by time, run conv and concat results """ + x = self.conv[0](x) # full conv2D + x = self.conv[1](x) # activation + + for i in range(self._sampling_num - 1): + _, c, t, _ = x.size() + + if self.subsampling_conv_chunking_factor > 1: + cf = self.subsampling_conv_chunking_factor + logging.debug(f'using manually set chunking factor: {cf}') + else: + # avoiding a bug / feature limiting indexing of tensors to 2**31 + # see https://github.com/pytorch/pytorch/issues/80020 + p = math.ceil(math.log(torch.numel(x) / 2 ** 31, 2)) + cf = 2 ** p + logging.debug(f'using auto set chunking factor: {cf}') + + new_c = int(c // cf) + if new_c == 0: + logging.warning(f'chunking factor {cf} is too high; splitting down to one channel.') + new_c = 1 + + new_t = int(t // cf) + if new_t == 0: + logging.warning(f'chunking factor {cf} is too high; splitting down to one timestep.') + new_t = 1 + + logging.debug(f'conv dw subsampling: using split C size {new_c} and split T size {new_t}') + x = self.channel_chunked_conv(self.conv[i * 3 + 2], new_c, x) # conv2D, depthwise + + # splitting pointwise convs by time + x = torch.cat([self.conv[i * 3 + 3](chunk) for chunk in torch.split(x, new_t, 2)], 2) # conv2D, pointwise + x = self.conv[i * 3 + 4](x) # activation + return x + + def channel_chunked_conv(self, conv, chunk_size, x): + """ Performs channel chunked convolution""" + + ind = 0 + out_chunks = [] + for chunk in torch.split(x, chunk_size, 1): + step = chunk.size()[1] + + if self.is_causal: + chunk = nn.functional.pad( + chunk, pad=(self._kernel_size - 1, self._stride - 1, self._kernel_size - 1, self._stride - 1) + ) + ch_out = nn.functional.conv2d( + chunk, + conv.weight[ind : ind + step, :, :, :], + bias=conv.bias[ind : ind + step], + stride=self._stride, + padding=0, + groups=step, + ) + else: + ch_out = nn.functional.conv2d( + chunk, + conv.weight[ind : ind + step, :, :, :], + bias=conv.bias[ind : ind + step], + stride=self._stride, + padding=self._left_padding, + groups=step, + ) + out_chunks.append(ch_out) + ind += step + + return torch.cat(out_chunks, 1) + + def change_subsampling_conv_chunking_factor(self, subsampling_conv_chunking_factor: int): + if ( + subsampling_conv_chunking_factor != -1 + and subsampling_conv_chunking_factor != 1 + and subsampling_conv_chunking_factor % 2 != 0 + ): + raise ValueError("subsampling_conv_chunking_factor should be -1, 1, or a power of 2") + self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor + def calc_length(lengths, all_paddings, kernel_size, 
stride, ceil_mode, repeat_num=1): """ Calculates the output length of a Tensor passed through a convolution or max pooling layer""" diff --git a/tests/collections/asr/test_asr_subsampling.py b/tests/collections/asr/test_asr_subsampling.py new file mode 100644 index 000000000000..fe5295be11f1 --- /dev/null +++ b/tests/collections/asr/test_asr_subsampling.py @@ -0,0 +1,61 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +import torch + +from nemo.collections.asr.models import ASRModel + + +class TestASRSubsamplingConvChunking: + @pytest.mark.with_downloads() + @pytest.mark.unit + def test_forward(self): + asr_model = ASRModel.from_pretrained("stt_en_fastconformer_ctc_large") + asr_model = asr_model.eval() + asr_model.preprocessor.featurizer.dither = 0.0 + asr_model.preprocessor.featurizer.pad_to = 0 + + len = 512 + + input_signal_batch1 = torch.randn(size=(1, len), device=asr_model.device) + length_batch1 = torch.randint(low=161, high=500, size=[1], device=asr_model.device) + + input_signal_batch4 = torch.randn(size=(4, len), device=asr_model.device) + length_batch4 = torch.randint(low=161, high=500, size=[4], device=asr_model.device) + + with torch.no_grad(): + # regular inference + logprobs_batch1_nosplit, _, _ = asr_model.forward( + input_signal=input_signal_batch1, input_signal_length=length_batch1 + ) + logprobs_batch4_nosplit, _, _ = asr_model.forward( + input_signal=input_signal_batch4, input_signal_length=length_batch4 + ) + + # force chunking to 2 + asr_model.change_subsampling_conv_chunking_factor(subsampling_conv_chunking_factor=2) + + # chunked inference by channels as batch is 1 + logprobs_batch1_split, _, _ = asr_model.forward( + input_signal=input_signal_batch1, input_signal_length=length_batch1 + ) + # chunked inference by batch as it is 4 [> 1] + logprobs_batch4_split, _, _ = asr_model.forward( + input_signal=input_signal_batch4, input_signal_length=length_batch4 + ) + + diff = torch.mean(torch.abs(logprobs_batch1_split - logprobs_batch1_nosplit)) + assert diff <= 1e-6 + diff = torch.max(torch.abs(logprobs_batch4_split - logprobs_batch4_nosplit)) + assert diff <= 1e-6 From ebfcef7902c9232da04ecf90f505c3c996e7e059 Mon Sep 17 00:00:00 2001 From: Dima Rekesh Date: Wed, 7 Jun 2023 15:00:00 -0700 Subject: [PATCH 026/123] sharded_manifests updated docs (#6833) Signed-off-by: Dima Rekesh --- docs/source/asr/datasets.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/source/asr/datasets.rst b/docs/source/asr/datasets.rst index 7ec0f9c61b74..617d5195005f 100644 --- a/docs/source/asr/datasets.rst +++ b/docs/source/asr/datasets.rst @@ -218,11 +218,6 @@ of filepaths, e.g. ``['/data/shard1.tar', '/data/shard2.tar']``, or in a single As with non-tarred datasets, the manifest file should be passed in ``manifest_filepath``. The dataloader assumes that the length of the manifest after filtering is the correct size of the dataset for reporting training progress. 
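For a concrete feel of the auto-chunking rule in the ConvSubsampling changes above, the short sketch below reruns the same arithmetic outside the model. The tensor shape, conv_channels and stride values are illustrative assumptions (roughly two hours of 10 ms-hop features with 80 mel bins and a typical large-model subsampling setup), not values taken from the patch.

    import math

    # Illustrative input only: batch 1, single channel, ~2 hours of 10 ms frames, 80 features.
    b, t, feat_in = 1, 720_000, 80
    numel = b * 1 * t * feat_in                      # elements in the tensor fed to the subsampling convs

    # Assumed module parameters: 256 conv channels and stride 2 (typical, not read from the patch).
    conv_channels, stride = 256, 2

    # Same expressions as in ConvSubsampling.forward() / conv_split_by_batch():
    x_ceil = 2 ** 31 / conv_channels * stride * stride
    if numel > x_ceil:                               # beyond this, indexing hits pytorch/pytorch#80020
        p = math.ceil(math.log(numel / x_ceil, 2))
        chunking_factor = 2 ** p                     # auto mode picks the next power of two
    else:
        chunking_factor = 1                          # small enough, no chunking needed

    print(chunking_factor)                           # -> 2 for these numbers

If the resulting chunks cannot be formed by splitting the batch (for example with batch size 1), the patch falls back to channel-wise splitting for dw_striding subsampling, and a factor can also be forced explicitly through change_subsampling_conv_chunking_factor, as the new unit test does.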
-If the manifest is large, you may wish to reference sharded manifest files instead of a single manifest file. The naming convention -is identical to the audio tarballs and there should be a 1:1 relationship between a sharded audio tarfile and its manifest shard; e.g. -``'/data/sharded_manifests/manifest__OP_1..64_CL_'`` in the above example. Using sharded manifests improves job startup times and -decreases memory usage, as each worker only loads manifest shards for the corresponding audio shards instead of the entire manifest. - The ``tarred_shard_strategy`` field of the config file can be set if you have multiple shards and are running an experiment with multiple workers. It defaults to ``scatter``, which preallocates a set of shards per worker which do not change during runtime. Note that this strategy, on specific occasions (when the number of shards is not divisible with ``world_size``), will not sample @@ -242,6 +237,18 @@ see the corresponding class APIs in the `Datasets <./api.html#Datasets>`__ secti applied such that each worker ends up with the same number of files. We currently do not check for this in any dataloader, but the user's program may hang if the shards are uneven. +Sharded Manifests +~~~~~~~~~~~~~~~~~ +If your dataset / manifest is large, you may wish to use sharded manifest files instead of a single manifest file. The naming convention +is identical to the audio tarballs and there should be a 1:1 relationship between a sharded audio tarfile and its manifest shard; e.g. +``'/data/sharded_manifests/manifest__OP_1..64_CL_'`` in the above example. Using sharded manifests improves job startup times and +decreases memory usage, as each worker only loads manifest shards for the corresponding audio shards instead of the entire manifest. + +To enable sharded manifest filename expansion, set the ``shard_manifests`` field of the config file to true. In addition, the +``defer_setup`` flag needs to be true as well, so that the dataloader will be initialized after the DDP and its length can be collected from +the distributed workers. 
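To make the two flags concrete, here is a minimal sketch of the relevant dataset fields as a Python/OmegaConf fragment. The paths and the 1..64 shard count are placeholders, is_tarred is the usual tarred-dataset switch rather than something introduced by this change, and _OP_/_CL_ stand in for the brace characters of the expandable range.

    from omegaconf import OmegaConf

    # Placeholder paths; _OP_1..64_CL_ expands to shard indices 1..64, matching the tarball names.
    train_ds = OmegaConf.create(
        {
            "manifest_filepath": "/data/sharded_manifests/manifest__OP_1..64_CL_",
            "tarred_audio_filepaths": "/data/audio__OP_1..64_CL_.tar",
            "is_tarred": True,
            "shard_manifests": True,   # expand sharded manifest filenames per worker
            "defer_setup": True,       # build the dataloader after DDP so its length can be gathered
            "tarred_shard_strategy": "scatter",
        }
    )
    print(OmegaConf.to_yaml(train_ds))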
+ + Conversion to Tarred Datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 52e23e0d075e2732096aaf1ecac1f5659f1717a9 Mon Sep 17 00:00:00 2001 From: Nithin Rao Date: Wed, 7 Jun 2023 18:20:51 -0700 Subject: [PATCH 027/123] added fc-xl, xxl and titanet-s models (#6832) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri --- docs/source/asr/data/benchmark_en.csv | 5 ++++- .../speaker_recognition/data/speaker_results.csv | 1 + .../fastconformer/fast-conformer_ctc_bpe.yaml | 16 ++++++++++++++++ .../fast-conformer_transducer_bpe.yaml | 16 ++++++++++++++++ nemo/collections/asr/models/ctc_bpe_models.py | 7 +++++++ nemo/collections/asr/models/label_models.py | 7 +++++++ nemo/collections/asr/models/rnnt_bpe_models.py | 14 ++++++++++++++ 7 files changed, 65 insertions(+), 1 deletion(-) diff --git a/docs/source/asr/data/benchmark_en.csv b/docs/source/asr/data/benchmark_en.csv index 5f68e9ca22ce..5c764ba38651 100644 --- a/docs/source/asr/data/benchmark_en.csv +++ b/docs/source/asr/data/benchmark_en.csv @@ -28,4 +28,7 @@ stt_en_conformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/ca stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge" stt_en_fastconformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large" stt_en_fastconformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large" -stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" \ No newline at end of file +stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" +stt_en_fastconformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xlarge" +stt_en_fastconformer_ctc_xlarge,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xlarge" +stt_en_fastconformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xxlarge" \ No newline at end of file diff --git a/docs/source/asr/speaker_recognition/data/speaker_results.csv b/docs/source/asr/speaker_recognition/data/speaker_results.csv index a0e865c9c487..c92c971e4939 100644 --- a/docs/source/asr/speaker_recognition/data/speaker_results.csv +++ b/docs/source/asr/speaker_recognition/data/speaker_results.csv @@ -1,4 +1,5 @@ Model Name,Model Base Class,Model Card titanet_large,EncDecSpeakerLabelModel,"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_large" +titanet_small,EncDecSpeakerLabelModel,"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_small" speakerverification_speakernet,EncDecSpeakerLabelModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:speakerverification_speakernet" ecapa_tdnn,EncDecSpeakerLabelModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:ecapa_tdnn" \ No newline at end of file diff --git a/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml b/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml index 8c7561381299..41a8abd93758 100644 --- a/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml +++ b/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml @@ -17,6 +17,22 @@ # | bf16 | 32GB | 64 | # | 
| 80GB | 128 | # +-----------+------------+------------+ +# Here are the recommended configs for different variants of FastConformer-CTC-BPE, other parameters are the same as in this config file. +# +# +--------------+---------+---------+----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Model | d_model | n_heads | n_layers |conv_kernel_size| weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling | +# +==============+=========+========+===========+================+==============+==========================+=================+============+ +# | Small (14M) | 176 | 4 | 16 | 9 | 0.0 | 320 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Medium (32M) | 256 | 4 | 16 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Large (120M) | 512 | 8 | 17 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XLarge (616M)| 1024 | 8 | 24 | 9 | 1e-3 | 640 | 2 | False | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XXLarge(1.2B)| 1024 | 8 | 42 | 5 | 1e-3 | 640 | 2 | False | +# +--------------------------------------------------------------+--------------+--------------------------+-----------------+------------+ + # Note: They are based on the assumption of max_duration of 20. If you have longer or shorter max_duration, then batch sizes may need to get updated accordingly. # Default learning parameters in this config are set for global batch size of 2K while you may use lower values. diff --git a/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml b/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml index 0b0ec78e077d..9e3da8d3545f 100644 --- a/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml +++ b/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml @@ -17,6 +17,22 @@ # | bf16 | 32GB | 64 | # | | 80GB | 128 | # +-----------+------------+------------+ +# Here are the recommended configs for different variants of FastConformer-Transducer-BPE, other parameters are the same as in this config file. 
+# +# +--------------+---------+---------+----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Model | d_model | n_heads | n_layers |conv_kernel_size| weight_decay | pred_hidden/joint_hidden | pred_rnn_layers | xscaling | +# +==============+=========+========+===========+================+==============+==========================+=================+============+ +# | Small (14M) | 176 | 4 | 16 | 9 | 0.0 | 320 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Medium (32M) | 256 | 4 | 16 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | Large (120M) | 512 | 8 | 17 | 9 | 1e-3 | 640 | 1 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XLarge (616M)| 1024 | 8 | 24 | 9 | 1e-3 | 640 | 2 | True | +# +--------------+---------+--------+-----------+----------------+--------------+--------------------------+-----------------+------------+ +# | XXLarge(1.2B)| 1024 | 8 | 42 | 5 | 1e-3 | 640 | 2 | False | +# +--------------------------------------------------------------+--------------+--------------------------+-----------------+------------+ + # Note: They are based on the assumption of max_duration of 20. If you have longer or shorter max_duration, then batch sizes may need to get updated accordingly. # Default learning parameters in this config are set for global batch size of 2K while you may use lower values. diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index a74c7f3de5c2..7d3b236b2bab 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -606,4 +606,11 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_ctc_xlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_xlarge/versions/1.20.0/files/stt_en_fastconformer_ctc_xlarge.nemo", + ) + results.append(model) + return results diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index aefa8743826b..cc789dacff11 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -92,6 +92,13 @@ def list_available_models(cls) -> List[PretrainedModelInfo]: ) result.append(model) + model = PretrainedModelInfo( + pretrained_model_name="titanet_small", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:titanet_small", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/titanet_small/versions/1.19.0/files/titanet-s.nemo", + ) + result.append(model) + return result def __init__(self, cfg: DictConfig, trainer: Trainer = None): diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index 6fed8be9d410..9ed38a376103 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -253,6 +253,20 @@ def 
list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_xlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_xlarge/versions/1.20.1/files/stt_en_fastconformer_transducer_xlarge.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_transducer_xxlarge", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xxlarge", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_xxlarge/versions/1.20.0/files/stt_en_fastconformer_transducer_xxlarge.nemo", + ) + results.append(model) + return results def __init__(self, cfg: DictConfig, trainer: Trainer = None): From 6903d9be12c7b5882913728663810da9a7f690d0 Mon Sep 17 00:00:00 2001 From: bene-ges Date: Thu, 8 Jun 2023 07:31:59 +0300 Subject: [PATCH 028/123] add reference to our paper (#6821) * add reference to our paper Signed-off-by: Alexandra Antonova * add paper reference to docs Signed-off-by: Alexandra Antonova --------- Signed-off-by: Alexandra Antonova --- docs/source/nlp/nlp_all.bib | 9 ++++ .../nlp/spellchecking_asr_customization.rst | 2 +- .../spellchecking_asr_customization/README.md | 4 +- .../spellchecking_model.py | 1 + ...pellMapper_English_ASR_Customization.ipynb | 42 +++++++++++-------- 5 files changed, 37 insertions(+), 21 deletions(-) diff --git a/docs/source/nlp/nlp_all.bib b/docs/source/nlp/nlp_all.bib index fd0f15f6d1da..48a53240e52b 100644 --- a/docs/source/nlp/nlp_all.bib +++ b/docs/source/nlp/nlp_all.bib @@ -216,3 +216,12 @@ @article{jegou2022faiss pages={ascl--2210}, year={2022} } + +@misc{antonova2023spellmapper, + title={SpellMapper: A non-autoregressive neural spellchecker for ASR customization with candidate retrieval based on n-gram mappings}, + author={Alexandra Antonova and Evelina Bakhturina and Boris Ginsburg}, + year={2023}, + eprint={2306.02317}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} diff --git a/docs/source/nlp/spellchecking_asr_customization.rst b/docs/source/nlp/spellchecking_asr_customization.rst index f9009b520361..c6666c4e338c 100644 --- a/docs/source/nlp/spellchecking_asr_customization.rst +++ b/docs/source/nlp/spellchecking_asr_customization.rst @@ -3,7 +3,7 @@ SpellMapper (Spellchecking ASR Customization) Model ===================================================== -SpellMapper is a non-autoregressive model for postprocessing of ASR output. It gets as input a single ASR hypothesis (text) and a custom vocabulary and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. Unlike traditional spellchecking approaches, which aim to correct known words using language models, SpellMapper's goal is to correct highly specific user terms, out-of-vocabulary (OOV) words or spelling variations (e.g., "John Koehn", "Jon Cohen"). +`SpellMapper `__ :cite:`nlp-ner-antonova2023spellmapper` is a non-autoregressive model for postprocessing of ASR output. It gets as input a single ASR hypothesis (text) and a custom vocabulary and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. 
Unlike traditional spellchecking approaches, which aim to correct known words using language models, SpellMapper's goal is to correct highly specific user terms, out-of-vocabulary (OOV) words or spelling variations (e.g., "John Koehn", "Jon Cohen"). This model is an alternative to word boosting/shallow fusion approaches: diff --git a/examples/nlp/spellchecking_asr_customization/README.md b/examples/nlp/spellchecking_asr_customization/README.md index 2d83fd8d11ad..9d2063eff181 100644 --- a/examples/nlp/spellchecking_asr_customization/README.md +++ b/examples/nlp/spellchecking_asr_customization/README.md @@ -1,6 +1,6 @@ # SpellMapper - spellchecking model for ASR Customization - -This model is inspired by Microsoft's paper https://arxiv.org/pdf/2203.00888.pdf, but does not repeat its implementation. +Paper: https://arxiv.org/abs/2306.02317 +This model was partly inspired by Microsoft's paper https://arxiv.org/pdf/2203.00888.pdf. The goal is to build a model that gets as input a single ASR hypothesis (text) and a vocabulary of custom words/phrases and predicts which fragments in the ASR hypothesis should be replaced by which custom words/phrases if any. Our model is non-autoregressive (NAR) based on transformer architecture (BERT with multiple separators). diff --git a/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py b/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py index fc889de2dc63..15ffb2dd1bcd 100644 --- a/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py +++ b/nemo/collections/nlp/models/spellchecking_asr_customization/spellchecking_model.py @@ -43,6 +43,7 @@ @experimental class SpellcheckingAsrCustomizationModel(NLPModel): """ + https://arxiv.org/abs/2306.02317 BERT-based model for Spellchecking ASR Customization. It takes as input ASR hypothesis and candidate customization entries. It labels the hypothesis with correct entry index or 0. diff --git a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb index e11025aeb1d3..cc949ad699b3 100644 --- a/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb +++ b/tutorials/nlp/SpellMapper_English_ASR_Customization.ipynb @@ -1,13 +1,14 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "PiRuohn_FQco" }, "source": [ "# Overview\n", - "This tutorial demonstrates how to run inference with SpellMapper - a model for Spellchecking ASR (Automatic Speech Recognition) Customization.\n", + "This tutorial demonstrates how to run inference with [SpellMapper](https://arxiv.org/abs/2306.02317) - a model for Spellchecking ASR (Automatic Speech Recognition) Customization.\n", "\n", "Estimated time: 10-15 min.\n", "\n", @@ -957,25 +958,25 @@ }, { "cell_type": "markdown", - "source": [ - "Free GPU memory to avoid OOM." - ], "metadata": { "id": "bt2TMLLvdUHm" - } + }, + "source": [ + "Free GPU memory to avoid OOM." + ] }, { "cell_type": "code", - "source": [ - "del spectrogram_generator\n", - "del vocoder\n", - "torch.cuda.empty_cache()" - ], + "execution_count": null, "metadata": { "id": "ZwEpAOCaRH7s" }, "outputs": [], - "execution_count": null + "source": [ + "del spectrogram_generator\n", + "del vocoder\n", + "torch.cuda.empty_cache()" + ] }, { "cell_type": "markdown", @@ -1363,22 +1364,27 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "upvTbkFAeYtR" }, "source": [ "# Final notes\n", - "1. Our paper...\n", + "1. 
Bash-script with example of inference pipeline [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/spellchecking_asr_customization/run_infer.sh)\n", "\n", - "2. To reproduce evaluation experiments from this paper see these scripts:\n", + "2. Check our paper: [SpellMapper: A non-autoregressive neural spellchecker for ASR customization with candidate retrieval based on n-gram mappings](https://arxiv.org/abs/2306.02317)\n", + "\n", + "3. To reproduce evaluation experiments from this paper see these scripts:\n", " - [test_on_kensho.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", " - [test_on_userlibri.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", " - [test_on_spoken_wikipedia.sh](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/evaluation/test_on_kensho.sh)\n", "\n", - "3. To reproduce training see [README.md](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/README.md)\n", + "4. To reproduce creation of training data see [README.md](https://github.com/bene-ges/nemo_compatible/blob/main/scripts/nlp/en_spellmapper/README.md)\n", + "\n", + "5. To run training see [run_training.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/spellchecking_asr_customization/run_training.sh)\n", "\n", - "4. Promising future research directions would be:\n", + "6. Promising future research directions would be:\n", " - add a simple trainable classifier on top of SpellMapper predictions instead of using multiple thresholds\n", " - retrain with adding more various false positives to the training data" ] @@ -1387,9 +1393,9 @@ "metadata": { "accelerator": "GPU", "colab": { - "toc_visible": true, + "gpuType": "T4", "provenance": [], - "gpuType": "T4" + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", @@ -1401,4 +1407,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} From 9cca92bb37edd7d43d3a937f2abb7c1706807056 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Thu, 8 Jun 2023 09:21:33 -0700 Subject: [PATCH 029/123] Upperbound Numpy to < 1.24 (#6829) Signed-off-by: smajumdar Co-authored-by: Eric Harper --- requirements/requirements.txt | 2 +- requirements/requirements_nlp.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 9d4fab43186b..7481e337c999 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,6 +1,6 @@ huggingface_hub numba -numpy>=1.22 +numpy>=1.22,<1.24 onnx>=1.7.0 python-dateutil ruamel.yaml diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index b4e5ac5c5801..1ff4c444c2bf 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -14,7 +14,6 @@ markdown2 matplotlib>=3.3.2 megatron_core==0.1.0 nltk>=3.6.5 -numpy opencc pangu rapidfuzz From b67d41090ca3302d059a8f31e0dc9efa8b52b003 Mon Sep 17 00:00:00 2001 From: Vahid Noroozi Date: Thu, 8 Jun 2023 10:12:22 -0700 Subject: [PATCH 030/123] Multi-lookahead cache-aware streaming models (#6711) * added methods. Signed-off-by: Vahid * added methods. Signed-off-by: Vahid * added initial code. Signed-off-by: Vahid * added initial code. Signed-off-by: Vahid * added initial code. Signed-off-by: Vahid * added config files. Signed-off-by: Vahid * fixed bugs. Signed-off-by: Vahid * updated confs. 
Signed-off-by: Vahid * updated confs. Signed-off-by: Vahid * updated confs. Signed-off-by: Vahid * updated confs. Signed-off-by: Vahid * improved f.conv1d Signed-off-by: Vahid * pulled from main. Signed-off-by: Vahid * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * pulled from main. Signed-off-by: Vahid * added postpostnorm. Signed-off-by: Vahid * fixed the target continiouse bug. Signed-off-by: Vahid * added dw_striding causal. Signed-off-by: Vahid * added print for debugging. Signed-off-by: Vahid * added print for debugging. Signed-off-by: Vahid * fixed causal convolutions. Signed-off-by: Vahid * added _midnorm. Signed-off-by: Vahid * fixed transcribe. Signed-off-by: Vahid * cleaned code. Signed-off-by: Vahid * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * moved back configs. Signed-off-by: Vahid * moved back configs. Signed-off-by: Vahid * updated fast emit for FC models. Signed-off-by: Vahid * updated fast emit for FC models. Signed-off-by: Vahid * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed bug. Signed-off-by: Vahid * fixed bug and addressed comments. Signed-off-by: Vahid * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed configs. Signed-off-by: Vahid * fixed configs. Signed-off-by: Vahid * dropped the test. Signed-off-by: Vahid --------- Signed-off-by: Vahid Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../conformer_ctc_bpe_streaming.yaml | 8 +- .../conformer_transducer_bpe_streaming.yaml | 10 +- .../fastconformer_ctc_bpe_streaming.yaml | 9 +- .../fastconformer_ctc_char_streaming.yaml | 10 +- ...astconformer_transducer_bpe_streaming.yaml | 15 +- ...stconformer_transducer_char_streaming.yaml | 16 +- ...r_hybrid_transducer_ctc_bpe_streaming.yaml | 15 +- ..._hybrid_transducer_ctc_char_streaming.yaml | 15 +- .../asr/modules/conformer_encoder.py | 306 ++++++++++-------- .../asr/modules/squeezeformer_encoder.py | 1 - .../parts/submodules/multi_head_attention.py | 6 +- tests/collections/nlp/test_huggingface.py | 19 +- 12 files changed, 272 insertions(+), 158 deletions(-) diff --git a/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml b/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml index 98f23458cd86..32afd919a454 100644 --- a/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/conformer/cache_aware_streaming/conformer_ctc_bpe_streaming.yaml @@ -103,10 +103,16 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 27*4*0.01=1.08s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. 
+ # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[140,27],[140,13],[140,2],[140,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [140, 27] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) untie_biases: true # unties the biases of the TransformerXL layers diff --git a/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml b/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml index 9d6e3a54d9fe..d55e5f927b2e 100644 --- a/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml +++ b/examples/asr/conf/conformer/cache_aware_streaming/conformer_transducer_bpe_streaming.yaml @@ -113,10 +113,16 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: - # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 27*4*0.01=1.08s + # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[140,27],[140,13],[140,2],[140,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [140, 27] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) untie_biases: true # unties the biases of the TransformerXL layers diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml index c68b30a33d5a..749216b1925d 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_bpe_streaming.yaml @@ -97,10 +97,17 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. 
During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml index 654895ec065d..17345119c529 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_ctc_char_streaming.yaml @@ -100,11 +100,19 @@ model: n_heads: 8 # may need to be lower for smaller d_models # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention - # for att_context_style=regular, the right context is recommended to be a small number around 0 to 2 as multiple-layers may increase the effective right context too large + # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. 
+ # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml index 5f223061a420..dbd036458cb8 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_bpe_streaming.yaml @@ -102,10 +102,17 @@ model: # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one - # for chunked_limited you may calculate the look-ahead or right context by the following formula: # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -191,9 +198,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
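Because the look-ahead formula quoted in the comments above is easy to misread, a small helper can print the latency implied by each entry of a multi-lookahead att_context_size list. The subsampling_factor of 8 and window_stride of 0.01 below match the values used in those comments, but they are assumptions about a particular model rather than something read from a config.

    # Sketch only: right attention context converted to seconds of look-ahead,
    # i.e. att_context_size[1] * subsampling_factor * window_stride as in the comments above.
    def lookahead_seconds(att_context_size, subsampling_factor=8, window_stride=0.01):
        return att_context_size[1] * subsampling_factor * window_stride

    for ctx in [[70, 13], [70, 6], [70, 1], [70, 0]]:
        print(ctx, f"{lookahead_seconds(ctx):.2f} s")   # 1.04 s, 0.48 s, 0.08 s, 0.00 s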
optim: diff --git a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml index 68a78ba60aac..50f73d35ca75 100644 --- a/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/cache_aware_streaming/fastconformer_transducer_char_streaming.yaml @@ -106,11 +106,19 @@ model: n_heads: 8 # may need to be lower for smaller d_models # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention - # for att_context_style=regular, the right context is recommended to be a small number around 0 to 2 as multiple-layers may increase the effective right context too large + # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null + xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -196,9 +204,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
optim: diff --git a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml index 8b7a2ce7b39d..26dabaa039fe 100644 --- a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml +++ b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_bpe_streaming.yaml @@ -8,6 +8,8 @@ # FastConformer-CTC's architecture config: NeMo/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml # FastConformer-Transducer's architecture config, along with the optimal batch size and precision: NeMo/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml +# Note: if training loss does not converge, you may increase warm-up to 20K. + name: "FastConformer-Hybrid-Transducer-CTC-BPE-Streaming" model: @@ -106,8 +108,15 @@ model: # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -206,9 +215,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
optim: diff --git a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml index a24829b50788..d8362636f04a 100644 --- a/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml +++ b/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/fastconformer_hybrid_transducer_ctc_char_streaming.yaml @@ -8,6 +8,8 @@ # FastConformer-CTC's architecture config: NeMo/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml # FastConformer-Transducer's architecture config, along with the optimal batch size and precision: NeMo/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml +# Note: if training loss does not converge, you may increase warm-up to 20K. + name: "FastConformer-Hybrid-Transducer-CTC-Char-Streaming" model: @@ -111,8 +113,15 @@ model: # for att_context_style=regular, the right context is recommended to be a small number around 0 to 3 as multiple-layers may increase the effective right context too large # for att_context_style=chunked_limited, the left context need to be dividable by the right context plus one # look-ahead(secs) = att_context_size[1]*subsampling_factor*window_stride, example: 13*8*0.01=1.04s + + # For multi-lookahead models, you may specify a list of context sizes. During the training, different context sizes would be used randomly with the distribution specified by att_context_probs. + # The first item in the list would be the default during test/validation/inference. + # An example of settings for multi-lookahead: + # att_context_size: [[70,13],[70,6],[70,1],[70,0]] + # att_context_probs: [0.25, 0.25, 0.25, 0.25, 0.25] att_context_size: [70, 13] # -1 means unlimited context att_context_style: chunked_limited # regular or chunked_limited + att_context_probs: null xscaling: true # scales up the input embeddings by sqrt(d_model) pos_emb_max_len: 5000 @@ -211,9 +220,9 @@ model: loss_name: "default" warprnnt_numba_kwargs: # FastEmit regularization: https://arxiv.org/abs/2010.11148 - # You may enable FastEmit to reduce the latency of the model for streaming - # It also helps to improve the accuracy of the model in streaming mode - fastemit_lambda: 1e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + # You may enable FastEmit to increase the accuracy and reduce the latency of the model for streaming + # You may set it to lower values like 1e-3 for models with larger right context + fastemit_lambda: 5e-3 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. optim: diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index 7c786f9c9720..74c255741039 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -13,6 +13,7 @@ # limitations under the License. import math +import random from collections import OrderedDict from dataclasses import dataclass from typing import List, Optional, Set @@ -89,9 +90,13 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin): Defaults to 5000 n_heads (int): number of heads in multi-headed attention layers Defaults to 4. 
- att_context_size (List[int]): List of 2 ints corresponding to left and right attention context sizes, - or None for full context. - Defaults to None. + att_context_size (List[Union[List[int],int]]): specifies the context sizes on each side. Each context size should be a list of two integers like [100,100]. + A list of context sizes like [[100,100],[100,50]] can also be passed. -1 means unlimited context. + Defaults to [-1,-1] + att_context_probs (List[float]): a list of probabilities for each of the att_context_size values when a list of them is passed. If not specified, a uniform distribution is used. + Defaults to None + att_context_style (str): 'regular' or 'chunked_limited'. + Defaults to 'regular' xscaling (bool): enables scaling the inputs to the multi-headed attention layers by sqrt(d_model) Defaults to True. untie_biases (bool): whether to not share (untie) the bias weights between layers of Transformer-XL @@ -100,6 +105,11 @@ class ConformerEncoder(NeuralModule, StreamingEncoder, Exportable, AccessMixin): Defaults to 31. conv_norm_type (str): the type of the normalization in the convolutional modules Defaults to 'batch_norm'. + conv_context_size (list): it can be "causal" or a list of two integers where conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size. + None means [(conv_kernel_size-1)//2, (conv_kernel_size-1)//2], and 'causal' means [(conv_kernel_size-1), 0]. + Defaults to None. + conv_dual_mode (bool): specifies whether convolution should be dual mode when dual_offline mode is being used. When enabled, the left half of the convolution kernel is masked in streaming cases. + Defaults to False dropout (float): the dropout rate used in all layers except the attention layers Defaults to 0.1. dropout_pre_encoder (float): the dropout rate used before the encoder @@ -256,6 +266,7 @@ def __init__( self_attention_model='rel_pos', n_heads=4, att_context_size=None, + att_context_probs=None, att_context_style='regular', xscaling=True, untie_biases=True, @@ -279,7 +290,6 @@ def __init__( self.d_model = d_model self.n_layers = n_layers self._feat_in = feat_in - self.scale = math.sqrt(self.d_model) self.att_context_style = att_context_style self.subsampling_factor = subsampling_factor self.subsampling_conv_chunking_factor = subsampling_conv_chunking_factor @@ -289,51 +299,19 @@ def __init__( self.global_attn_separate = global_attn_separate self.global_tokens_spacing = global_tokens_spacing - if att_context_size: - self.att_context_size = list(att_context_size) - else: - self.att_context_size = [-1, -1] - - if isinstance(conv_context_size, ListConfig): - conv_context_size = list(conv_context_size) - - if conv_context_size is not None: - if ( - not isinstance(conv_context_size, list) - and not isinstance(conv_context_size, str) - and not isinstance(conv_context_size, ListConfig) - ): - raise ValueError( - f"Invalid conv_context_size! It should be the string 'causal' or a list of two integers."
- ) - if conv_context_size == "causal": - conv_context_size = [conv_kernel_size - 1, 0] - else: - if conv_context_size[0] + conv_context_size[1] + 1 != conv_kernel_size: - raise ValueError(f"Invalid conv_context_size: {self.conv_context_size}!") - else: - conv_context_size = [(conv_kernel_size - 1) // 2, (conv_kernel_size - 1) // 2] - self.conv_context_size = conv_context_size - - if att_context_style == "chunked_limited": - # the left context for self-attention in chunked_limited mode should be dividable by the right context - # right context=att_context_size[1]+1, and left_context=self.att_context_size[0] - if self.att_context_size[0] > 0 and self.att_context_size[0] % (self.att_context_size[1] + 1) > 0: - raise ValueError("att_context_size[0] % (att_context_size[1] + 1) should be zero!") - if self.att_context_size[1] < 0: - raise ValueError("Right context can not be unlimited for chunked_limited style!") - self.chunk_size = self.att_context_size[1] + 1 - - # left_chunks_num specifies the number of chunks to be visible by each chunk on the left side - if self.att_context_size[0] >= 0: - self.left_chunks_num = self.att_context_size[0] // self.chunk_size - else: - self.left_chunks_num = 100000 - - elif att_context_style == "regular": - self.chunk_size = None - else: - raise ValueError("Invalid att_context_style!") + # Setting up the att_context_size + ( + self.att_context_size_all, + self.att_context_size, + self.att_context_probs, + self.conv_context_size, + ) = self._calc_context_sizes( + att_context_style=att_context_style, + att_context_size=att_context_size, + att_context_probs=att_context_probs, + conv_context_size=conv_context_size, + conv_kernel_size=conv_kernel_size, + ) if xscaling: self.xscale = math.sqrt(d_model) @@ -379,6 +357,7 @@ def __init__( self._feat_out = d_model + # Biases for relative positional encoding if not untie_biases and self_attention_model == "rel_pos": d_head = d_model // n_heads pos_bias_u = nn.Parameter(torch.Tensor(n_heads, d_head)) @@ -389,8 +368,8 @@ def __init__( pos_bias_u = None pos_bias_v = None + # Positional encodings self.pos_emb_max_len = pos_emb_max_len - self.att_mask = None if self_attention_model == "rel_pos": self.pos_enc = RelPositionalEncoding( d_model=d_model, @@ -458,51 +437,6 @@ def __init__( # will be set in self.forward() if defined in AccessMixin config self.interctc_capture_at_layers = None - def update_max_seq_length(self, seq_length: int, device): - # Find global max audio length across all nodes - if torch.distributed.is_initialized(): - global_max_len = torch.tensor([seq_length], dtype=torch.float32, device=device) - - # Update across all ranks in the distributed system - torch.distributed.all_reduce(global_max_len, op=torch.distributed.ReduceOp.MAX) - - seq_length = global_max_len.to(torch.int64).item() - - if seq_length > self.max_audio_length: - self.set_max_audio_length(seq_length) - - def set_max_audio_length(self, max_audio_length): - """ - Sets maximum input length. - Pre-calculates internal seq_range mask. 
- """ - self.max_audio_length = max_audio_length - device = next(self.parameters()).device - self.pos_enc.extend_pe(max_audio_length, device) - - if self.self_attention_model != "rel_pos_local_attn": - att_mask = torch.ones(1, max_audio_length, max_audio_length, dtype=torch.bool, device=device) - if self.chunk_size is None: - if self.att_context_size[0] >= 0: - att_mask = att_mask.triu(diagonal=-self.att_context_size[0]) - if self.att_context_size[1] >= 0: - att_mask = att_mask.tril(diagonal=self.att_context_size[1]) - else: - chunk_idx = torch.arange(0, max_audio_length, dtype=torch.int64, device=att_mask.device) - chunk_idx = torch.div(chunk_idx, self.chunk_size, rounding_mode="trunc") - diff_chunks = chunk_idx.unsqueeze(1) - chunk_idx.unsqueeze(0) - chunked_limited_mask = torch.logical_and( - torch.le(diff_chunks, self.left_chunks_num), torch.ge(diff_chunks, 0) - ) - att_mask = torch.logical_and(att_mask, chunked_limited_mask.unsqueeze(0)) - - if hasattr(self, 'att_mask'): - self.att_mask = att_mask - else: - self.register_buffer('att_mask', att_mask, persistent=False) - else: - self.att_mask = None - def forward_for_export( self, audio_signal, length, cache_last_channel=None, cache_last_time=None, cache_last_channel_len=None ): @@ -565,17 +499,24 @@ def forward_internal( self, audio_signal, length, cache_last_channel=None, cache_last_time=None, cache_last_channel_len=None ): self.update_max_seq_length(seq_length=audio_signal.size(2), device=audio_signal.device) - max_audio_length = audio_signal.size(-1) if length is None: length = audio_signal.new_full( - (audio_signal.size(0),), max_audio_length, dtype=torch.int64, device=audio_signal.device + (audio_signal.size(0),), audio_signal.size(-1), dtype=torch.int64, device=audio_signal.device ) if cache_last_time is not None: cache_last_time_next = torch.zeros_like(cache_last_time) else: cache_last_time_next = None + + # select a random att_context_size with the distribution specified by att_context_probs during training + # for non-validation cases like test, validation or inference, it uses the first mode in self.att_context_size + if self.training and len(self.att_context_size_all) > 1: + cur_att_context_size = random.choices(self.att_context_size_all, weights=self.att_context_probs)[0] + else: + cur_att_context_size = self.att_context_size + audio_signal = torch.transpose(audio_signal, 1, 2) if isinstance(self.pre_encode, nn.Linear): @@ -588,11 +529,10 @@ def forward_internal( audio_signal = audio_signal[:, self.streaming_cfg.drop_extra_pre_encoded :, :] length = (length - self.streaming_cfg.drop_extra_pre_encoded).clamp(min=0) - max_audio_length = audio_signal.size(1) - if self.reduction_position is not None and cache_last_channel is not None: raise ValueError("Caching with reduction feature is not supported yet!") + max_audio_length = audio_signal.size(1) if cache_last_channel is not None: cache_len = self.streaming_cfg.last_channel_cache_size cache_keep_size = max_audio_length - self.streaming_cfg.cache_drop_size @@ -606,17 +546,20 @@ def forward_internal( cache_len = 0 offset = None - if self.self_attention_model == 'abs_pos': - audio_signal, pos_emb = self.pos_enc(x=audio_signal) - else: - audio_signal, pos_emb = self.pos_enc(x=audio_signal, cache_len=cache_len) + audio_signal, pos_emb = self.pos_enc(x=audio_signal, cache_len=cache_len) # Create the self-attention and padding masks - pad_mask, att_mask = self._create_masks(max_audio_length, padding_length, offset, audio_signal.device) + pad_mask, att_mask = self._create_masks( + 
att_context_size=cur_att_context_size, + padding_length=padding_length, + max_audio_length=max_audio_length, + offset=offset, + device=audio_signal.device, + ) if cache_last_channel is not None: pad_mask = pad_mask[:, cache_len:] - if self.att_mask is not None: + if att_mask is not None: att_mask = att_mask[:, cache_len:] for lth, (drop_prob, layer) in enumerate(zip(self.layer_drop_probs, self.layers)): @@ -650,7 +593,13 @@ def forward_internal( # Don't update the audio_signal here because then it will again scale the audio_signal # and cause an increase in the WER _, pos_emb = self.pos_enc(x=audio_signal, cache_len=cache_len) - pad_mask, att_mask = self._create_masks(max_audio_length, length, offset, audio_signal.device) + pad_mask, att_mask = self._create_masks( + att_context_size=cur_att_context_size, + padding_length=length, + max_audio_length=max_audio_length, + offset=offset, + device=audio_signal.device, + ) # saving tensors if required for interctc loss if self.is_access_enabled(): @@ -687,7 +636,60 @@ def forward_internal( else: return audio_signal, length - def _create_masks(self, max_audio_length, padding_length, offset, device): + def update_max_seq_length(self, seq_length: int, device): + # Find global max audio length across all nodes + if torch.distributed.is_initialized(): + global_max_len = torch.tensor([seq_length], dtype=torch.float32, device=device) + + # Update across all ranks in the distributed system + torch.distributed.all_reduce(global_max_len, op=torch.distributed.ReduceOp.MAX) + + seq_length = global_max_len.int().item() + + if seq_length > self.max_audio_length: + self.set_max_audio_length(seq_length) + + def set_max_audio_length(self, max_audio_length): + """ + Sets maximum input length. + Pre-calculates internal seq_range mask. 
+ """ + self.max_audio_length = max_audio_length + device = next(self.parameters()).device + self.pos_enc.extend_pe(max_audio_length, device) + + def _create_masks(self, att_context_size, padding_length, max_audio_length, offset, device): + if self.self_attention_model != "rel_pos_local_attn": + att_mask = torch.ones(1, max_audio_length, max_audio_length, dtype=torch.bool, device=device) + + if self.att_context_style == "regular": + if att_context_size[0] >= 0: + att_mask = att_mask.triu(diagonal=-att_context_size[0]) + if att_context_size[1] >= 0: + att_mask = att_mask.tril(diagonal=att_context_size[1]) + elif self.att_context_style == "chunked_limited": + # When right context is unlimited, just the left side of the masking need to get updated + if att_context_size[1] == -1: + if att_context_size[0] >= 0: + att_mask = att_mask.triu(diagonal=-att_context_size[0]) + else: + chunk_size = att_context_size[1] + 1 + # left_chunks_num specifies the number of chunks to be visible by each chunk on the left side + if att_context_size[0] >= 0: + left_chunks_num = att_context_size[0] // chunk_size + else: + left_chunks_num = 10000 + + chunk_idx = torch.arange(0, max_audio_length, dtype=torch.int, device=att_mask.device) + chunk_idx = torch.div(chunk_idx, chunk_size, rounding_mode="trunc") + diff_chunks = chunk_idx.unsqueeze(1) - chunk_idx.unsqueeze(0) + chunked_limited_mask = torch.logical_and( + torch.le(diff_chunks, left_chunks_num), torch.ge(diff_chunks, 0) + ) + att_mask = torch.logical_and(att_mask, chunked_limited_mask.unsqueeze(0)) + else: + att_mask = None + # pad_mask is the masking to be used to ignore paddings pad_mask = torch.arange(0, max_audio_length, device=device).expand( padding_length.size(0), -1 @@ -697,24 +699,19 @@ def _create_masks(self, max_audio_length, padding_length, offset, device): pad_mask_off = torch.arange(0, max_audio_length, device=device).expand( padding_length.size(0), -1 ) >= offset.unsqueeze(-1) - pad_mask = pad_mask_off.logical_and(pad_mask) - if self.att_mask is not None: + if att_mask is not None: # pad_mask_for_att_mask is the mask which helps to ignore paddings pad_mask_for_att_mask = pad_mask.unsqueeze(1).repeat([1, max_audio_length, 1]) pad_mask_for_att_mask = torch.logical_and(pad_mask_for_att_mask, pad_mask_for_att_mask.transpose(1, 2)) # att_mask is the masking to be used by the MHA layers to ignore the tokens not supposed to be visible - att_mask = self.att_mask[:, :max_audio_length, :max_audio_length] + att_mask = att_mask[:, :max_audio_length, :max_audio_length] # paddings should also get ignored, so pad_mask_for_att_mask is used to ignore their corresponding scores att_mask = torch.logical_and(pad_mask_for_att_mask, att_mask.to(pad_mask_for_att_mask.device)) - att_mask = ~att_mask - else: - att_mask = None pad_mask = ~pad_mask - return pad_mask, att_mask def enable_pad_mask(self, on=True): @@ -723,8 +720,64 @@ def enable_pad_mask(self, on=True): self.use_pad_mask = on return mask + def _calc_context_sizes( + self, att_context_size, att_context_probs, att_context_style, conv_context_size, conv_kernel_size + ): + # convert att_context_size to a standard list of lists + if att_context_size: + att_context_size_all = list(att_context_size) + if isinstance(att_context_size_all[0], int): + att_context_size_all = [att_context_size_all] + for i, att_cs in enumerate(att_context_size_all): + if isinstance(att_cs, ListConfig): + att_context_size_all[i] = list(att_cs) + if att_context_style == "chunked_limited": + if att_cs[0] > 0 and att_cs[0] % (att_cs[1] + 1) 
> 0: + raise ValueError(f"att_context_size[{i}][0] % (att_context_size[{i}][1] + 1) should be zero!") + if att_cs[1] < 0 and len(att_context_size_all) <= 1: + raise ValueError( + f"Right context (att_context_size[{i}][1]) cannot be unlimited for chunked_limited style!" + ) + else: + att_context_size_all = [[-1, -1]] + + if att_context_probs: + if len(att_context_probs) != len(att_context_size_all): + raise ValueError("The size of the att_context_probs should be the same as att_context_size.") + att_context_probs = list(att_context_probs) + if sum(att_context_probs) != 1: + raise ValueError( + "The sum of numbers in att_context_probs should be equal to one to be a distribution." + ) + else: + att_context_probs = [1.0 / len(att_context_size_all)] * len(att_context_size_all) + + if conv_context_size is not None: + if isinstance(conv_context_size, ListConfig): + conv_context_size = list(conv_context_size) + if not isinstance(conv_context_size, list) and not isinstance(conv_context_size, str): + raise ValueError( + f"Invalid conv_context_size! It should be the string 'causal' or a list of two integers." + ) + if conv_context_size == "causal": + conv_context_size = [conv_kernel_size - 1, 0] + else: + if conv_context_size[0] + conv_context_size[1] + 1 != conv_kernel_size: + raise ValueError(f"Invalid conv_context_size: {conv_context_size}!") + else: + conv_context_size = [(conv_kernel_size - 1) // 2, (conv_kernel_size - 1) // 2] + return att_context_size_all, att_context_size_all[0], att_context_probs, conv_context_size + + def set_default_att_context_size(self, att_context_size): + self.att_context_size = att_context_size + def setup_streaming_params( - self, chunk_size: int = None, shift_size: int = None, left_chunks: int = None, max_context: int = 10000 + self, + chunk_size: int = None, + shift_size: int = None, + left_chunks: int = None, + att_context_size: list = None, + max_context: int = 10000, ): """ This function sets the needed values and parameters to perform streaming. The configuration will be stored in self.streaming_cfg.
@@ -737,25 +790,28 @@ def setup_streaming_params( Defaults to -1 (means feat_out is d_model) """ streaming_cfg = CacheAwareStreamingConfig() + + # When att_context_size is not specified, it uses the default_att_context_size + if att_context_size is None: + att_context_size = self.att_context_size + if chunk_size is not None: if chunk_size < 1: raise ValueError("chunk_size needs to be a number larger or equal to one.") lookahead_steps = chunk_size - 1 streaming_cfg.cache_drop_size = chunk_size - shift_size elif self.att_context_style == "chunked_limited": - lookahead_steps = self.att_context_size[1] + lookahead_steps = att_context_size[1] streaming_cfg.cache_drop_size = 0 elif self.att_context_style == "regular": - lookahead_steps = self.att_context_size[1] * self.n_layers + self.conv_context_size[1] * self.n_layers + lookahead_steps = att_context_size[1] * self.n_layers + self.conv_context_size[1] * self.n_layers streaming_cfg.cache_drop_size = lookahead_steps else: streaming_cfg.cache_drop_size = 0 lookahead_steps = None if chunk_size is None: - streaming_cfg.last_channel_cache_size = ( - self.att_context_size[0] if self.att_context_size[0] >= 0 else max_context - ) + streaming_cfg.last_channel_cache_size = att_context_size[0] if att_context_size[0] >= 0 else max_context else: if left_chunks is None: raise ValueError("left_chunks can not be None when chunk_size is set.") @@ -878,9 +934,9 @@ def change_attention_model( 'rel_pos_local_attn': relative positional embedding and Transformer-XL with local attention using overlapping windows. Attention context is determined by att_context_size parameter. 'abs_pos': absolute positional embedding and Transformer - If None is provided, the self_attention_model isn't changed. Defauts to None. + If None is provided, the self_attention_model isn't changed. Defaults to None. att_context_size (List[int]): List of 2 ints corresponding to left and right attention context sizes, - or None to keep as it is. Defauts to None. + or None to keep as it is. Defaults to None. update_config (bool): Whether to update the config or not with the new attention model. Defaults to True. device (torch.device): If provided, new layers will be moved to the device. 
@@ -889,19 +945,16 @@ def change_attention_model( if att_context_size: att_context_size = list(att_context_size) - elif hasattr(self._cfg, "att_context_size"): - att_context_size = self._cfg.att_context_size else: att_context_size = self.att_context_size if self_attention_model is None: - self_attention_model = self._cfg.self_attention_model + self_attention_model = self.self_attention_model if self_attention_model == 'rel_pos_local_attn' and max(att_context_size) <= 0: raise ValueError("When using local attention, context size must be set > 0") if self_attention_model == "rel_pos": - self.att_mask = None new_pos_enc = RelPositionalEncoding( d_model=self._cfg.d_model, dropout_rate=self._cfg.dropout, @@ -938,7 +991,6 @@ def change_attention_model( for name, m in self.named_modules(): if type(m) == ConformerLayer: - if self_attention_model == 'rel_pos': new_attn = RelPositionMultiHeadAttention( n_head=self._cfg.n_heads, diff --git a/nemo/collections/asr/modules/squeezeformer_encoder.py b/nemo/collections/asr/modules/squeezeformer_encoder.py index 952c9b53d233..a887abd19ebb 100644 --- a/nemo/collections/asr/modules/squeezeformer_encoder.py +++ b/nemo/collections/asr/modules/squeezeformer_encoder.py @@ -149,7 +149,6 @@ def __init__( d_ff = d_model * ff_expansion_factor self.d_model = d_model self._feat_in = feat_in - self.scale = math.sqrt(self.d_model) if att_context_size: self.att_context_size = att_context_size else: diff --git a/nemo/collections/asr/parts/submodules/multi_head_attention.py b/nemo/collections/asr/parts/submodules/multi_head_attention.py index 40baf1141bd3..b7356ffe87e4 100644 --- a/nemo/collections/asr/parts/submodules/multi_head_attention.py +++ b/nemo/collections/asr/parts/submodules/multi_head_attention.py @@ -888,17 +888,19 @@ def extend_pe(self, length, device): positions = torch.arange(0, length, dtype=torch.float32, device=device).unsqueeze(1) self.create_pe(positions=positions) - def forward(self, x: torch.Tensor): + def forward(self, x: torch.Tensor, cache_len=0): """Adds positional encoding. Args: x (torch.Tensor): Input. 
Its shape is (batch, time, feature_size) + cache_len (int): the size of the cache which is used to shift positions Returns: x+pos_emb (torch.Tensor): Its shape is (batch, time, feature_size) pos_emb (torch.Tensor): Its shape is (1, time, feature_size) """ + input_len = x.size(1) + cache_len if self.xscale: x = x * self.xscale - pos_emb = self.pe[:, : x.size(1)] + pos_emb = self.pe[:, :input_len] if self.dropout_emb: pos_emb = self.dropout_emb(pos_emb) x = x + pos_emb diff --git a/tests/collections/nlp/test_huggingface.py b/tests/collections/nlp/test_huggingface.py index cfe2845caa9b..0ad7b5850475 100644 --- a/tests/collections/nlp/test_huggingface.py +++ b/tests/collections/nlp/test_huggingface.py @@ -85,12 +85,13 @@ def test_get_pretrained_chinese_bert_wwm_model(self): tokenizer = get_tokenizer(tokenizer_name=model_name) assert isinstance(tokenizer, AutoTokenizer) - @pytest.mark.with_downloads() - @pytest.mark.unit - def test_get_pretrained_arabic_model(self): - model_name = 'asafaya/bert-base-arabic' - self.omega_conf.language_model.pretrained_model_name = model_name - model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) - assert isinstance(model, nemo_nlp.modules.BertModule) - tokenizer = get_tokenizer(tokenizer_name=model_name) - assert isinstance(tokenizer, AutoTokenizer) + # model is not on HF anymore + # @pytest.mark.with_downloads() + # @pytest.mark.unit + # def test_get_pretrained_arabic_model(self): + # model_name = 'asafaya/bert-base-arabic' + # self.omega_conf.language_model.pretrained_model_name = model_name + # model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) + # assert isinstance(model, nemo_nlp.modules.BertModule) + # tokenizer = get_tokenizer(tokenizer_name=model_name) + # assert isinstance(tokenizer, AutoTokenizer) From 3ed3c4e7d251f32b2be5c835a89e6ab2f389a056 Mon Sep 17 00:00:00 2001 From: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Date: Thu, 8 Jun 2023 22:33:01 +0300 Subject: [PATCH 031/123] added changes to ramp up bs (#6799) * rampup bs changes Signed-off-by: dimapihtar * rampup bs changes Signed-off-by: dimapihtar * fixed styling Signed-off-by: dimapihtar * fix bug Signed-off-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> --------- Signed-off-by: dimapihtar Signed-off-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: Eric Harper --- .../megatron/data_samplers.py | 3 +- .../language_modeling/megatron_base_model.py | 18 ++++-- .../language_modeling/megatron_gpt_model.py | 55 +++++++++---------- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py index edc58ee999c2..7df915533492 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/data_samplers.py @@ -33,6 +33,7 @@ def __init__( data_parallel_size: int, drop_last: bool = True, global_batch_size: Optional[int] = None, + rampup_batch_size: Optional[list] = None, pad_samples_to_global_batch_size: Optional[bool] = False, ) -> None: # Sanity checks. 
@@ -50,7 +51,7 @@ def __init__( data_parallel_rank, data_parallel_size ) ) - if global_batch_size is not None: + if global_batch_size is not None and rampup_batch_size is None: if global_batch_size % (micro_batch_size * data_parallel_size) != 0: raise RuntimeError( f"`global_batch_size` ({global_batch_size}) is not divisible by " diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 2aaedbe5a806..2568a14f8dbf 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -481,10 +481,20 @@ def configure_optimizers(self): def compute_consumed_samples(self, steps_since_resume=0): app_state = AppState() - consumed_samples = ( - self.init_consumed_samples - + steps_since_resume * app_state.data_parallel_size * self.cfg.micro_batch_size * get_num_microbatches() - ) + + if self.cfg.get('rampup_batch_size', None): + from apex.transformer.pipeline_parallel.utils import _GLOBAL_NUM_MICROBATCHES_CALCULATOR + + current_global_batch_size = getattr(_GLOBAL_NUM_MICROBATCHES_CALCULATOR, 'current_global_batch_size', 1) + consumed_samples = self.prev_consumed_samples + self.if_first_step * current_global_batch_size + else: + consumed_samples = ( + self.init_consumed_samples + + steps_since_resume + * app_state.data_parallel_size + * self.cfg.micro_batch_size + * get_num_microbatches() + ) return int(consumed_samples) def _extract_consumed_samples_from_ckpt(self, ckpt_path): diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 3530ffcfc371..8eff896cf9d8 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -208,6 +208,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False) + self.rampup_batch_size = self.cfg.get('rampup_batch_size', None) + if self.rampup_batch_size: + self.prev_consumed_samples = 0 + self.if_first_step = 0 + self.prev_global_batch_size = None + if not self.megatron_amp_o2 and self.cfg.get('virtual_pipeline_model_parallel_size', None): raise ValueError('Virtual pipeline model parallel is only supported when using megatron_amp_O2') @@ -507,6 +513,13 @@ def training_step(self, dataloader_iter, batch_idx): The input batch to each micro-batch is fetched using the dataloader function in the micro-batch fwd function. 
""" + if self.rampup_batch_size: + num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + current_global_batch_size = num_microbatch_calculator.current_global_batch_size + logging.info(current_global_batch_size) + # do validation and save the checkpoint when gbs is changed + if self.prev_global_batch_size != current_global_batch_size and self.prev_global_batch_size: + self.trainer.should_stop = True # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() @@ -580,16 +593,15 @@ def training_step(self, dataloader_iter, batch_idx): 'consumed_samples', consumed_samples, prog_bar=True, rank_zero_only=True, batch_size=1, ) - if self.cfg.get('rampup_batch_size', None): - micro_batch_size = self.cfg.get('micro_batch_size', 1) - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - current_global_batch_size = get_num_microbatches() * micro_batch_size * total_gpus_number - self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) - - num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + if self.rampup_batch_size: + self.prev_global_batch_size = current_global_batch_size + self.prev_consumed_samples = consumed_samples num_microbatch_calculator.update( - consumed_samples=consumed_samples, consistency_check=True, + consumed_samples=consumed_samples, consistency_check=False, ) + current_global_batch_size = num_microbatch_calculator.current_global_batch_size + self.log('global_batch_size', current_global_batch_size, prog_bar=True, rank_zero_only=True, batch_size=1) + self.if_first_step = 1 return loss_mean @@ -936,6 +948,7 @@ def build_pretraining_data_loader( data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, global_batch_size=self.cfg.global_batch_size, + rampup_batch_size=self.cfg.rampup_batch_size, pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, ) elif self.cfg.data.dataloader_type == 'cyclic': @@ -986,28 +999,10 @@ def setup(self, stage=None): self.init_consumed_samples = init_consumed_samples self.init_global_step = self.trainer.global_step - rampup_batch_size = self.cfg.get('rampup_batch_size', None) - if rampup_batch_size: - start_batch_size = rampup_batch_size[0] - batch_size_increment = rampup_batch_size[1] - total_gpus_number = self.trainer.num_devices * self.trainer.num_nodes - - assert start_batch_size % (total_gpus_number) == 0, ( - 'expected' - ' start batch size ({}) to be divisible by total number of GPUs' - ' ({})'.format(start_batch_size, total_gpus_number) - ) - - micro_batch_size = self.cfg.get('micro_batch_size', 1) - tensor_model_parallel_size = self.cfg.get('tensor_model_parallel_size', 1) - pipeline_model_parallel_size = self.cfg.get('pipeline_model_parallel_size', 1) - total_data_parallel_size = total_gpus_number // (tensor_model_parallel_size * pipeline_model_parallel_size) - - assert batch_size_increment % (micro_batch_size * total_data_parallel_size) == 0, ( - 'expected' - ' batch size increment ({}) to be divisible by micro_batch_size ({}) times total data parallel size' - ' ({})'.format(batch_size_increment, micro_batch_size, total_data_parallel_size) - ) + if self.rampup_batch_size: + num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR + num_microbatch_calculator.update(self.init_consumed_samples, consistency_check=False) + 
self.prev_consumed_samples = self.init_consumed_samples if stage == 'predict': return From 7538a08affeb1d48286d1643b60a91429ba01efe Mon Sep 17 00:00:00 2001 From: Dounx Date: Fri, 9 Jun 2023 03:45:04 +0800 Subject: [PATCH 032/123] Fix typo in core.rst (#6838) Signed-off-by: Dounx --- docs/source/core/core.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/core/core.rst b/docs/source/core/core.rst index 4f5589653172..7b2edfa0f5c4 100644 --- a/docs/source/core/core.rst +++ b/docs/source/core/core.rst @@ -38,7 +38,7 @@ To see all available pretrained models for a specific NeMo model, use the ``list .. code-block:: Python - nemo_asr.model.EncDecCTCModel.list_available_models() + nemo_asr.models.EncDecCTCModel.list_available_models() For detailed information on the available pretrained models, refer to the collections documentation: From 014fa02bef4355275d7d21ec58ce91788460426b Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Thu, 8 Jun 2023 13:48:59 -0700 Subject: [PATCH 033/123] add back ptuning pp2 test (#6394) Signed-off-by: arendu --- Jenkinsfile | 87 ++++++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 27537b53a557..d16379cabb8a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3588,50 +3588,49 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } - // TODO: add when https://github.com/NVIDIA/apex/pull/1596 is merged - // stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { - // when { - // anyOf { - // branch 'main' - // changeRequest target: 'main' - // } - // } - // failFast true - // parallel{ - // stage('GPT Prompt Learning TP=1 PP=2') { - // steps { - // sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ - // --config-name=megatron_gpt_prompt_learning_config \ - // name='/home/TestData/nlp/prompt_learning/p_tuning_test_pp' \ - // trainer.devices=2 \ - // trainer.max_steps=1 \ - // trainer.val_check_interval=1 \ - // trainer.max_epochs=null \ - // model.optim.name=fused_adam \ - // model.data.num_workers=1 \ - // model.pipeline_model_parallel_size=2 \ - // model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - // model.existing_tasks=[] \ - // model.new_tasks=['boolq'] \ - // model.data.train_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ - // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ - // model.global_batch_size=4" - // sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp" - // sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py \ - // virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo' \ - // gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - // inference.greedy=True \ - // inference.add_BOS=False \ - // trainer.devices=2 \ - // pipeline_model_parallel_size=2 \ - // pred_file_path=/home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt \ - // data_paths=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl']" - // sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo" - // sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt" - // } - // } - // } - // } + stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + parallel{ + stage('GPT Prompt Learning TP=1 PP=2') { + steps { + 
sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ + --config-name=megatron_gpt_prompt_learning_config \ + name='/home/TestData/nlp/prompt_learning/p_tuning_test_pp' \ + trainer.devices=2 \ + trainer.max_steps=1 \ + trainer.val_check_interval=1 \ + trainer.max_epochs=null \ + model.optim.name=fused_adam \ + model.data.num_workers=1 \ + model.pipeline_model_parallel_size=2 \ + model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + model.existing_tasks=[] \ + model.new_tasks=['boolq'] \ + model.data.train_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ + model.data.validation_ds=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl'] \ + model.global_batch_size=4" + sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp" + sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning_eval.py \ + virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo' \ + gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + inference.greedy=True \ + inference.add_BOS=False \ + trainer.devices=2 \ + pipeline_model_parallel_size=2 \ + pred_file_path=/home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt \ + data_paths=['/home/TestData/nlp/prompt_learning/boolq_CI_test.jsonl']" + sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp.nemo" + sh "rm -rf /home/TestData/nlp/prompt_learning/p_tuning_test_pp_preds.txt" + } + } + } + } // TODO: Add this test back. Test was failing on CI machines due to HW error // stage('L2: Megatron GPT Convert from Megatron-LM checkpoing and Eval') { From 29ceec05edb8ad0d8e88896c85f6eac7055c3bd3 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Thu, 8 Jun 2023 15:21:16 -0700 Subject: [PATCH 034/123] t5 lora tuning (#6612) * t5 lora Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * eval lora t5 Signed-off-by: arendu * adjust differernt lora dims Signed-off-by: arendu * minor changes Signed-off-by: David Mosallanezhad * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * bugfix for state_dict Signed-off-by: David Mosallanezhad --------- Signed-off-by: arendu Signed-off-by: David Mosallanezhad Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: David Mosallanezhad Co-authored-by: David --- .../conf/megatron_t5_lora_inference.yaml | 36 ++++ .../conf/megatron_t5_lora_tuning_config.yaml | 99 +++++++++++ .../tuning/megatron_t5_lora_eval.py | 160 ++++++++++++++++++ .../tuning/megatron_t5_lora_tuning.py | 107 ++++++++++++ .../megatron_t5_adapter_model.py | 129 ++++++++++++++ .../megatron/adapters/parallel_adapters.py | 30 +++- .../nlp/modules/common/megatron/attention.py | 21 ++- 7 files changed, 580 insertions(+), 2 deletions(-) create mode 100644 examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml create mode 100644 examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml create mode 100644 examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py create mode 100644 examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml new file mode 100644 index 000000000000..008241d19389 --- /dev/null +++ 
b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_inference.yaml @@ -0,0 +1,36 @@ +inference: + greedy: True # Whether or not to use sampling ; use greedy decoding otherwise + top_k: 0 # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p: 0.9 # If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: 1.0 # sampling temperature + add_BOS: True # add the bos token at the begining of the prompt + tokens_to_generate: 30 # The minimum length of the sequence to be generated. + all_probs: False # whether return the log prob for all the tokens in vocab + repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. + min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. + compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False + + +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + +data: + test_ds: ??? + num_workers: 1 + global_batch_size: 4 + micro_batch_size: 4 + +tensor_model_parallel_size: -1 +pipeline_model_parallel_size: -1 +pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others) +language_model_path: ??? # GPT nemo file path # used when starting from a .nemo file +adapter_model_file: ??? # .nemo file saved during training (using megatron_t5_lora_tuning.py) +pred_file_path: null # save predictions to this file +checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training +checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null # model configuration file, only used for PTL checkpoint loading +batch_size: 8 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml new file mode 100644 index 000000000000..6663df58c823 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/conf/megatron_t5_lora_tuning_config.yaml @@ -0,0 +1,99 @@ +name: adapter_tuning_${model.new_tasks[0]}_max_epochs${trainer.max_epochs}_lora_dim${model.lora_tuning.kqv_adapter_dim} + +trainer: + devices: 1 + accelerator: gpu + num_nodes: 1 + precision: 16 + logger: False + enable_checkpointing: False + replace_sampler_ddp: False + max_epochs: 10 + max_steps: 1000 + log_every_n_steps: 1 + val_check_interval: 2 + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + resume_from_checkpoint: null + benchmark: False + +exp_manager: + explicit_log_dir: null + exp_dir: nemo-lora-mt0-tr + name: ${name} + create_wandb_logger: False + wandb_logger_kwargs: + project: null + name: null + resume_if_exists: True + resume_ignore_no_checkpoint: True + create_checkpoint_callback: True + checkpoint_callback_params: + monitor: reduced_train_loss + save_top_k: 1 + mode: min + save_nemo_on_train_end: True # Should be false, correct prompt learning model file is saved at model.virtual_prompt_save_path set below + filename: "megatron_t5_adapter_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}" + model_parallel_size: ${model.tensor_model_parallel_size} + save_best_model: True + +model: + seed: 1234 + nemo_path: ${exp_manager.exp_dir}/${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be 
saved + virtual_prompt_style: 'no-prompts' #'prompt-tuning' # adapter tuning requires no virtual prompts + encoder_seq_length: 2048 + gradient_as_bucket_view: false + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + global_batch_size: 4 + micro_batch_size: 4 + validation_global_batch_size: ${model.global_batch_size} + validation_micro_batch_size: ${model.micro_batch_size} + validation_drop_last: False + report_validation_metric: False + validation_metric: accuracy + + restore_path: null # Path to an existing p-tuned/prompt tuned .nemo model you wish to add new tasks to or run inference with + language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required + existing_tasks: [] + new_tasks: ["taskname"] + + task_templates: + - taskname: "taskname" # The task name + prompt_template: "{prompt} {completion}" # Prompt template for task, specify virtual prompt positions with <|VIRTUAL_PROMPT_#|> + total_virtual_tokens: 0 # Sum of tokens in virtual_token_splits must add to this number. Can differ between new and existing tasks, but must match across all new tasks being tuned at the same time. + virtual_token_splits: [] # number of virtual tokens to be inserted at each VIRTUAL PROMPT location, must add to total_virtual_tokens + truncate_field: "prompt" # The {field} in the prompt template whose text will be truncated if the input is too long, if null, inputs that are too long will just be skipped. + answer_field: "completion" + + lora_tuning: + kqv_adapter_dim: 24 + kv_adapter_dim: 16 + q_adapter_dim: 8 + adapter_dropout: 0.1 + column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal + row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal + + data: + train_ds: ??? + validation_ds: ??? + shuffle: True + num_workers: 0 + pin_memory: True + add_eos: True + + + optim: + name: fused_adam + lr: 1e-3 + weight_decay: 0.01 + betas: + - 0.9 + - 0.98 + sched: + name: CosineAnnealing + warmup_steps: 50 + constant_steps: 0 + min_lr: 0.0 + monitor: val_loss + reduce_on_plateau: false diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py new file mode 100644 index 000000000000..d9de94843071 --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_eval.py @@ -0,0 +1,160 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import torch +import torch.multiprocessing as mp +from megatron.core import parallel_state +from omegaconf import OmegaConf +from omegaconf.omegaconf import open_dict +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy +from nemo.core.config import hydra_runner +from nemo.utils.app_state import AppState + +mp.set_start_method("spawn", force=True) + +""" +This is the script to run an Adapter Tuned GPT Model for text generation. + +Usage: + Assume the model has TP=1, PP=1 in the following use cases. + a. run greedy inference using a base gpt nemo file, and an adapter nemo file: + python megatron_gpt_ia3_eval.py \ + gpt_model_file=PATH TO GPT MODEL NEMO FILE \ + adapter_model_file=PATH TO ADAPTER MODEL NEMO FILE (generated by training script: ./megatron_gpt_ia3_tuning.py) \ + data_paths=[PATH TO A JSONL FILE CONTAINING PROMPTS], \ + pred_file_path=PATH TO OUTPUT FILE TO DUMP PREDICTIONS +""" + +if not torch.cuda.is_available(): + raise EnvironmentError("GPU is needed for the inference") + + +@hydra_runner(config_path="conf", config_name="megatron_t5_adapter_inference") +def main(cfg) -> None: + + # trainer required for restoring model parallel models + trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) + + if ( + cfg.tensor_model_parallel_size < 0 + or cfg.pipeline_model_parallel_size < 0 + or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 + ): + model_config = MegatronT5LoraModel.restore_from( + restore_path=cfg.language_model_path, trainer=trainer, return_config=True, + ) + + with open_dict(cfg): + cfg.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) + cfg.pipeline_model_parallel_size = model_config.get('pipeline_model_parallel_size', 1) + cfg.pipeline_model_parallel_split_rank = model_config.get('pipeline_model_parallel_split_rank', 0) + + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.pipeline_model_parallel_split_rank, + ) + + # Load an adapter model, must be provided in config + if cfg.get("adapter_model_file", None) is not None and cfg.get("language_model_path", None) is not None: + # Update frozen GPT model path in case it has changed + adapter_tuning_cfg = MegatronT5LoraModel.restore_from( + cfg.adapter_model_file, trainer=trainer, return_config=True + ) + with open_dict(adapter_tuning_cfg): + adapter_tuning_cfg.language_model_path = cfg.language_model_path + adapter_tuning_cfg.pretrained_language_model_path = cfg.language_model_path + adapter_tuning_cfg.micro_batch_size = cfg.data.micro_batch_size + adapter_tuning_cfg.global_batch_size = cfg.data.global_batch_size + + # Now load 
prompt learning model with frozen gpt model base + model = MegatronT5LoraModel.restore_from( + restore_path=cfg.adapter_model_file, trainer=trainer, override_config_path=adapter_tuning_cfg + ) + + # Or load regular GPT model + else: + raise NotImplementedError( + "This script is meant for inference from an Infused Adapter Tuned T5 Model, config should contain an adapter_model_file and a language_model_path" + ) + + # check whether the DDP is initialized + if parallel_state.is_unitialized(): + + def dummy(): + return + + if trainer.strategy.launcher is not None: + trainer.strategy.launcher.launch(dummy, trainer=trainer) + trainer.strategy.setup_environment() + + model.freeze() + + # Have to turn off activations_checkpoint_method for inference + try: + model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + + try: + model.frozen_model.model.language_model.encoder.activations_checkpoint_method = None + except AttributeError: + pass + + test_ds, test_dl = model.build_virtual_prompt_dataset( + dataset_paths=cfg.data.test_ds, + batch_size=cfg.data.global_batch_size, + for_train=False, + drop_last=False, + shuffle=False, + num_workers=cfg.data.num_workers, + pin_memory=True, + ) + + config = OmegaConf.to_container(cfg.inference) + model.set_inference_config(config) + response = trainer.predict(model, test_dl) + print("***************************") + if cfg.pred_file_path is not None: + with open(cfg.pred_file_path, "w", encoding="utf-8") as f: + for batch in response: + for inp, pred in zip(batch['input_text'], batch['preds_text']): + inp = ' '.join(inp.split('\n')) + pred = ' '.join(pred.split('\n')) + f.write(f'{inp} {pred}\n') + print("predictions saved to {}".format(cfg.pred_file_path)) + else: + print(response) + print("***************************") + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py new file mode 100644 index 000000000000..b2a45d0ec3fd --- /dev/null +++ b/examples/nlp/language_modeling/tuning/megatron_t5_lora_tuning.py @@ -0,0 +1,107 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import torch.multiprocessing as mp +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment + +from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + +mp.set_start_method("spawn", force=True) + +""" +This is the script to train an Adapter infused GPT Model for text generation. +A base GPT Model is required as a starting point. This script will then insert +Adapters into each Transformer layer and will train/update only these adapters +during training. The base GPT Model weights will remain frozen. + +During training this script will only save the newly trained Adapter weights +in checkpoints. At the end of training a .nemo file of Adapter weights will +be saved. + +Usage: + Assuming the base model is a 125m GPT Model, with TP=1, PP=1: + a. run a training run for a base gpt nemo file: + python megatron_gpt_adapter_tuning.py \ + "model.data.train_ds=[PATH TO TRAINING JSONL FILE]", + "model.data.validation_ds=[PATH TO VALIDATION JSONL FILE]", + model.language_model_path="PATH TO BASE GPT MODEL .nemo FILE" + name="NAME OF TRAINING RUN" + exp_manager.exp_dir="DIR TO SAVE CHECKPOINTS and .nemo FILE", + trainer.max_epochs=2 +""" + + +@hydra_runner(config_path="conf", config_name="megatron_t5_lora_tuning_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name') == 'distributed_fused_adam' + + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, # we don't use DDP for async grad allreduce + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_o2 and not with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + exp_manager(trainer, cfg.exp_manager) + + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + + # load existing or init new soft prompt GPT model + if cfg.model.get("restore_path", None): + model = MegatronT5LoraModel.restore_from( + cfg.model.restore_path, cfg.model, trainer=trainer, save_restore_connector=NLPSaveRestoreConnector() + ) + else: + model = MegatronT5LoraModel(cfg.model, trainer=trainer) + + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git 
a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py index 31c147022486..03bc11cc3d3c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_adapter_model.py @@ -35,6 +35,9 @@ from nemo.collections.nlp.modules.common.megatron.adapters.parallel_adapters import ( AdapterName, InfusedAdapterConfig, + LoraKQVAdapterConfig, + LoraKVAdapterConfig, + LoraQAdapterConfig, MLPInfusedAdapterConfig, ParallelLinearAdapterConfig, ) @@ -420,6 +423,132 @@ def list_available_models(cls): pass +class MegatronT5LoraModel(MegatronT5BaseAdapterModel): + """ + TODO (@adithyare) + """ + + def __init__(self, cfg: DictConfig, trainer: Trainer): + super().__init__(cfg, trainer) + # assert cfg.lora_tuning.get('adapter_dim', 0) > 0, "adapter_dim has not been set." + # assert ( + # cfg.lora_tuning.adapter_dim % cfg.tensor_model_parallel_size == 0 + # ), "The adapter dim should be divisible by tensor_model_parallel_size." + + encoder_adapter_name_keys = [AdapterName.LORA_KQV_ADAPTER] + decoder_adapter_name_keys = [ + AdapterName.LORA_KQV_ADAPTER, + AdapterName.LORA_KV_ADAPTER, + AdapterName.LORA_Q_ADAPTER, + ] + + # add adapter keys to the list -> to update state dict + self.adapter_name_keys = encoder_adapter_name_keys + decoder_adapter_name_keys + + frozen_model_cfg = MegatronT5Model.restore_from( + cfg.get('language_model_path'), trainer=trainer, return_config=True + ) + for _, layer in self.frozen_model.named_modules(): + if hasattr(layer, 'activations_checkpoint_method'): + layer.activations_checkpoint_method = ( + None # (@adithyare) adapter learning does not support activations checkpointing atm. 
+ ) + + self.frozen_model.freeze() + logging.info(f'Before adding adapters:\n{self.frozen_model.summarize()}') + encoder = self.frozen_model.enc_dec_model.enc_dec_model.encoder + decoder = self.frozen_model.enc_dec_model.enc_dec_model.decoder + + if encoder: + encoder_cfg = self._get_component_cfg('encoder', frozen_model_cfg, cfg) + self._add_adapters_to_component(encoder, encoder_cfg, encoder_adapter_name_keys) + logging.info(f'Adding encoder adapters:\n{self.frozen_model.summarize()}') + + if decoder: + decoder_cfg = self._get_component_cfg('decoder', frozen_model_cfg, cfg) + self._add_adapters_to_component(decoder, decoder_cfg, decoder_adapter_name_keys) + logging.info(f'Adding decoder adapters:\n{self.frozen_model.summarize()}') + + def _add_adapters_to_component(self, component, component_cfg, adapter_name_keys): + for _, module in component.named_modules(): + if isinstance(module, adapter_mixins.AdapterModuleMixin): + for adapter_key in adapter_name_keys: + adapter_cfg = self._get_adapter_cfg(component_cfg, adapter_key) + if model_utils.import_class_by_path(adapter_cfg._target_) in module.get_accepted_adapter_types(): + module.add_adapter(name=adapter_key, cfg=adapter_cfg) + print(f"in adding {adapter_key}") + + def _get_component_cfg(self, component_name, frozen_model_cfg, cfg): + if component_name in frozen_model_cfg: + component_cfg = frozen_model_cfg.get(component_name) + with open_dict(component_cfg): + component_cfg.tensor_model_parallel_size = frozen_model_cfg.tensor_model_parallel_size + component_cfg.lora_tuning = cfg.lora_tuning + else: + component_cfg = frozen_model_cfg + with open_dict(component_cfg): + component_cfg.lora_tuning = cfg.lora_tuning + return component_cfg + + def _get_adapter_cfg(self, component_cfg, adapter_key): + if component_cfg.kv_channels is None: + assert ( + component_cfg.hidden_size % component_cfg.num_attention_heads == 0 + ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' + kv_channels = component_cfg.hidden_size // component_cfg.num_attention_heads + else: + kv_channels = component_cfg.kv_channels + projection_size = kv_channels * component_cfg.num_attention_heads + + if adapter_key == AdapterName.LORA_KQV_ADAPTER: + adapter_cfg = LoraKQVAdapterConfig( + in_features=component_cfg.hidden_size, + out_features=3 * projection_size, + dim=component_cfg.lora_tuning.kqv_adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=component_cfg.lora_tuning.get("column_init_method", "normal"), + row_init_method=component_cfg.lora_tuning.get("row_init_method", "zero"), + gather_output=False, + dropout=0.0, + ) + elif adapter_key == AdapterName.LORA_KV_ADAPTER: + adapter_cfg = LoraKVAdapterConfig( + in_features=component_cfg.hidden_size, + out_features=2 * projection_size, + dim=component_cfg.lora_tuning.kv_adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=component_cfg.lora_tuning.get("column_init_method", "normal"), + row_init_method=component_cfg.lora_tuning.get("row_init_method", "zero"), + gather_output=False, + dropout=0.0, + ) + elif adapter_key == AdapterName.LORA_Q_ADAPTER: + adapter_cfg = LoraQAdapterConfig( + in_features=component_cfg.hidden_size, + out_features=1 * projection_size, + dim=component_cfg.lora_tuning.q_adapter_dim, + norm_position="none", + norm_type="none", + activation="identity", + column_init_method=component_cfg.lora_tuning.get("column_init_method", "normal"), + 
row_init_method=component_cfg.lora_tuning.get("row_init_method", "zero"), + gather_output=False, + dropout=0.0, + ) + else: + raise RuntimeError("Unexpected adapter key name..") + + return adapter_cfg + + @classmethod + def list_available_models(cls): + pass + + class MegatronT5InfusedAdapterModel(MegatronT5BaseAdapterModel): """ MegatronGPTInfusedAdapterModel is a model that combines a base model (GPTModel) with a "Infused Adapter that can Inhibiting and Amplify Inner Activations", known as IA3. diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index b26b971a38ba..679020019ab1 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -202,7 +202,25 @@ class ParallelLinearAdapterConfig: class LoraKQVAdapter(ParallelLinearAdapter): """ - Lora Adapters are the same arch as regualr adapters but with potentially different input and output feature sizes + Lora Adapters are the same arch as regular adapters but with potentially different input and output feature sizes + and they do not use an bottleneck activation function + """ + + pass + + +class LoraKVAdapter(ParallelLinearAdapter): + """ + Lora Adapters are the same arch as regular adapters but with potentially different input and output feature sizes + and they do not use an bottleneck activation function + """ + + pass + + +class LoraQAdapter(ParallelLinearAdapter): + """ + Lora Adapters are the same arch as regular adapters but with potentially different input and output feature sizes and they do not use an bottleneck activation function """ @@ -214,6 +232,16 @@ class LoraKQVAdapterConfig(ParallelLinearAdapterConfig): _target_: str = "{0}.{1}".format(LoraKQVAdapter.__module__, LoraKQVAdapter.__name__) +@dataclass +class LoraQAdapterConfig(ParallelLinearAdapterConfig): + _target_: str = "{0}.{1}".format(LoraQAdapter.__module__, LoraQAdapter.__name__) + + +@dataclass +class LoraKVAdapterConfig(ParallelLinearAdapterConfig): + _target_: str = "{0}.{1}".format(LoraKVAdapter.__module__, LoraKVAdapter.__name__) + + class PromptEncoderAdapter(nn.Module, AdapterModuleUtil): """ The Tensor Parallel MLP prompt encoder network that is used to generate the virtual diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index aaeb05d43cde..9c954b5e6313 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -22,6 +22,8 @@ AdapterName, InfusedAdapterConfig, LoraKQVAdapterConfig, + LoraKVAdapterConfig, + LoraQAdapterConfig, ) from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax from nemo.collections.nlp.modules.common.megatron.module import MegatronModule @@ -115,7 +117,14 @@ def __init__( self.megatron_legacy = megatron_legacy self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) - self.set_accepted_adapter_types([InfusedAdapterConfig._target_, LoraKQVAdapterConfig._target_]) + self.set_accepted_adapter_types( + [ + InfusedAdapterConfig._target_, + LoraKQVAdapterConfig._target_, + LoraQAdapterConfig._target_, + LoraKVAdapterConfig._target_, + ] + ) if kv_channels is None: assert ( @@ -395,6 +404,11 @@ def forward( else: # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)] mixed_kv_layer, _ = 
self.key_value(encoder_output) + if self.is_adapter_available(): + lora_kv_adapter = self.get_adapter_module(AdapterName.LORA_KV_ADAPTER) + if lora_kv_adapter: + lora_mixed_kv_layer = lora_kv_adapter(encoder_output) + mixed_kv_layer = mixed_kv_layer + lora_mixed_kv_layer # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn] new_tensor_shape = mixed_kv_layer.size()[:-1] + ( @@ -412,6 +426,11 @@ def forward( # Attention head [sq, b, h] --> [sq, b, hp] query_layer, _ = self.query(hidden_states) + if self.is_adapter_available(): + lora_q_adapter = self.get_adapter_module(AdapterName.LORA_Q_ADAPTER) + if lora_q_adapter: + lora_q_layer = lora_q_adapter(hidden_states) + query_layer = query_layer + lora_q_layer # [sq, b, hp] --> [sq, b, np, hn] new_tensor_shape = query_layer.size()[:-1] + ( self.num_attention_heads_per_partition, From 72faf557a1d207425fd3cb68369603e3c369a9c7 Mon Sep 17 00:00:00 2001 From: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Date: Thu, 8 Jun 2023 20:43:45 -0700 Subject: [PATCH 035/123] NFA updates (#6695) * update V_NEGATIVE_NUM constant to make better use of torch.float32 range Signed-off-by: Elena Rastorgueva * adjust backpointers dtype if U_max too large Signed-off-by: Elena Rastorgueva * Remove print statements Signed-off-by: Elena Rastorgueva * Remove need for user to specify model_downsample_factor Signed-off-by: Elena Rastorgueva * change model.cfg.sample_rate to model.cfg.preprocessor.sample_rate Signed-off-by: Elena Rastorgueva * add check to make sure that window_stride is in model.cfg.preprocessor Signed-off-by: Elena Rastorgueva * reduce memory consumption of backpointers by making them relative instead of absolute Signed-off-by: Elena Rastorgueva * update librosa.get_duration() 'filename' param to 'path' Signed-off-by: Elena Rastorgueva * Do not throw error if 'text' or 'pred_text' are empty and make sure CTM filepaths in the output manifest are null Signed-off-by: Elena Rastorgueva * preprocess input text by removing any duplicate spaces and converting any newlines to spaces Signed-off-by: Elena Rastorgueva * Use Utterance dataclass instead of dictionaries for keeping track of token/word/segment alignments Signed-off-by: Elena Rastorgueva * refactor so can save alignments as ctm and ass format files Signed-off-by: Elena Rastorgueva * fix bugs for saving character based ASS files and for using pred_text to do alignment Signed-off-by: Elena Rastorgueva * Make token level .ass file use tokens with recovered capitalization Signed-off-by: Elena Rastorgueva * Do not try to generate alignment files if text or pred text is empty, or if number of tokens is too large for T Signed-off-by: Elena Rastorgueva * rename output manifest file to say '_with_output_file_paths.json' Signed-off-by: Elena Rastorgueva * add flag to resegment ass subtitle file to fill available text space Signed-off-by: Elena Rastorgueva * Fix bug in resegmentation code Signed-off-by: Elena Rastorgueva * Fix bug which skipped some utterances if batch_size more than 1 Signed-off-by: Elena Rastorgueva * reduce memory requirements by doing torch.gather on a slice of the log probs when they are needed Signed-off-by: Elena Rastorgueva * reduce memory requirements by not saving whole v_matrix Signed-off-by: Elena Rastorgueva * remove any extra spaces in pred_text Signed-off-by: Elena Rastorgueva * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused list pred_text_all_lines Signed-off-by: Elena Rastorgueva * support using 
hybrid Transducer-CTC models for alignment Signed-off-by: Elena Rastorgueva * fix typo - add brackets to torch.cuda.is_available() Signed-off-by: Elena Rastorgueva * make sure token case restoration will work if superscript or subscript num is in text Signed-off-by: Elena Rastorgueva * remove any BOM from input text Signed-off-by: Elena Rastorgueva * pick out 1st hypotheses if there is a tuple of them Signed-off-by: Elena Rastorgueva * Remove print statement Signed-off-by: Elena Rastorgueva * add detail to error message if fail to recover capitalization of tokens Signed-off-by: Elena Rastorgueva * add flag use_local_attention Signed-off-by: Elena Rastorgueva * rename additional_ctm_grouping_separator -> additional_segment_grouping_separator Signed-off-by: Elena Rastorgueva * update description of additional_segment_grouping_separator Signed-off-by: Elena Rastorgueva * add simple docstring to get_utt_obj function Signed-off-by: Elena Rastorgueva * Make docstring for add_t_start_end_to_utt_obj Signed-off-by: Elena Rastorgueva * update docstrings for add_t_start_end_to_utt_obj and get_batch_variables Signed-off-by: Elena Rastorgueva * update README and comments in align.py Signed-off-by: Elena Rastorgueva * change 'ground truth' -> 'reference text' in documentation Signed-off-by: Elena Rastorgueva * add header Signed-off-by: Elena Rastorgueva * add comments to get_utt_obj function Signed-off-by: Elena Rastorgueva * move constants so they are after imports Signed-off-by: Elena Rastorgueva * add file description for make_ass_files Signed-off-by: Elena Rastorgueva * get rid of Utterance object's S attribute, and correct tests so they pass now Signed-off-by: Elena Rastorgueva * remove some unused variables Signed-off-by: Elena Rastorgueva * remove unused variable model from functions saving output files Signed-off-by: Elena Rastorgueva * remove unused var minimum_timestamp_duration from make_ass_files functions and return utt_obj Signed-off-by: Elena Rastorgueva * move minimum_timestamp_duration param to CTMFileConfig Signed-off-by: Elena Rastorgueva * remove unused enumerate and unused import Signed-off-by: Elena Rastorgueva * switch reading duration from librosa to soundfile to avoid filename/path deprecation message Signed-off-by: Elena Rastorgueva --------- Signed-off-by: Elena Rastorgueva Signed-off-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- tools/nemo_forced_aligner/README.md | 69 +- tools/nemo_forced_aligner/align.py | 186 +++-- tools/nemo_forced_aligner/requirements.txt | 3 +- .../test_add_t_start_end_to_boundary_info.py | 121 --- .../tests/test_add_t_start_end_to_utt_obj.py | 288 +++++++ .../tests/test_get_utt_obj.py | 344 ++++++++ .../test_get_y_and_boundary_info_for_utt.py | 158 ---- .../tests/test_restore_token_case.py | 36 + tools/nemo_forced_aligner/utils/constants.py | 2 +- tools/nemo_forced_aligner/utils/data_prep.py | 752 ++++++++++++++---- .../utils/make_ass_files.py | 428 ++++++++++ .../utils/make_ctm_files.py | 114 +++ .../utils/make_output_files.py | 209 ----- .../utils/make_output_manifest.py | 35 + .../utils/viterbi_decoding.py | 70 +- 15 files changed, 2011 insertions(+), 804 deletions(-) delete mode 100644 tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py create mode 100644 tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py create mode 100644 tools/nemo_forced_aligner/tests/test_get_utt_obj.py delete mode 
100644 tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py create mode 100644 tools/nemo_forced_aligner/tests/test_restore_token_case.py create mode 100644 tools/nemo_forced_aligner/utils/make_ass_files.py create mode 100644 tools/nemo_forced_aligner/utils/make_ctm_files.py delete mode 100644 tools/nemo_forced_aligner/utils/make_output_files.py create mode 100644 tools/nemo_forced_aligner/utils/make_output_manifest.py diff --git a/tools/nemo_forced_aligner/README.md b/tools/nemo_forced_aligner/README.md index 35ee78ffecb0..423c76878db6 100644 --- a/tools/nemo_forced_aligner/README.md +++ b/tools/nemo_forced_aligner/README.md @@ -7,7 +7,6 @@ A tool for doing Forced Alignment using Viterbi decoding of NeMo CTC-based model ``` bash python /tools/nemo_forced_aligner/align.py \ pretrained_name="stt_en_citrinet_1024_gamma_0_25" \ - model_downsample_factor=8 \ manifest_filepath= \ output_dir= ``` @@ -23,34 +22,44 @@ Call the `align.py` script, specifying the parameters as follows: * `model_path`: string specifying the local filepath to a CTC NeMo ASR model which will be used to generate the log-probs which we will use to do alignment. If `pretrained_name` is specified, `model_path` must not be specified. >Note: NFA can only use CTC models (not Transducer models) at the moment. If you want to transcribe a long audio file (longer than ~5-10 mins), do not use Conformer CTC model as that will likely give Out Of Memory errors. -* `model_downsample_factor`: the downsample factor of the ASR model. It should be 2 if your model is QuartzNet, 4 if it is Conformer CTC, 8 if it is Citrinet. - * `manifest_filepath`: The path to the manifest of the data you want to align, containing `'audio_filepath'` and `'text'` fields. The audio filepaths need to be absolute paths. -* `output_dir`: The folder where to save CTM files containing the generated alignments and new JSON manifest containing paths to those CTM files. There will be one CTM file per utterance (ie one CTM file per line in the manifest). The files will be called `/{tokens,words,additional_segments}/.ctm` and each line in each file will start with ``. By default, `utt_id` will be the stem of the audio_filepath. This can be changed by overriding `audio_filepath_parts_in_utt_id`. The new JSON manifest will be at `/_with_ctm_paths.json`. +* `output_dir`: The folder where to save the output files (e.g. CTM, ASS) containing the generated alignments and new JSON manifest containing paths to those CTM/ASS files. The CTM file will be called `/ctm/{tokens,words,segments}/.ctm` and each line in each file will start with ``. By default, `utt_id` will be the stem of the audio_filepath. This can be changed by overriding `audio_filepath_parts_in_utt_id`. The new JSON manifest will be at `/_with_ctm_paths.json`. The ASS files will be at `/ass/{tokens,words}/.ass`. You can adjust which files should be saved by adjusting the parameter `save_output_file_formats`. + +### Optional parameters: + +* `align_using_pred_text`: if True, will transcribe the audio using the ASR model (specified by `pretrained_name` or `model_path`) and then use that transcription as the reference text for the forced alignment. The `"pred_text"` will be saved in the output JSON manifest at `/{original manifest name}_with_ctm_paths.json`. To avoid over-writing other transcribed texts, if there are already `"pred_text"` entries in the original manifest, the program will exit without attempting to generate alignments. (Default: False). 
+ +* `transcribe_device`: The device that will be used for generating log-probs (i.e. transcribing). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method. (Default: `None`). + +* `viterbi_device`: The device that will be used for doing Viterbi decoding. If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method.(Default: `None`). -* **[OPTIONAL]** `align_using_pred_text`: if True, will transcribe the audio using the ASR model (specified by `pretrained_name` or `model_path`) and then use that transcription as the 'ground truth' for the forced alignment. The `"pred_text"` will be saved in the output JSON manifest at `/{original manifest name}_with_ctm_paths.json`. To avoid over-writing other transcribed texts, if there are already `"pred_text"` entries in the original manifest, the program will exit without attempting to generate alignments. (Default: False). +* `batch_size`: The batch_size that will be used for generating log-probs and doing Viterbi decoding. (Default: 1). -* **[OPTIONAL]** `transcribe_device`: The device that will be used for generating log-probs (i.e. transcribing). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method. (Default: `None`). +* `use_local_attention`: boolean flag specifying whether to try to use local attention for the ASR Model (will only work if the ASR Model is a Conformer model). If local attention is used, we will set the local attention context size to [64,64]. -* **[OPTIONAL]** `viterbi_device`: The device that will be used for doing Viterbi decoding. If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). If specified `transcribe_device` needs to be a string that can be input to the `torch.device()` method.(Default: `None`). +* `additional_segment_grouping_separator`: an optional string used to separate the text into smaller segments. If this is not specified, then the whole text will be treated as a single segment. (Default: `None`. Cannot be empty string or space (" "), as NFA will automatically produce word-level timestamps for substrings separated by spaces). +> Note: the `additional_segment_grouping_separator` will be removed from the reference text and all the output files, ie it is treated as a marker which is not part of the reference text. The separator will essentially be treated as a space, and any additional spaces around it will be amalgamated into one, i.e. if `additional_segment_grouping_separator="|"`, the following texts will be treated equivalently: `“abc|def”`, `“abc |def”`, `“abc| def”`, `“abc | def"`. -* **[OPTIONAL]** `batch_size`: The batch_size that will be used for generating log-probs and doing Viterbi decoding. (Default: 1). +* `remove_blank_tokens_from_ctm`: a boolean denoting whether to remove tokens from token-level output CTMs. (Default: False). -* **[OPTIONAL]** `additional_ctm_grouping_separator`: the string used to separate CTM segments if you want to obtain CTM files at a level that is not the token level or the word level. NFA will always produce token-level and word-level CTM files in: `/tokens/.ctm` and `/words/.ctm`. 
If `additional_ctm_grouping_separator` is specified, an additional folder `/{tokens/words/additional_segments}/.ctm` will be created containing CTMs for `addtional_ctm_grouping_separator`-separated segments. (Default: `None`. Cannot be empty string or space (" "), as space-separated word-level CTMs will always be saved in `/words/.ctm`.) -> Note: the `additional_ctm_grouping_separator` will be removed from the ground truth text and all the output CTMs, ie it is treated as a marker which is not part of the ground truth. The separator will essentially be treated as a space, and any additional spaces around it will be amalgamated into one, i.e. if `additional_ctm_grouping_separator="|"`, the following texts will be treated equivalently: `“abc|def”`, `“abc |def”`, `“abc| def”`, `“abc | def"`. +* `audio_filepath_parts_in_utt_id`: This specifies how many of the 'parts' of the audio_filepath we will use (starting from the final part of the audio_filepath) to determine the utt_id that will be used in the CTM files. (Default: 1, i.e. utt_id will be the stem of the basename of audio_filepath). Note also that any spaces that are present in the audio_filepath will be replaced with dashes, so as not to change the number of space-separated elements in the CTM files. -* **[OPTIONAL]** `remove_blank_tokens_from_ctm`: a boolean denoting whether to remove tokens from token-level output CTMs. (Default: False). +* `minimum_timestamp_duration`: a float indicating a minimum duration (in seconds) for timestamps in the CTM. If any line in the CTM has a duration lower than the `minimum_timestamp_duration`, it will be enlarged from the middle outwards until it meets the minimum_timestamp_duration, or reaches the beginning or end of the audio file. Note that this may cause timestamps to overlap. (Default: 0, i.e. no modifications to predicted duration). -* **[OPTIONAL]** `audio_filepath_parts_in_utt_id`: This specifies how many of the 'parts' of the audio_filepath we will use (starting from the final part of the audio_filepath) to determine the utt_id that will be used in the CTM files. (Default: 1, i.e. utt_id will be the stem of the basename of audio_filepath). Note also that any spaces that are present in the audio_filepath will be replaced with dashes, so as not to change the number of space-separated elements in the CTM files. +* `use_buffered_chunked_streaming`: a flag to indicate whether to do buffered chunk streaming. Notice only CTC models (e.g., stt_en_citrinet_1024_gamma_0_25)with `per_feature` preprocessor are supported. The below two params are needed if this option set to `True`. -* **[OPTIONAL]** `minimum_timestamp_duration`: a float indicating a minimum duration (in seconds) for timestamps in the CTM. If any line in the CTM has a duration lower than the `minimum_timestamp_duration`, it will be enlarged from the middle outwards until it meets the minimum_timestamp_duration, or reaches the beginning or end of the audio file. Note that this may cause timestamps to overlap. (Default: 0, i.e. no modifications to predicted duration). +* `chunk_len_in_secs`: the chunk size for buffered chunked streaming inference. Default is 1.6 seconds. -* **[OPTIONAL]** `use_buffered_chunked_streaming`: a flag to indicate whether to do buffered chunk streaming. Notice only CTC models (e.g., stt_en_citrinet_1024_gamma_0_25)with `per_feature` preprocessor are supported. The below two params are needed if this option set to `True`. +* `total_buffer_in_secs`: the buffer size for buffered chunked streaming inference. 
Default is 4.0 seconds. -* **[OPTIONAL]** `chunk_len_in_secs`: the chunk size for buffered chunked streaming inference. Default is 1.6 seconds. +* `simulate_cache_aware_streaming`: a flag to indicate whether to use cache aware streaming to do get the logits for alignment. Default: `False`. -* **[OPTIONAL]** `total_buffer_in_secs`: the buffer size for buffered chunked streaming inference. Default is 4.0 seconds. +* `save_output_file_formats`: list of file formats to use for saving the output. Default: `["ctm", "ass"]` (these are all the available ones currently). + +* `ctm_file_config`: `CTMFileConfig` to specify the configuration of the output CTM files. + +* `ass_file_config`: `ASSFileConfig` to specify the configuration of the output ASS files. # Input manifest file format By default, NFA needs to be provided with a 'manifest' file where each line specifies the absolute "audio_filepath" and "text" of each utterance that you wish to produce alignments for, like the format below: @@ -58,25 +67,35 @@ By default, NFA needs to be provided with a 'manifest' file where each line spec {"audio_filepath": "/absolute/path/to/audio.wav", "text": "the transcription of the utterance"} ``` -You can omit the `"text"` field from the manifest if you specify `align_using_pred_text=true`. In that case, any `"text"` fields in the manifest will be ignored: the ASR model at `pretrained_name` or `model_path` will be used to transcribe the audio and obtain `"pred_text"`, which will be used as the 'ground truth' for the forced alignment process. The `"pred_text"` will also be saved in the output manifest JSON file at `/_with_ctm_paths.json`. To remove the possibility of overwriting `"pred_text"`, NFA will raise an error if `align_using_pred_text=true` and there are existing `"pred_text"` fields in the original manifest. +You can omit the `"text"` field from the manifest if you specify `align_using_pred_text=true`. In that case, any `"text"` fields in the manifest will be ignored: the ASR model at `pretrained_name` or `model_path` will be used to transcribe the audio and obtain `"pred_text"`, which will be used as the reference text for the forced alignment process. The `"pred_text"` will also be saved in the output manifest JSON file at `/_with_output_file_paths.json`. To remove the possibility of overwriting `"pred_text"`, NFA will raise an error if `align_using_pred_text=true` and there are existing `"pred_text"` fields in the original manifest. -> Note: NFA does not require `"duration"` fields in the manifest, and can align long audio files without running out of memory. Depending on your machine specs, you can align audios up to 5-10 minutes on Conformer CTC models, up to around 1.5 hours for QuartzNet models, and up to several hours for Citrinet models. NFA will also produce better alignments the more accurate the ground-truth `"text"` is. +> Note: NFA does not require `"duration"` fields in the manifest, and can align long audio files without running out of memory. The duration of audio file you can align will depend on the amount of memory on your machine. NFA will also produce better alignments the more accurate the reference text in `"text"` is. 
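For illustration, here is a minimal sketch of writing such a manifest programmatically. The directory layout, transcript file naming, and output path below are assumptions made for this example only, not anything NFA requires:

```python
# Illustrative sketch: build an NFA input manifest from a folder of .wav files,
# assuming (for this example only) that each audio file has a sibling .txt
# transcript with the same stem. Omit the "text" field if you plan to run
# with align_using_pred_text=true.
import json
from pathlib import Path

audio_dir = Path("/absolute/path/to/audio")  # hypothetical location

with open("manifest.json", "w", encoding="utf-8") as f_out:
    for wav_path in sorted(audio_dir.glob("*.wav")):
        entry = {"audio_filepath": str(wav_path.resolve())}
        txt_path = wav_path.with_suffix(".txt")
        if txt_path.exists():
            entry["text"] = txt_path.read_text(encoding="utf-8").strip()
        f_out.write(json.dumps(entry) + "\n")
```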
# Output CTM file format For each utterance specified in a line of `manifest_filepath`, several CTM files will be generated: -* a CTM file containing token-level alignments at `/tokens/.ctm`, -* a CTM file containing word-level alignments at `/words/.ctm`, -* if `additional_ctm_grouping_separator` is specified, there will also be a CTM file containing those segments at `output_dir/additional_segments`. +* a CTM file containing token-level alignments at `/ctm/tokens/.ctm`, +* a CTM file containing word-level alignments at `/ctm/words/.ctm`, +* a CTM file containing segment-level alignments at `/ctm/segments/.ctm`. If `additional_segment_grouping_separator` is specified, the segments will be the parts of the text separated by `additional_segment_grouping_separator`. If it is not specified, the entire text will be treated as a single segment. + Each CTM file will contain lines of the format: ` 1 `. Note that the second item in the line (the 'channel ID', which is required by the CTM file format) is always 1, as NFA operates on single-channel audio. +# Output ASS file format +NFA will produce the following ASS files, which you can use to generate subtitle videos: +* ASS files with token-level highlighting will be at `/ass/tokens/.ass`, +* ASS files with word-level highlighting will be at `/ass/words/.ass`. +All words belonging to the same segment will appear at the same time in the subtitles generated with the ASS files. If you find that your segments are not the right size, you can set `ass_file_config.resegment_text_to_fill_space=true` and specify some number of `ass_file_config.max_lines_per_segment`. + + # Output JSON manifest file format -A new manifest file will be saved at `/_with_ctm_paths.json`. It will contain the same fields as the original manifest, and additionally: -* `"token_level_ctm_filepath"` -* `"word_level_ctm_filepath"` -* `"additonal_segment_level_ctm_filepath"` (if `additional_ctm_grouping_separator` is specified) +A new manifest file will be saved at `/_with_output_file_paths.json`.
It will contain the same fields as the original manifest, and additionally: +* `"token_level_ctm_filepath"` (if `save_output_file_formats` contains `ctm`) +* `"word_level_ctm_filepath"` (if `save_output_file_formats` contains `ctm`) +* `"segment_level_ctm_filepath"` (if `save_output_file_formats` contains `ctm`) +* `"token_level_ass_filepath"` (if `save_output_file_formats` contains `ass`) +* `"word_level_ass_filepath"` (if `save_output_file_formats` contains `ass`) * `"pred_text"` (if `align_using_pred_text=true`) diff --git a/tools/nemo_forced_aligner/align.py b/tools/nemo_forced_aligner/align.py index ed3ca3e45b5b..296c4a009cc4 100644 --- a/tools/nemo_forced_aligner/align.py +++ b/tools/nemo_forced_aligner/align.py @@ -15,22 +15,27 @@ import copy import math import os -from dataclasses import dataclass, is_dataclass -from typing import Optional +from dataclasses import dataclass, field, is_dataclass +from pathlib import Path +from typing import List, Optional import torch from omegaconf import OmegaConf from utils.data_prep import ( + add_t_start_end_to_utt_obj, get_batch_starts_ends, - get_batch_tensors_and_boundary_info, + get_batch_variables, get_manifest_lines_batch, is_entry_in_all_lines, is_entry_in_any_lines, ) -from utils.make_output_files import make_ctm, make_new_manifest +from utils.make_ass_files import make_ass_files +from utils.make_ctm_files import make_ctm_files +from utils.make_output_manifest import write_manifest_out_line from utils.viterbi_decoding import viterbi_decoding from nemo.collections.asr.models.ctc_models import EncDecCTCModel +from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR from nemo.collections.asr.parts.utils.transcribe_utils import setup_model from nemo.core.config import hydra_runner @@ -48,16 +53,11 @@ log-probs which we will use to do alignment. Note: NFA can only use CTC models (not Transducer models) at the moment. Note: if a model_path is provided, it will override the pretrained_name. - model_downsample_factor: an int indicating the downsample factor of the ASR model, ie the ratio of input - timesteps to output timesteps. - If the ASR model is a QuartzNet model, its downsample factor is 2. - If the ASR model is a Conformer CTC model, its downsample factor is 4. - If the ASR model is a Citirnet model, its downsample factor is 8. manifest_filepath: filepath to the manifest of the data you want to align, containing 'audio_filepath' and 'text' fields. output_dir: the folder where output CTM files and new JSON manifest will be saved. align_using_pred_text: if True, will transcribe the audio using the specified model and then use that transcription - as the 'ground truth' for the forced alignment. + as the reference text for the forced alignment. transcribe_device: None, or a string specifying the device that will be used for generating log-probs (i.e. "transcribing"). The string needs to be in a format recognized by torch.device(). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). @@ -65,12 +65,11 @@ The string needs to be in a format recognized by torch.device(). If None, NFA will set it to 'cuda' if it is available (otherwise will set it to 'cpu'). batch_size: int specifying batch size that will be used for generating log-probs and doing Viterbi decoding. 
- additional_ctm_grouping_separator: the string used to separate CTM segments if you want to obtain CTM files at a - level that is not the token level or the word level. NFA will always produce token-level and word-level CTM - files in: `/tokens/.ctm` and `/words/.ctm`. - If `additional_ctm_grouping_separator` is specified, an additional folder - `/{tokens/words/additional_segments}/.ctm` will be created containing CTMs - for `addtional_ctm_grouping_separator`-separated segments. + use_local_attention: boolean flag specifying whether to try to use local attention for the ASR Model (will only + work if the ASR Model is a Conformer model). If local attention is used, we will set the local attention context + size to [64,64]. + additional_segment_grouping_separator: an optional string used to separate the text into smaller segments. + If this is not specified, then the whole text will be treated as a single segment. remove_blank_tokens_from_ctm: a boolean denoting whether to remove tokens from token-level output CTMs. audio_filepath_parts_in_utt_id: int specifying how many of the 'parts' of the audio_filepath we will use (starting from the final part of the audio_filepath) to determine the @@ -80,11 +79,6 @@ e.g. if audio_filepath is "/a/b/c/d/e 1.wav" and audio_filepath_parts_in_utt_id is 1 => utt_id will be "e1" e.g. if audio_filepath is "/a/b/c/d/e 1.wav" and audio_filepath_parts_in_utt_id is 2 => utt_id will be "d_e1" e.g. if audio_filepath is "/a/b/c/d/e 1.wav" and audio_filepath_parts_in_utt_id is 3 => utt_id will be "c_d_e1" - minimum_timestamp_duration: a float indicating a minimum duration (in seconds) for timestamps in the CTM. If any - line in the CTM has a duration lower than the `minimum_timestamp_duration`, it will be enlarged from the - middle outwards until it meets the minimum_timestamp_duration, or reaches the beginning or end of the audio - file. Note that this may cause timestamps to overlap. - use_buffered_infer: False, if set True, using streaming to do get the logits for alignment This flag is useful when aligning large audio file. However, currently the chunk streaming inference does not support batch inference, @@ -96,15 +90,39 @@ which will cut one audio into segments and do inference on chunk_batch_size segments at a time simulate_cache_aware_streaming: False, if set True, using cache aware streaming to do get the logits for alignment + + save_output_file_formats: List of strings specifying what type of output files to save (default: ["ctm", "ass"]) + ctm_file_config: CTMFileConfig to specify the configuration of the output CTM files + ass_file_config: ASSFileConfig to specify the configuration of the output ASS files """ +@dataclass +class CTMFileConfig: + remove_blank_tokens: bool = False + # minimum duration (in seconds) for timestamps in the CTM.If any line in the CTM has a + # duration lower than this, it will be enlarged from the middle outwards until it + # meets the minimum_timestamp_duration, or reaches the beginning or end of the audio file. + # Note that this may cause timestamps to overlap. 
+ minimum_timestamp_duration: float = 0 + + +@dataclass +class ASSFileConfig: + fontsize: int = 20 + marginv: int = 20 + # if resegment_text_to_fill_space is True, the ASS files will use new segments + # such that each segment will not take up more than (approximately) max_lines_per_segment + # when the ASS file is applied to a video + resegment_text_to_fill_space: bool = False + max_lines_per_segment: int = 2 + + @dataclass class AlignmentConfig: # Required configs pretrained_name: Optional[str] = None model_path: Optional[str] = None - model_downsample_factor: Optional[int] = None manifest_filepath: Optional[str] = None output_dir: Optional[str] = None @@ -113,9 +131,8 @@ class AlignmentConfig: transcribe_device: Optional[str] = None viterbi_device: Optional[str] = None batch_size: int = 1 - additional_ctm_grouping_separator: Optional[str] = None - remove_blank_tokens_from_ctm: bool = False - minimum_timestamp_duration: float = 0 + use_local_attention: bool = True + additional_segment_grouping_separator: Optional[str] = None audio_filepath_parts_in_utt_id: int = 1 # Buffered chunked streaming configs @@ -127,6 +144,11 @@ class AlignmentConfig: # Cache aware streaming configs simulate_cache_aware_streaming: Optional[bool] = False + # Output file configs + save_output_file_formats: List[str] = field(default_factory=lambda: ["ctm", "ass"]) + ctm_file_config: CTMFileConfig = CTMFileConfig() + ass_file_config: ASSFileConfig = ASSFileConfig() + @hydra_runner(config_name="AlignmentConfig", schema=AlignmentConfig) def main(cfg: AlignmentConfig): @@ -143,9 +165,6 @@ def main(cfg: AlignmentConfig): if cfg.model_path is not None and cfg.pretrained_name is not None: raise ValueError("One of cfg.model_path and cfg.pretrained_name must be None") - if cfg.model_downsample_factor is None: - raise ValueError("cfg.model_downsample_factor must be specified") - if cfg.manifest_filepath is None: raise ValueError("cfg.manifest_filepath must be specified") @@ -155,10 +174,10 @@ def main(cfg: AlignmentConfig): if cfg.batch_size < 1: raise ValueError("cfg.batch_size cannot be zero or a negative number") - if cfg.additional_ctm_grouping_separator == "" or cfg.additional_ctm_grouping_separator == " ": + if cfg.additional_segment_grouping_separator == "" or cfg.additional_segment_grouping_separator == " ": raise ValueError("cfg.additional_grouping_separator cannot be empty string or space character") - if cfg.minimum_timestamp_duration < 0: + if cfg.ctm_file_config.minimum_timestamp_duration < 0: raise ValueError("cfg.minimum_timestamp_duration cannot be a negative number") # Validate manifest contents @@ -179,18 +198,18 @@ def main(cfg: AlignmentConfig): if not is_entry_in_all_lines(cfg.manifest_filepath, "text"): raise RuntimeError( "At least one line in cfg.manifest_filepath does not contain a 'text' entry. " - "NFA requires all lines to contain a 'text' entry when cfg.align_using_pred_text=True." + "NFA requires all lines to contain a 'text' entry when cfg.align_using_pred_text=False." 
) # init devices if cfg.transcribe_device is None: - transcribe_device = torch.device("cuda" if torch.cuda.is_available else "cpu") + transcribe_device = torch.device("cuda" if torch.cuda.is_available() else "cpu") else: transcribe_device = torch.device(cfg.transcribe_device) logging.info(f"Device to be used for transcription step (`transcribe_device`) is {transcribe_device}") if cfg.viterbi_device is None: - viterbi_device = torch.device("cuda" if torch.cuda.is_available else "cpu") + viterbi_device = torch.device("cuda" if torch.cuda.is_available() else "cpu") else: viterbi_device = torch.device(cfg.viterbi_device) logging.info(f"Device to be used for viterbi step (`viterbi_device`) is {viterbi_device}") @@ -205,15 +224,24 @@ def main(cfg: AlignmentConfig): model, _ = setup_model(cfg, transcribe_device) model.eval() - if not isinstance(model, EncDecCTCModel): + if isinstance(model, EncDecHybridRNNTCTCModel): + model.change_decoding_strategy(decoder_type="ctc") + + if cfg.use_local_attention: + logging.info( + "Flag use_local_attention is set to True => will try to use local attention for model if it allows it" + ) + model.change_attention_model(self_attention_model="rel_pos_local_attn", att_context_size=[64, 64]) + + if not (isinstance(model, EncDecCTCModel) or isinstance(model, EncDecHybridRNNTCTCModel)): raise NotImplementedError( - f"Model {cfg.model_name} is not an instance of NeMo EncDecCTCModel." - " Currently only instances of EncDecCTCModels are supported" + f"Model is not an instance of NeMo EncDecCTCModel or ENCDecHybridRNNTCTCModel." + " Currently only instances of these models are supported" ) - if cfg.minimum_timestamp_duration > 0: + if cfg.ctm_file_config.minimum_timestamp_duration > 0: logging.warning( - f"cfg.minimum_timestamp_duration has been set to {cfg.minimum_timestamp_duration} seconds. " + f"cfg.ctm_file_config.minimum_timestamp_duration has been set to {cfg.ctm_file_config.minimum_timestamp_duration} seconds. " "This may cause the alignments for some tokens/words/additional segments to be overlapping." 
) @@ -255,84 +283,48 @@ def main(cfg: AlignmentConfig): # get start and end line IDs of batches starts, ends = get_batch_starts_ends(cfg.manifest_filepath, cfg.batch_size) - if cfg.align_using_pred_text: - # record pred_texts to save them in the new manifest at the end of this script - pred_text_all_lines = [] - else: - pred_text_all_lines = None + # init output_timestep_duration = None and we will calculate and update it during the first batch + output_timestep_duration = None + + # init f_manifest_out + os.makedirs(cfg.output_dir, exist_ok=True) + tgt_manifest_name = str(Path(cfg.manifest_filepath).stem) + "_with_output_file_paths.json" + tgt_manifest_filepath = str(Path(cfg.output_dir) / tgt_manifest_name) + f_manifest_out = open(tgt_manifest_filepath, 'w') # get alignment and save in CTM batch-by-batch for start, end in zip(starts, ends): manifest_lines_batch = get_manifest_lines_batch(cfg.manifest_filepath, start, end) - ( - log_probs_batch, - y_batch, - T_batch, - U_batch, - token_info_batch, - word_info_batch, - segment_info_batch, - pred_text_batch, - ) = get_batch_tensors_and_boundary_info( + (log_probs_batch, y_batch, T_batch, U_batch, utt_obj_batch, output_timestep_duration,) = get_batch_variables( manifest_lines_batch, model, - cfg.additional_ctm_grouping_separator, + cfg.additional_segment_grouping_separator, cfg.align_using_pred_text, + cfg.audio_filepath_parts_in_utt_id, + output_timestep_duration, cfg.simulate_cache_aware_streaming, cfg.use_buffered_chunked_streaming, buffered_chunk_params, ) - if cfg.align_using_pred_text: - pred_text_all_lines.extend(pred_text_batch) - alignments_batch = viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) - make_ctm( - token_info_batch, - alignments_batch, - manifest_lines_batch, - model, - cfg.model_downsample_factor, - os.path.join(cfg.output_dir, "tokens"), - cfg.remove_blank_tokens_from_ctm, - cfg.audio_filepath_parts_in_utt_id, - cfg.minimum_timestamp_duration, - ) + for utt_obj, alignment_utt in zip(utt_obj_batch, alignments_batch): - make_ctm( - word_info_batch, - alignments_batch, - manifest_lines_batch, - model, - cfg.model_downsample_factor, - os.path.join(cfg.output_dir, "words"), - False, # dont try to remove blank tokens because we dont expect them to be there anyway - cfg.audio_filepath_parts_in_utt_id, - cfg.minimum_timestamp_duration, - ) + utt_obj = add_t_start_end_to_utt_obj(utt_obj, alignment_utt, output_timestep_duration) + + if "ctm" in cfg.save_output_file_formats: + utt_obj = make_ctm_files(utt_obj, cfg.output_dir, cfg.ctm_file_config,) + + if "ass" in cfg.save_output_file_formats: + utt_obj = make_ass_files(utt_obj, cfg.output_dir, cfg.ass_file_config) - if cfg.additional_ctm_grouping_separator: - make_ctm( - segment_info_batch, - alignments_batch, - manifest_lines_batch, - model, - cfg.model_downsample_factor, - os.path.join(cfg.output_dir, "additional_segments"), - False, # dont try to remove blank tokens because we dont expect them to be there anyway - cfg.audio_filepath_parts_in_utt_id, - cfg.minimum_timestamp_duration, + write_manifest_out_line( + f_manifest_out, utt_obj, ) - make_new_manifest( - cfg.output_dir, - cfg.manifest_filepath, - cfg.additional_ctm_grouping_separator, - cfg.audio_filepath_parts_in_utt_id, - pred_text_all_lines, - ) + f_manifest_out.close() return None diff --git a/tools/nemo_forced_aligner/requirements.txt b/tools/nemo_forced_aligner/requirements.txt index 3af8ebf1b488..9daa6d2f2496 100644 --- a/tools/nemo_forced_aligner/requirements.txt +++ 
b/tools/nemo_forced_aligner/requirements.txt @@ -1,2 +1,3 @@ nemo_toolkit[all] -pytest +prettyprinter # for testing +pytest # for testing diff --git a/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py b/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py deleted file mode 100644 index 406c4be1fb70..000000000000 --- a/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_boundary_info.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest -from utils.make_output_files import add_t_start_end_to_boundary_info - -ALIGNMENT = [ - 1, - 1, - 3, - 3, - 4, - 5, - 7, - 7, - 9, - 10, - 11, - 12, - 13, - 15, - 17, - 17, - 19, - 21, - 23, - 23, -] - -INPUT_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': 'h', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': 'i', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'w', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - {'text': 'o', 's_start': 9, 's_end': 9}, - {'text': '', 's_start': 10, 's_end': 10}, - {'text': 'r', 's_start': 11, 's_end': 11}, - {'text': '', 's_start': 12, 's_end': 12}, - {'text': 'l', 's_start': 13, 's_end': 13}, - {'text': '', 's_start': 14, 's_end': 14}, - {'text': 'd', 's_start': 15, 's_end': 15}, - {'text': '', 's_start': 16, 's_end': 16}, - {'text': '', 's_start': 17, 's_end': 17}, - {'text': '', 's_start': 18, 's_end': 18}, - {'text': 'h', 's_start': 19, 's_end': 19}, - {'text': '', 's_start': 20, 's_end': 20}, - {'text': 'e', 's_start': 21, 's_end': 21}, - {'text': '', 's_start': 22, 's_end': 22}, - {'text': 'y', 's_start': 23, 's_end': 23}, - {'text': '', 's_start': 24, 's_end': 24}, -] - -EXPECTED_OUTPUT_TOKEN_INFO = [ - {'text': 'h', 's_start': 1, 's_end': 1, 't_start': 0, 't_end': 1}, - {'text': 'i', 's_start': 3, 's_end': 3, 't_start': 2, 't_end': 3}, - {'text': '', 's_start': 4, 's_end': 4, 't_start': 4, 't_end': 4}, - {'text': '', 's_start': 5, 's_end': 5, 't_start': 5, 't_end': 5}, - {'text': 'w', 's_start': 7, 's_end': 7, 't_start': 6, 't_end': 7}, - {'text': 'o', 's_start': 9, 's_end': 9, 't_start': 8, 't_end': 8}, - {'text': '', 's_start': 10, 's_end': 10, 't_start': 9, 't_end': 9}, - {'text': 'r', 's_start': 11, 's_end': 11, 't_start': 10, 't_end': 10}, - {'text': '', 's_start': 12, 's_end': 12, 't_start': 11, 't_end': 11}, - {'text': 'l', 's_start': 13, 's_end': 13, 't_start': 12, 't_end': 12}, - {'text': 'd', 's_start': 15, 's_end': 15, 't_start': 13, 't_end': 13}, - {'text': '', 's_start': 17, 's_end': 17, 't_start': 14, 't_end': 15}, - {'text': 'h', 's_start': 19, 's_end': 19, 't_start': 16, 't_end': 16}, - {'text': 'e', 's_start': 21, 's_end': 21, 't_start': 17, 't_end': 17}, - {'text': 'y', 's_start': 23, 's_end': 23, 't_start': 18, 't_end': 19}, -] - - -INPUT_WORD_INFO = [ - 
{'text': 'hi', 's_start': 1, 's_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EXPECTED_OUTPUT_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 3, 't_start': 0, 't_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15, 't_start': 6, 't_end': 13}, - {'text': 'hey', 's_start': 19, 's_end': 23, 't_start': 16, 't_end': 19}, -] - -INPUT_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EXPECTED_OUTPUT_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 15, 't_start': 0, 't_end': 13}, - {'text': 'hey', 's_start': 19, 's_end': 23, 't_start': 16, 't_end': 19}, -] - - -@pytest.mark.parametrize( - "input_boundary_info_utt,alignment_utt,expected_output_boundary_info_utt", - [ - (INPUT_TOKEN_INFO, ALIGNMENT, EXPECTED_OUTPUT_TOKEN_INFO), - (INPUT_WORD_INFO, ALIGNMENT, EXPECTED_OUTPUT_WORD_INFO), - (INPUT_SEGMENT_INFO, ALIGNMENT, EXPECTED_OUTPUT_SEGMENT_INFO), - ], -) -def test_add_t_start_end_to_boundary_info(input_boundary_info_utt, alignment_utt, expected_output_boundary_info_utt): - output_boundary_info_utt = add_t_start_end_to_boundary_info(input_boundary_info_utt, alignment_utt) - assert output_boundary_info_utt == expected_output_boundary_info_utt diff --git a/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py b/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py new file mode 100644 index 000000000000..62092d5afaeb --- /dev/null +++ b/tools/nemo_forced_aligner/tests/test_add_t_start_end_to_utt_obj.py @@ -0,0 +1,288 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
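The new test below checks how alignment timestep indices are converted into start and end times. As a rough sketch of that mapping (the helper here is hypothetical, written for illustration rather than taken from NFA), a token, word, or segment spanning alignment states `s_start..s_end` takes its `t_start` from the first output timestep aligned to one of those states and its `t_end` from one past the last such timestep, scaled by the output timestep duration:

```python
# Hypothetical helper, for illustration only (not the NFA implementation).
def t_start_end_for_span(alignment, s_start, s_end, output_timestep_duration):
    # Collect the output timesteps whose aligned state falls inside the span.
    timesteps = [t for t, s in enumerate(alignment) if s_start <= s <= s_end]
    if not timesteps:
        # States skipped by the Viterbi path get no timing (-1 in the test data below).
        return -1, -1
    t_start = timesteps[0] * output_timestep_duration
    t_end = (timesteps[-1] + 1) * output_timestep_duration
    return t_start, t_end

# With the ALIGNMENT constant defined below, the token at state 1 ('h') occupies
# timesteps 0 and 1, so it gets t_start=0.0 and t_end=2 * OUTPUT_TIMESTEP_DURATION.
```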
+ +import copy + +import pytest +from utils.data_prep import Segment, Token, Utterance, Word, add_t_start_end_to_utt_obj + +OUTPUT_TIMESTEP_DURATION = 0.04 + +ALIGNMENT = [ + 1, + 1, + 3, + 3, + 4, + 5, + 7, + 7, + 9, + 10, + 11, + 12, + 13, + 15, + 17, + 17, + 19, + 21, + 23, + 23, +] + +EXPECTED_OUTPUT_UTTERANCE = Utterance( + text='hi world | hey', + token_ids_with_blanks=[ + 28, + 8, + 28, + 9, + 28, + 0, + 28, + 23, + 28, + 15, + 28, + 18, + 28, + 12, + 28, + 4, + 28, + 0, + 28, + 8, + 28, + 5, + 28, + 25, + 28, + ], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=-1, t_end=-1), + Segment( + text="hi world", + s_start=1, + s_end=15, + t_start=0 * OUTPUT_TIMESTEP_DURATION, + t_end=14 * OUTPUT_TIMESTEP_DURATION, + words_and_tokens=[ + Word( + text="hi", + s_start=1, + s_end=3, + t_start=0 * OUTPUT_TIMESTEP_DURATION, + t_end=4 * OUTPUT_TIMESTEP_DURATION, + tokens=[ + Token( + text='h', + text_cased='h', + s_start=1, + s_end=1, + t_start=0 * OUTPUT_TIMESTEP_DURATION, + t_end=2 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=-1, t_end=-1), + Token( + text='i', + text_cased='i', + s_start=3, + s_end=3, + t_start=2 * OUTPUT_TIMESTEP_DURATION, + t_end=4 * OUTPUT_TIMESTEP_DURATION, + ), + ], + ), + Token( + text='', + text_cased='', + s_start=4, + s_end=4, + t_start=4 * OUTPUT_TIMESTEP_DURATION, + t_end=5 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='', + text_cased='', + s_start=5, + s_end=5, + t_start=5 * OUTPUT_TIMESTEP_DURATION, + t_end=6 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=-1, t_end=-1), + Word( + text="world", + s_start=7, + s_end=15, + t_start=6 * OUTPUT_TIMESTEP_DURATION, + t_end=14 * OUTPUT_TIMESTEP_DURATION, + tokens=[ + Token( + text='w', + text_cased='w', + s_start=7, + s_end=7, + t_start=6 * OUTPUT_TIMESTEP_DURATION, + t_end=8 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=-1, t_end=-1), + Token( + text='o', + text_cased='o', + s_start=9, + s_end=9, + t_start=8 * OUTPUT_TIMESTEP_DURATION, + t_end=9 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='', + text_cased='', + s_start=10, + s_end=10, + t_start=9 * OUTPUT_TIMESTEP_DURATION, + t_end=10 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='r', + text_cased='r', + s_start=11, + s_end=11, + t_start=10 * OUTPUT_TIMESTEP_DURATION, + t_end=11 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='', + text_cased='', + s_start=12, + s_end=12, + t_start=11 * OUTPUT_TIMESTEP_DURATION, + t_end=12 * OUTPUT_TIMESTEP_DURATION, + ), + Token( + text='l', + text_cased='l', + s_start=13, + s_end=13, + t_start=12 * OUTPUT_TIMESTEP_DURATION, + t_end=13 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=14, s_end=14, t_start=-1, t_end=-1), + Token( + text='d', + text_cased='d', + s_start=15, + s_end=15, + t_start=13 * OUTPUT_TIMESTEP_DURATION, + t_end=14 * OUTPUT_TIMESTEP_DURATION, + ), + ], + ), + ], + ), + Token(text='', text_cased='', s_start=16, s_end=16, t_start=-1, t_end=-1), + Token( + text='', + text_cased='', + s_start=17, + s_end=17, + t_start=14 * OUTPUT_TIMESTEP_DURATION, + t_end=16 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=18, s_end=18, t_start=-1, t_end=-1), + Segment( + text="hey", + s_start=19, + s_end=23, + t_start=16 * OUTPUT_TIMESTEP_DURATION, + t_end=20 * OUTPUT_TIMESTEP_DURATION, + words_and_tokens=[ + Word( + text="hey", + s_start=19, + s_end=23, + t_start=16 * OUTPUT_TIMESTEP_DURATION, + 
t_end=20 * OUTPUT_TIMESTEP_DURATION, + tokens=[ + Token( + text='h', + text_cased='h', + s_start=19, + s_end=19, + t_start=16 * OUTPUT_TIMESTEP_DURATION, + t_end=17 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=20, s_end=20, t_start=-1, t_end=-1), + Token( + text='e', + text_cased='e', + s_start=21, + s_end=21, + t_start=17 * OUTPUT_TIMESTEP_DURATION, + t_end=18 * OUTPUT_TIMESTEP_DURATION, + ), + Token(text='', text_cased='', s_start=22, s_end=22, t_start=-1, t_end=-1), + Token( + text='y', + text_cased='y', + s_start=23, + s_end=23, + t_start=18 * OUTPUT_TIMESTEP_DURATION, + t_end=20 * OUTPUT_TIMESTEP_DURATION, + ), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=24, s_end=24, t_start=-1, t_end=-1), + ], +) + + +@pytest.mark.parametrize( + "alignment,expected_output_utterance, output_timestep_duration", + [(ALIGNMENT, EXPECTED_OUTPUT_UTTERANCE, OUTPUT_TIMESTEP_DURATION),], +) +def test_add_t_start_end_to_utt_obj(alignment, expected_output_utterance, output_timestep_duration): + input_utterance = copy.deepcopy(expected_output_utterance) + + # set all t_start and t_end to None in input_utterance + for segment_or_token in input_utterance.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + segment.t_start = None + segment.t_end = None + + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + word = word_or_token + word.t_start = None + word.t_end = None + + for token in word.tokens: + token.t_start = None + token.t_end = None + else: + token = word_or_token + token.t_start = None + token.t_end = None + + else: + token = segment_or_token + token.t_start = None + token.t_end = None + + output_utterance = add_t_start_end_to_utt_obj(input_utterance, alignment, output_timestep_duration) + assert output_utterance == expected_output_utterance diff --git a/tools/nemo_forced_aligner/tests/test_get_utt_obj.py b/tools/nemo_forced_aligner/tests/test_get_utt_obj.py new file mode 100644 index 000000000000..31dd978263c0 --- /dev/null +++ b/tools/nemo_forced_aligner/tests/test_get_utt_obj.py @@ -0,0 +1,344 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
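In the expected `Utterance` objects of the test below, `token_ids_with_blanks` interleaves the tokenized text with CTC blanks, and the `s_start`/`s_end` fields index into that state sequence, so non-blank tokens sit on odd indices and blanks on even ones. A small illustrative sketch of the interleaving (not the NFA code itself):

```python
# Illustrative sketch only: interleave token ids with the CTC blank id.
def interleave_with_blanks(token_ids, blank_id):
    states = [blank_id]
    for token_id in token_ids:
        states.extend([token_id, blank_id])
    return states

# e.g. the four token ids from the EN_CN test case below, with blank_id=1024,
# reproduce its token_ids_with_blanks; the i-th token lands on state 2*i + 1.
assert interleave_with_blanks([317, 472, 25, 20], 1024) == [
    1024, 317, 1024, 472, 1024, 25, 1024, 20, 1024,
]
```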
+ +import prettyprinter +import pytest +from prettyprinter import pretty_call, register_pretty +from utils.data_prep import Segment, Token, Utterance, Word, get_utt_obj + +from nemo.collections.asr.models import ASRModel + + +def get_utt_obj_pp_string(utt_obj): + @register_pretty(Word) + def pretty_utterance(value, ctx): + return pretty_call( + ctx, + Word, + text=value.text, + s_start=value.s_start, + s_end=value.s_end, + t_start=value.t_start, + t_end=value.t_end, + tokens=value.tokens, + ) + + @register_pretty(Segment) + def pretty_utterance(value, ctx): + return pretty_call( + ctx, + Segment, + text=value.text, + s_start=value.s_start, + s_end=value.s_end, + t_start=value.t_start, + t_end=value.t_end, + words_and_tokens=value.words_and_tokens, + ) + + @register_pretty(Utterance) + def pretty_utterance(value, ctx): + return pretty_call( + ctx, + Utterance, + text=value.text, + token_ids_with_blanks=value.token_ids_with_blanks, + segments_and_tokens=value.segments_and_tokens, + audio_filepath=value.audio_filepath, + utt_id=value.utt_id, + ) + + return prettyprinter.pformat(utt_obj) + + +T_FOR_TEST = 999 +AUDIO_FILEPATH_FOR_TEST = "arbitrary_string.wav" +UTT_ID_FOR_TEST = "arbitrary_string" + +EN_TEXT = "hi world | hey" + +EN_CN_EXPECTED_UTTERANCE = Utterance( + text='hi world | hey', + token_ids_with_blanks=[1024, 317, 1024, 472, 1024, 25, 1024, 20, 1024], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=None, t_end=None), + Segment( + text='hi world', + s_start=1, + s_end=3, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='hi', + s_start=1, + s_end=1, + t_start=None, + t_end=None, + tokens=[Token(text='▁hi', text_cased='▁hi', s_start=1, s_end=1, t_start=None, t_end=None)], + ), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=None, t_end=None), + Word( + text='world', + s_start=3, + s_end=3, + t_start=None, + t_end=None, + tokens=[Token(text='▁world', text_cased='▁world', s_start=3, s_end=3, t_start=None, t_end=None)], + ), + ], + ), + Token(text='', text_cased='', s_start=4, s_end=4, t_start=None, t_end=None), + Segment( + text='hey', + s_start=5, + s_end=7, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='hey', + s_start=5, + s_end=7, + t_start=None, + t_end=None, + tokens=[ + Token(text='▁he', text_cased='▁he', s_start=5, s_end=5, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=None, t_end=None), + Token(text='y', text_cased='y', s_start=7, s_end=7, t_start=None, t_end=None), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=None, t_end=None), + ], + audio_filepath=AUDIO_FILEPATH_FOR_TEST, + utt_id=UTT_ID_FOR_TEST, +) + +EN_QN_EXPECTED_UTTERANCE = Utterance( + text='hi world | hey', + token_ids_with_blanks=[ + 28, + 8, + 28, + 9, + 28, + 0, + 28, + 23, + 28, + 15, + 28, + 18, + 28, + 12, + 28, + 4, + 28, + 0, + 28, + 8, + 28, + 5, + 28, + 25, + 28, + ], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=None, t_end=None), + Segment( + text="hi world", + s_start=1, + s_end=15, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text="hi", + s_start=1, + s_end=3, + t_start=None, + t_end=None, + tokens=[ + Token(text='h', text_cased='h', s_start=1, s_end=1, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=None, t_end=None), + Token(text='i', text_cased='i', s_start=3, s_end=3, t_start=None, t_end=None), + ], + ), + Token(text='', text_cased='', s_start=4, 
s_end=4, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=5, s_end=5, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=None, t_end=None), + Word( + text="world", + s_start=7, + s_end=15, + t_start=None, + t_end=None, + tokens=[ + Token(text='w', text_cased='w', s_start=7, s_end=7, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=None, t_end=None), + Token(text='o', text_cased='o', s_start=9, s_end=9, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=10, s_end=10, t_start=None, t_end=None), + Token(text='r', text_cased='r', s_start=11, s_end=11, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=12, s_end=12, t_start=None, t_end=None), + Token(text='l', text_cased='l', s_start=13, s_end=13, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=14, s_end=14, t_start=None, t_end=None), + Token(text='d', text_cased='d', s_start=15, s_end=15, t_start=None, t_end=None), + ], + ), + ], + ), + Token(text='', text_cased='', s_start=16, s_end=16, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=17, s_end=17, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=18, s_end=18, t_start=None, t_end=None), + Segment( + text="hey", + s_start=19, + s_end=23, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text="hey", + s_start=19, + s_end=23, + t_start=None, + t_end=None, + tokens=[ + Token(text='h', text_cased='h', s_start=19, s_end=19, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=20, s_end=20, t_start=None, t_end=None), + Token(text='e', text_cased='e', s_start=21, s_end=21, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=22, s_end=22, t_start=None, t_end=None), + Token(text='y', text_cased='y', s_start=23, s_end=23, t_start=None, t_end=None), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=24, s_end=24, t_start=None, t_end=None), + ], + audio_filepath=AUDIO_FILEPATH_FOR_TEST, + utt_id=UTT_ID_FOR_TEST, +) + + +ZH_TEXT = "人工 智能|技术" + +ZH_CN_EXPECTED_UTTERANCE = Utterance( + text='人工 智能|技术', + token_ids_with_blanks=[ + 5206, + 125, + 5206, + 1329, + 5206, + 0, + 5206, + 2029, + 5206, + 3668, + 5206, + 0, + 5206, + 1695, + 5206, + 2075, + 5206, + ], + segments_and_tokens=[ + Token(text='', text_cased='', s_start=0, s_end=0, t_start=None, t_end=None), + Segment( + text='人工 智能', + s_start=1, + s_end=9, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='人工', + s_start=1, + s_end=3, + t_start=None, + t_end=None, + tokens=[ + Token(text='人', text_cased='人', s_start=1, s_end=1, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=2, s_end=2, t_start=None, t_end=None), + Token(text='工', text_cased='工', s_start=3, s_end=3, t_start=None, t_end=None), + ], + ), + Token(text='', text_cased='', s_start=4, s_end=4, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=5, s_end=5, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=6, s_end=6, t_start=None, t_end=None), + Word( + text='智能', + s_start=7, + s_end=9, + t_start=None, + t_end=None, + tokens=[ + Token(text='智', text_cased='智', s_start=7, s_end=7, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=8, s_end=8, t_start=None, t_end=None), + Token(text='能', text_cased='能', s_start=9, s_end=9, t_start=None, t_end=None), + ], + ), + ], + ), + Token(text='', text_cased='', s_start=10, s_end=10, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=11, 
s_end=11, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=12, s_end=12, t_start=None, t_end=None), + Segment( + text='技术', + s_start=13, + s_end=15, + t_start=None, + t_end=None, + words_and_tokens=[ + Word( + text='技术', + s_start=13, + s_end=15, + t_start=None, + t_end=None, + tokens=[ + Token(text='技', text_cased='技', s_start=13, s_end=13, t_start=None, t_end=None), + Token(text='', text_cased='', s_start=14, s_end=14, t_start=None, t_end=None), + Token(text='术', text_cased='术', s_start=15, s_end=15, t_start=None, t_end=None), + ], + ) + ], + ), + Token(text='', text_cased='', s_start=16, s_end=16, t_start=None, t_end=None), + ], + audio_filepath=AUDIO_FILEPATH_FOR_TEST, + utt_id=UTT_ID_FOR_TEST, +) + + +@pytest.mark.parametrize( + "text,model_pretrained_name,separator,expected_utterance", + [ + (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_UTTERANCE), + (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_UTTERANCE), + (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_CN_EXPECTED_UTTERANCE), + ], +) +def test_token_info(text, model_pretrained_name, separator, expected_utterance): + model = ASRModel.from_pretrained(model_pretrained_name) + utt_obj = get_utt_obj( + text, model, separator, T=T_FOR_TEST, audio_filepath=AUDIO_FILEPATH_FOR_TEST, utt_id=UTT_ID_FOR_TEST + ) + print(f"expected utterance object: {get_utt_obj_pp_string(expected_utterance)}\n") + print(f"output utterance object in test: {get_utt_obj_pp_string(utt_obj)}\n") + + assert utt_obj == expected_utterance diff --git a/tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py b/tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py deleted file mode 100644 index f5bc722d5a1c..000000000000 --- a/tools/nemo_forced_aligner/tests/test_get_y_and_boundary_info_for_utt.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest -from utils.data_prep import get_y_and_boundary_info_for_utt - -from nemo.collections.asr.models import ASRModel - -EN_TEXT = "hi world | hey" - -EN_QN_EXPECTED_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': 'h', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': 'i', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'w', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - {'text': 'o', 's_start': 9, 's_end': 9}, - {'text': '', 's_start': 10, 's_end': 10}, - {'text': 'r', 's_start': 11, 's_end': 11}, - {'text': '', 's_start': 12, 's_end': 12}, - {'text': 'l', 's_start': 13, 's_end': 13}, - {'text': '', 's_start': 14, 's_end': 14}, - {'text': 'd', 's_start': 15, 's_end': 15}, - {'text': '', 's_start': 16, 's_end': 16}, - {'text': '', 's_start': 17, 's_end': 17}, - {'text': '', 's_start': 18, 's_end': 18}, - {'text': 'h', 's_start': 19, 's_end': 19}, - {'text': '', 's_start': 20, 's_end': 20}, - {'text': 'e', 's_start': 21, 's_end': 21}, - {'text': '', 's_start': 22, 's_end': 22}, - {'text': 'y', 's_start': 23, 's_end': 23}, - {'text': '', 's_start': 24, 's_end': 24}, -] - -EN_QN_EXPECTED_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EN_QN_EXPECTED_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, -] - -EN_CN_EXPECTED_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': '▁hi', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': '▁world', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '▁he', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': 'y', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, -] - -EN_CN_EXPECTED_WORD_INFO = [ - {'text': 'hi', 's_start': 1, 's_end': 1}, - {'text': 'world', 's_start': 3, 's_end': 3}, - {'text': 'hey', 's_start': 5, 's_end': 7}, -] - -EN_CN_EXPECTED_SEGMENT_INFO = [ - {'text': 'hi world', 's_start': 1, 's_end': 3}, - {'text': 'hey', 's_start': 5, 's_end': 7}, -] - - -ZH_TEXT = "人工 智能|技术" - -ZH_EXPECTED_TOKEN_INFO = [ - {'text': '', 's_start': 0, 's_end': 0}, - {'text': '人', 's_start': 1, 's_end': 1}, - {'text': '', 's_start': 2, 's_end': 2}, - {'text': '工', 's_start': 3, 's_end': 3}, - {'text': '', 's_start': 4, 's_end': 4}, - {'text': '', 's_start': 5, 's_end': 5}, - {'text': '', 's_start': 6, 's_end': 6}, - {'text': '智', 's_start': 7, 's_end': 7}, - {'text': '', 's_start': 8, 's_end': 8}, - {'text': '能', 's_start': 9, 's_end': 9}, - {'text': '', 's_start': 10, 's_end': 10}, - {'text': '', 's_start': 11, 's_end': 11}, - {'text': '', 's_start': 12, 's_end': 12}, - {'text': '技', 's_start': 13, 's_end': 13}, - {'text': '', 's_start': 14, 's_end': 14}, - {'text': '术', 's_start': 15, 's_end': 15}, - {'text': '', 's_start': 16, 's_end': 16}, -] - -ZH_EXPECTED_WORD_INFO = [ - {'text': '人工', 's_start': 1, 's_end': 3}, - {'text': '智能', 's_start': 7, 's_end': 9}, - {'text': '技术', 's_start': 13, 's_end': 15}, -] - -ZH_EXPECTED_SEGMENT_INFO = [ - {'text': '人工 智能', 's_start': 1, 's_end': 9}, - {'text': '技术', 's_start': 13, 's_end': 15}, -] - - -@pytest.mark.parametrize( - "text,model_pretrained_name,separator,expected_token_info", - [ - (EN_TEXT, "stt_en_quartznet15x5", "|", 
EN_QN_EXPECTED_TOKEN_INFO), - (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_TOKEN_INFO), - (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_EXPECTED_TOKEN_INFO), - ], -) -def test_token_info(text, model_pretrained_name, separator, expected_token_info): - model = ASRModel.from_pretrained(model_pretrained_name) - _, token_info, *_ = get_y_and_boundary_info_for_utt(text, model, separator) - assert token_info == expected_token_info - - -@pytest.mark.parametrize( - "text,model_pretrained_name,separator,expected_word_info", - [ - (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_WORD_INFO), - (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_WORD_INFO), - (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_EXPECTED_WORD_INFO), - ], -) -def test_word_info(text, model_pretrained_name, separator, expected_word_info): - model = ASRModel.from_pretrained(model_pretrained_name) - _, _, word_info, _ = get_y_and_boundary_info_for_utt(text, model, separator) - assert word_info == expected_word_info - - -@pytest.mark.parametrize( - "text,model_pretrained_name,separator,expected_segment_info", - [ - (EN_TEXT, "stt_en_quartznet15x5", "|", EN_QN_EXPECTED_SEGMENT_INFO), - (EN_TEXT, "stt_en_citrinet_256_gamma_0_25", "|", EN_CN_EXPECTED_SEGMENT_INFO), - (ZH_TEXT, "stt_zh_citrinet_512", "|", ZH_EXPECTED_SEGMENT_INFO), - ], -) -def test_segment_info(text, model_pretrained_name, separator, expected_segment_info): - model = ASRModel.from_pretrained(model_pretrained_name) - *_, segment_info = get_y_and_boundary_info_for_utt(text, model, separator) - assert segment_info == expected_segment_info diff --git a/tools/nemo_forced_aligner/tests/test_restore_token_case.py b/tools/nemo_forced_aligner/tests/test_restore_token_case.py new file mode 100644 index 000000000000..6217dfc0ba94 --- /dev/null +++ b/tools/nemo_forced_aligner/tests/test_restore_token_case.py @@ -0,0 +1,36 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from utils.data_prep import restore_token_case + + +@pytest.mark.parametrize( + "word,word_tokens,expected_word_tokens_cased", + [ + ("HEY!", ['▁he', 'y', '!'], ['▁HE', 'Y', '!']), + ("BabABa▁", ['▁b', 'a', 'b', 'a', 'b', 'a'], ['▁B', 'a', 'b', 'A', 'B', 'a']), + ("BabAB▁a", ['▁b', 'a', 'b', 'a', 'b', '_a'], ['▁B', 'a', 'b', 'A', 'B', '_a']), + ("Bab▁AB▁a", ['▁b', 'a', 'b', '▁a', 'b', '▁a'], ['▁B', 'a', 'b', '▁A', 'B', '▁a']), + ("▁Bab▁AB▁a", ['▁b', 'a', 'b', '▁a', 'b', '▁a'], ['▁B', 'a', 'b', '▁A', 'B', '▁a']), + ("▁Bab▁AB▁▁a", ['▁b', 'a', 'b', '▁a', 'b', '▁a'], ['▁B', 'a', 'b', '▁A', 'B', '▁a']), + ("▁▁BabAB▁a", ['▁b', 'a', 'b', 'a', 'b', '▁a'], ['▁B', 'a', 'b', 'A', 'B', '▁a']), + ("m²", ['▁', 'm', '2'], ['▁', 'm', '2']), + ("²", ['▁', '2'], ['▁', '2']), + ], +) +def test_restore_token_case(word, word_tokens, expected_word_tokens_cased): + word_tokens_cased = restore_token_case(word, word_tokens) + assert word_tokens_cased == expected_word_tokens_cased diff --git a/tools/nemo_forced_aligner/utils/constants.py b/tools/nemo_forced_aligner/utils/constants.py index 894f880401cb..51ce934be479 100644 --- a/tools/nemo_forced_aligner/utils/constants.py +++ b/tools/nemo_forced_aligner/utils/constants.py @@ -16,4 +16,4 @@ SPACE_TOKEN = "" -V_NEGATIVE_NUM = -1e30 +V_NEGATIVE_NUM = -3.4e38 # this is just above the most negative number in torch.float32 diff --git a/tools/nemo_forced_aligner/utils/data_prep.py b/tools/nemo_forced_aligner/utils/data_prep.py index 852be91d78c4..20f401389c4e 100644 --- a/tools/nemo_forced_aligner/utils/data_prep.py +++ b/tools/nemo_forced_aligner/utils/data_prep.py @@ -13,13 +13,24 @@ # limitations under the License. import json -import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Union import soundfile as sf import torch from tqdm.auto import tqdm from utils.constants import BLANK_TOKEN, SPACE_TOKEN, V_NEGATIVE_NUM +from nemo.utils import logging + + +def _get_utt_id(audio_filepath, audio_filepath_parts_in_utt_id): + fp_parts = Path(audio_filepath).parts[-audio_filepath_parts_in_utt_id:] + utt_id = Path("_".join(fp_parts)).stem + utt_id = utt_id.replace(" ", "-") # replace any spaces in the filepath with dashes + return utt_id + def get_batch_starts_ends(manifest_filepath, batch_size): """ @@ -70,10 +81,16 @@ def is_entry_in_all_lines(manifest_filepath, entry): def get_manifest_lines_batch(manifest_filepath, start, end): manifest_lines_batch = [] - with open(manifest_filepath, "r") as f: + with open(manifest_filepath, "r", encoding="utf-8-sig") as f: for line_i, line in enumerate(f): if line_i >= start and line_i <= end: - manifest_lines_batch.append(json.loads(line)) + data = json.loads(line) + if "text" in data: + # remove any BOM, any duplicated spaces, convert any + # newline chars to spaces + data["text"] = data["text"].replace("\ufeff", "") + data["text"] = " ".join(data["text"].split()) + manifest_lines_batch.append(data) if line_i == end: break @@ -91,42 +108,138 @@ def get_char_tokens(text, model): return tokens -def get_y_and_boundary_info_for_utt(text, model, separator): +def is_sub_or_superscript_pair(ref_text, text): + """returns True if ref_text is a subscript or superscript version of text""" + sub_or_superscript_to_num = { + "⁰": "0", + "¹": "1", + "²": "2", + "³": "3", + "⁴": "4", + "⁵": "5", + "⁶": "6", + "⁷": "7", + "⁸": "8", + "⁹": "9", + "₀": "0", + "₁": "1", + "₂": "2", + "₃": "3", + "₄": "4", + "₅": "5", + "₆": "6", + "₇": "7", + "₈": "8", + "₉": "9", + } + + if text in 
sub_or_superscript_to_num:
+        if sub_or_superscript_to_num[text] == ref_text:
+            return True
+    return False
+
+
+def restore_token_case(word, word_tokens):
+
+    # remove repeated "▁" and "_" from word as that is what the tokenizer will do
+    while "▁▁" in word:
+        word = word.replace("▁▁", "▁")
+
+    while "__" in word:
+        word = word.replace("__", "_")
+
+    word_tokens_cased = []
+    word_char_pointer = 0
+
+    for token in word_tokens:
+        token_cased = ""
+
+        for token_char in token:
+            if token_char == word[word_char_pointer]:
+                token_cased += token_char
+                word_char_pointer += 1
+
+            else:
+                if token_char.upper() == word[word_char_pointer] or is_sub_or_superscript_pair(
+                    token_char, word[word_char_pointer]
+                ):
+                    token_cased += token_char.upper()
+                    word_char_pointer += 1
+                else:
+                    if token_char == "▁" or token_char == "_":
+                        if word[word_char_pointer] == "▁" or word[word_char_pointer] == "_":
+                            token_cased += token_char
+                            word_char_pointer += 1
+                        elif word_char_pointer == 0:
+                            token_cased += token_char
+
+                    else:
+                        raise RuntimeError(
+                            f"Unexpected error - failed to recover capitalization of tokens for word {word}"
+                        )
+
+        word_tokens_cased.append(token_cased)
+
+    return word_tokens_cased
+
+
+@dataclass
+class Token:
+    text: str = None
+    text_cased: str = None
+    s_start: int = None
+    s_end: int = None
+    t_start: float = None
+    t_end: float = None
+
+
+@dataclass
+class Word:
+    text: str = None
+    s_start: int = None
+    s_end: int = None
+    t_start: float = None
+    t_end: float = None
+    tokens: List[Token] = field(default_factory=list)
+
+
+@dataclass
+class Segment:
+    text: str = None
+    s_start: int = None
+    s_end: int = None
+    t_start: float = None
+    t_end: float = None
+    words_and_tokens: List[Union[Word, Token]] = field(default_factory=list)
+
+
+@dataclass
+class Utterance:
+    token_ids_with_blanks: List[int] = field(default_factory=list)
+    segments_and_tokens: List[Union[Segment, Token]] = field(default_factory=list)
+    text: str = None
+    pred_text: str = None
+    audio_filepath: str = None
+    utt_id: str = None
+    saved_output_files: dict = field(default_factory=dict)
+
+
+def get_utt_obj(
+    text, model, separator, T, audio_filepath, utt_id,
+):
     """
-    Get y_token_ids_with_blanks, token_info, word_info and segment_info for the text provided, tokenized
-    by the model provided.
-    y_token_ids_with_blanks is a list of the indices of the text tokens with the blank token id in between every
-    text token.
-    token_info, word_info and segment_info are lists of dictionaries containing information about
-    where the tokens/words/segments start and end.
-    For example, 'hi world | hey ' with separator = '|' and tokenized by a BPE tokenizer can have token_info like:
-    token_info = [
-        {'text': '', 's_start': 0, 's_end': 0},
-        {'text': '▁hi', 's_start': 1, 's_end': 1},
-        {'text': '', 's_start': 2, 's_end': 2},
-        {'text': '▁world', 's_start': 3, 's_end': 3},
-        {'text': '', 's_start': 4, 's_end': 4},
-        {'text': '▁he', 's_start': 5, 's_end': 5},
-        {'text': '', 's_start': 6, 's_end': 6},
-        {'text': 'y', 's_start': 7, 's_end': 7},
-        {'text': '', 's_start': 8, 's_end': 8},
-    ]
-    's_start' and 's_end' indicate where in the sequence of tokens does each token start and end.
-
-    The word_info will be as follows:
-    word_info = [
-        {'text': 'hi', 's_start': 1, 's_end': 1},
-        {'text': 'world', 's_start': 3, 's_end': 3},
-        {'text': 'hey', 's_start': 5, 's_end': 7},
-    ]
-    's_start' and 's_end' indicate where in the sequence of tokens does each word start and end.
-
-    segment_info will be as follows:
-    segment_info = [
-        {'text': 'hi world', 's_start': 1, 's_end': 3},
-        {'text': 'hey', 's_start': 5, 's_end': 7},
-    ]
-    's_start' and 's_end' indicate where in the sequence of tokens does each segment start and end.
+    Function to create an Utterance object and add all necessary information to it except
+    for timings of the segments / words / tokens according to the alignment - that will
+    be done later in a different function, after the alignment is done.
+
+    The Utterance object has a list segments_and_tokens which contains Segment objects and
+    Token objects (for blank tokens in between segments).
+    Within the Segment objects, there is a list words_and_tokens which contains Word objects and
+    Token objects (for blank tokens in between words).
+    Within the Word objects, there is a list tokens which contains Token objects for
+    blank and non-blank tokens.
+    We will be building up these lists in this function. This data structure will then be useful for
+    generating the various output files that we wish to save.
     """

     if not separator:  # if separator is not defined - treat the whole text as one segment
@@ -137,157 +250,429 @@ def get_y_and_boundary_info_for_utt(text, model, separator):
     # remove any spaces at start and end of segments
     segments = [seg.strip() for seg in segments]
 
+    utt = Utterance(text=text, audio_filepath=audio_filepath, utt_id=utt_id,)
+
+    # build up lists: token_ids_with_blanks, segments_and_tokens.
+    # The code for these is different depending on whether we use char-based tokens or not
     if hasattr(model, 'tokenizer'):
         if hasattr(model, 'blank_id'):
             BLANK_ID = model.blank_id
         else:
-            BLANK_ID = len(model.decoder.vocabulary)  # TODO: check
+            BLANK_ID = len(model.tokenizer.vocab)  # TODO: check
 
-        y_token_ids_with_blanks = [BLANK_ID]
-        token_info = [{"text": BLANK_TOKEN, "s_start": 0, "s_end": 0,}]
-        word_info = []
-        segment_info = []
+        utt.token_ids_with_blanks = [BLANK_ID]
+
+        # check for text being 0 length
+        if len(text) == 0:
+            return utt
+
+        # check for # tokens + token repetitions being > T
+        all_tokens = model.tokenizer.text_to_ids(text)
+        n_token_repetitions = 0
+        for i_tok in range(1, len(all_tokens)):
+            if all_tokens[i_tok] == all_tokens[i_tok - 1]:
+                n_token_repetitions += 1
+
+        if len(all_tokens) + n_token_repetitions > T:
+            logging.info(
+                f"Utterance {utt_id} has too many tokens compared to the audio file duration."
+                " Will not generate output alignment files for this utterance."
+ ) + return utt + + # build up data structures containing segments/words/tokens + utt.segments_and_tokens.append(Token(text=BLANK_TOKEN, text_cased=BLANK_TOKEN, s_start=0, s_end=0,)) segment_s_pointer = 1 # first segment will start at s=1 because s=0 is a blank word_s_pointer = 1 # first word will start at s=1 because s=0 is a blank for segment in segments: + # add the segment to segment_info and increment the segment_s_pointer + segment_tokens = model.tokenizer.text_to_tokens(segment) + utt.segments_and_tokens.append( + Segment( + text=segment, + s_start=segment_s_pointer, + # segment_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current segment: + # segment_s_pointer + len(segment_tokens) * 2 is the index of the first token of the next segment => + # => need to subtract 2 + s_end=segment_s_pointer + len(segment_tokens) * 2 - 2, + ) + ) + segment_s_pointer += ( + len(segment_tokens) * 2 + ) # multiply by 2 to account for blanks (which are not present in segment_tokens) + words = segment.split(" ") # we define words to be space-separated sub-strings - for word in words: + for word_i, word in enumerate(words): word_tokens = model.tokenizer.text_to_tokens(word) - word_ids = model.tokenizer.text_to_ids(word) - for token, id_ in zip(word_tokens, word_ids): - # add the text token and the blank that follows it - # to our token-based variables - y_token_ids_with_blanks.extend([id_, BLANK_ID]) - token_info.extend( - [ - { - "text": token, - "s_start": len(y_token_ids_with_blanks) - 2, - "s_end": len(y_token_ids_with_blanks) - 2, - }, - { - "text": BLANK_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 1, - "s_end": len(y_token_ids_with_blanks) - 1, - }, - ] - ) + word_token_ids = model.tokenizer.text_to_ids(word) + word_tokens_cased = restore_token_case(word, word_tokens) # add the word to word_info and increment the word_s_pointer - word_info.append( - { - "text": word, - "s_start": word_s_pointer, - "s_end": word_s_pointer + (len(word_tokens) - 1) * 2, # TODO check this, - } + utt.segments_and_tokens[-1].words_and_tokens.append( + # word_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current word: + # word_s_pointer + len(word_tokens) * 2 is the index of the first token of the next word => + # => need to subtract 2 + Word(text=word, s_start=word_s_pointer, s_end=word_s_pointer + len(word_tokens) * 2 - 2) ) - word_s_pointer += len(word_tokens) * 2 # TODO check this + word_s_pointer += ( + len(word_tokens) * 2 + ) # multiply by 2 to account for blanks (which are not present in word_tokens) + + for token_i, (token, token_id, token_cased) in enumerate( + zip(word_tokens, word_token_ids, word_tokens_cased) + ): + # add the text tokens and the blanks in between them + # to our token-based variables + utt.token_ids_with_blanks.extend([token_id, BLANK_ID]) + # adding Token object for non-blank token + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=token, + text_cased=token_cased, + # utt.token_ids_with_blanks has the form [...., , ] => + # => if do len(utt.token_ids_with_blanks) - 1 you get the index of the final + # => we want to do len(utt.token_ids_with_blanks) - 2 to get the index of + s_start=len(utt.token_ids_with_blanks) - 2, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 2, + ) + ) - # add the segment to segment_info and 
increment the segment_s_pointer - segment_tokens = model.tokenizer.text_to_tokens(segment) - segment_info.append( - { - "text": segment, - "s_start": segment_s_pointer, - "s_end": segment_s_pointer + (len(segment_tokens) - 1) * 2, - } + # adding Token object for blank tokens in between the tokens of the word + # (ie do not add another blank if you have reached the end) + if token_i < len(word_tokens) - 1: + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [...., ] => + # => if do len(utt.token_ids_with_blanks) -1 you get the index of this + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + # add a Token object for blanks in between words in this segment + # (but only *in between* - do not add the token if it is after the final word) + if word_i < len(words) - 1: + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [...., ] => + # => if do len(utt.token_ids_with_blanks) -1 you get the index of this + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + # add the blank token in between segments/after the final segment + utt.segments_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [...., ] => + # => if do len(utt.token_ids_with_blanks) -1 you get the index of this + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) ) - segment_s_pointer += len(segment_tokens) * 2 - return y_token_ids_with_blanks, token_info, word_info, segment_info + return utt elif hasattr(model.decoder, "vocabulary"): # i.e. tokenization is simply character-based BLANK_ID = len(model.decoder.vocabulary) # TODO: check this is correct SPACE_ID = model.decoder.vocabulary.index(" ") - y_token_ids_with_blanks = [BLANK_ID] - token_info = [{"text": BLANK_TOKEN, "s_start": 0, "s_end": 0,}] - word_info = [] - segment_info = [] + utt.token_ids_with_blanks = [BLANK_ID] + + # check for text being 0 length + if len(text) == 0: + return utt + + # check for # tokens + token repetitions being > T + all_tokens = get_char_tokens(text, model) + n_token_repetitions = 0 + for i_tok in range(1, len(all_tokens)): + if all_tokens[i_tok] == all_tokens[i_tok - 1]: + n_token_repetitions += 1 + + if len(all_tokens) + n_token_repetitions > T: + logging.info( + f"Utterance {utt_id} has too many tokens compared to the audio file duration." + " Will not generate output alignment files for this utterance." 
+ ) + return utt + + # build up data structures containing segments/words/tokens + utt.segments_and_tokens.append(Token(text=BLANK_TOKEN, text_cased=BLANK_TOKEN, s_start=0, s_end=0,)) segment_s_pointer = 1 # first segment will start at s=1 because s=0 is a blank word_s_pointer = 1 # first word will start at s=1 because s=0 is a blank for i_segment, segment in enumerate(segments): - words = segment.split(" ") # we define words to be space-separated characters + # add the segment to segment_info and increment the segment_s_pointer + segment_tokens = get_char_tokens(segment, model) + utt.segments_and_tokens.append( + Segment( + text=segment, + s_start=segment_s_pointer, + # segment_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current segment: + # segment_s_pointer + len(segment_tokens) * 2 is the index of the first token of the next segment => + # => need to subtract 2 + s_end=segment_s_pointer + len(segment_tokens) * 2 - 2, + ) + ) + + # for correct calculation: multiply len(segment_tokens) by 2 to account for blanks (which are not present in segment_tokens) + # and + 2 to account for [, ] + segment_s_pointer += len(segment_tokens) * 2 + 2 + + words = segment.split(" ") # we define words to be space-separated substrings for i_word, word in enumerate(words): # convert string to list of characters word_tokens = list(word) # convert list of characters to list of their ids in the vocabulary - word_ids = get_char_tokens(word, model) - for token, id_ in zip(word_tokens, word_ids): - # add the text token and the blank that follows it + word_token_ids = get_char_tokens(word, model) + + # add the word to word_info and increment the word_s_pointer + utt.segments_and_tokens[-1].words_and_tokens.append( + # note for s_end: + # word_tokens do not contain blanks => need to muliply by 2 + # s_end needs to be the index of the final token (including blanks) of the current word: + # word_s_pointer + len(word_tokens) * 2 is the index of the first token of the next word => + # => need to subtract 2 + Word(text=word, s_start=word_s_pointer, s_end=word_s_pointer + len(word_tokens) * 2 - 2) + ) + + # for correct calculation: multiply len(word_tokens) by 2 to account for blanks (which are not present in word_tokens) + # and + 2 to account for [, ] + word_s_pointer += len(word_tokens) * 2 + 2 + + for token_i, (token, token_id) in enumerate(zip(word_tokens, word_token_ids)): + # add the text tokens and the blanks in between them # to our token-based variables - y_token_ids_with_blanks.extend([id_, BLANK_ID]) - token_info.extend( - [ - { - "text": token, - "s_start": len(y_token_ids_with_blanks) - 2, - "s_end": len(y_token_ids_with_blanks) - 2, - }, - { - "text": BLANK_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 1, - "s_end": len(y_token_ids_with_blanks) - 1, - }, - ] + utt.token_ids_with_blanks.extend([token_id]) + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=token, + text_cased=token, + # utt.token_ids_with_blanks has the form [..., ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of this non-blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) ) - # add space token (and the blank after it) unless this is the final word in the final segment - if not (i_segment == len(segments) - 1 and i_word == len(words) - 1): - 
y_token_ids_with_blanks.extend([SPACE_ID, BLANK_ID]) - token_info.extend( - ( - { - "text": SPACE_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 2, - "s_end": len(y_token_ids_with_blanks) - 2, - }, - { - "text": BLANK_TOKEN, - "s_start": len(y_token_ids_with_blanks) - 1, - "s_end": len(y_token_ids_with_blanks) - 1, - }, + if token_i < len(word_tokens) - 1: # only add blank tokens that are in the middle of words + utt.token_ids_with_blanks.extend([BLANK_ID]) + utt.segments_and_tokens[-1].words_and_tokens[-1].tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [..., ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of this blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + # add space token (and the blanks around it) unless this is the final word in a segment + if i_word < len(words) - 1: + utt.token_ids_with_blanks.extend([BLANK_ID, SPACE_ID, BLANK_ID]) + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , , , ] + # => do len(utt.token_ids_with_blanks) - 3 to get the index of the blank token before the space token + s_start=len(utt.token_ids_with_blanks) - 3, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 3, + ) + ) + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=SPACE_TOKEN, + text_cased=SPACE_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , , , ] + # => do len(utt.token_ids_with_blanks) - 2 to get the index of the space token + s_start=len(utt.token_ids_with_blanks) - 2, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 2, + ) + ) + utt.segments_and_tokens[-1].words_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , , , ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of the blank token after the space token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, ) ) - # add the word to word_info and increment the word_s_pointer - word_info.append( - { - "text": word, - "s_start": word_s_pointer, - "s_end": word_s_pointer + len(word_tokens) * 2 - 2, # TODO check this, - } - ) - word_s_pointer += len(word_tokens) * 2 + 2 # TODO check this - # add the segment to segment_info and increment the segment_s_pointer - segment_tokens = get_char_tokens(segment, model) - segment_info.append( - { - "text": segment, - "s_start": segment_s_pointer, - "s_end": segment_s_pointer + (len(segment_tokens) - 1) * 2, - } + # add a blank to the segment, and add a space after if this is not the final segment + utt.token_ids_with_blanks.extend([BLANK_ID]) + utt.segments_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form [..., ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of this blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) ) - segment_s_pointer += len(segment_tokens) * 2 
+ 2 - return y_token_ids_with_blanks, token_info, word_info, segment_info + if i_segment < len(segments) - 1: + utt.token_ids_with_blanks.extend([SPACE_ID, BLANK_ID]) + utt.segments_and_tokens.append( + Token( + text=SPACE_TOKEN, + text_cased=SPACE_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , ] + # => do len(utt.token_ids_with_blanks) - 2 to get the index of the space token + s_start=len(utt.token_ids_with_blanks) - 2, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 2, + ) + ) + utt.segments_and_tokens.append( + Token( + text=BLANK_TOKEN, + text_cased=BLANK_TOKEN, + # utt.token_ids_with_blanks has the form + # [..., , ] + # => do len(utt.token_ids_with_blanks) - 1 to get the index of the blank token + s_start=len(utt.token_ids_with_blanks) - 1, + # s_end is same as s_start since the token only occupies one element in the list + s_end=len(utt.token_ids_with_blanks) - 1, + ) + ) + + return utt else: raise RuntimeError("Cannot get tokens of this model.") -def get_batch_tensors_and_boundary_info( +def add_t_start_end_to_utt_obj(utt_obj, alignment_utt, output_timestep_duration): + """ + Function to add t_start and t_end (representing time in seconds) to the Utterance object utt_obj. + Args: + utt_obj: Utterance object to which we will add t_start and t_end for its + constituent segments/words/tokens. + alignment_utt: a list of ints indicating which token does the alignment pass through at each + timestep (will take the form [0, 0, 1, 1, ..., ]). + output_timestep_duration: a float indicating the duration of a single output timestep from + the ASR Model. + + Returns: + utt_obj: updated Utterance object. + """ + + # General idea for the algorithm of how we add t_start and t_end + # the timestep where a token s starts is the location of the first appearance of s_start in alignment_utt + # the timestep where a token s ends is the location of the final appearance of s_end in alignment_utt + # We will make dictionaries num_to_first_alignment_appearance and + # num_to_last_appearance and use that to update all of + # the t_start and t_end values in utt_obj. 
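+    # Illustrative example (made-up numbers, not taken from any real utterance): if
+    # alignment_utt = [0, 0, 1, 1, 3] and output_timestep_duration = 0.04, then
+    # num_to_first_alignment_appearance = {0: 0, 1: 2, 3: 4} and
+    # num_to_last_alignment_appearance = {0: 1, 1: 3, 3: 4}, so the token with s_start = s_end = 1
+    # gets t_start = 2 * 0.04 = 0.08 and t_end = (3 + 1) * 0.04 = 0.16, while s = 2 never
+    # appears in the alignment and is handled by the rule described next.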
+ # We will put t_start = t_end = -1 for tokens that are skipped (should only be blanks) + + num_to_first_alignment_appearance = dict() + num_to_last_alignment_appearance = dict() + + prev_s = -1 # use prev_s to keep track of when the s changes + for t, s in enumerate(alignment_utt): + if s > prev_s: + num_to_first_alignment_appearance[s] = t + + if prev_s >= 0: # dont record prev_s = -1 + num_to_last_alignment_appearance[prev_s] = t - 1 + prev_s = s + # add last appearance of the final s + num_to_last_alignment_appearance[prev_s] = len(alignment_utt) - 1 + + # update all the t_start and t_end in utt_obj + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + segment.t_start = num_to_first_alignment_appearance[segment.s_start] * output_timestep_duration + segment.t_end = (num_to_last_alignment_appearance[segment.s_end] + 1) * output_timestep_duration + + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + word = word_or_token + word.t_start = num_to_first_alignment_appearance[word.s_start] * output_timestep_duration + word.t_end = (num_to_last_alignment_appearance[word.s_end] + 1) * output_timestep_duration + + for token in word.tokens: + if token.s_start in num_to_first_alignment_appearance: + token.t_start = num_to_first_alignment_appearance[token.s_start] * output_timestep_duration + else: + token.t_start = -1 + + if token.s_end in num_to_last_alignment_appearance: + token.t_end = ( + num_to_last_alignment_appearance[token.s_end] + 1 + ) * output_timestep_duration + else: + token.t_end = -1 + else: + token = word_or_token + if token.s_start in num_to_first_alignment_appearance: + token.t_start = num_to_first_alignment_appearance[token.s_start] * output_timestep_duration + else: + token.t_start = -1 + + if token.s_end in num_to_last_alignment_appearance: + token.t_end = (num_to_last_alignment_appearance[token.s_end] + 1) * output_timestep_duration + else: + token.t_end = -1 + + else: + token = segment_or_token + if token.s_start in num_to_first_alignment_appearance: + token.t_start = num_to_first_alignment_appearance[token.s_start] * output_timestep_duration + else: + token.t_start = -1 + + if token.s_end in num_to_last_alignment_appearance: + token.t_end = (num_to_last_alignment_appearance[token.s_end] + 1) * output_timestep_duration + else: + token.t_end = -1 + + return utt_obj + + +def get_batch_variables( manifest_lines_batch, model, separator, align_using_pred_text, + audio_filepath_parts_in_utt_id, + output_timestep_duration, simulate_cache_aware_streaming=False, use_buffered_chunked_streaming=False, buffered_chunk_params={}, @@ -296,10 +681,9 @@ def get_batch_tensors_and_boundary_info( Returns: log_probs, y, T, U (y and U are s.t. every other token is a blank) - these are the tensors we will need during Viterbi decoding. - token_info_list, word_info_list, segment_info_list - these are lists of dictionaries which we will need - for writing the CTM files with the human-readable alignments. - pred_text_list - this is a list of the transcriptions from our model which we will save to our output JSON - file if align_using_pred_text is True. + utt_obj_batch: a list of Utterance objects for every utterance in the batch. + output_timestep_duration: a float indicating the duration of a single output timestep from + the ASR Model. 
""" # get hypotheses by calling 'transcribe' @@ -320,6 +704,11 @@ def get_batch_tensors_and_boundary_info( hypotheses = model.transcribe_simulate_cache_aware_streaming( audio_filepaths_batch, return_hypotheses=True, batch_size=B ) + + # if hypotheses form a tuple (from Hybrid model), extract just "best" hypothesis + if type(hypotheses) == tuple and len(hypotheses) == 2: + hypotheses = hypotheses[0] + for hypothesis in hypotheses: log_probs_list_batch.append(hypothesis.y_sequence) T_list_batch.append(hypothesis.y_sequence.shape[0]) @@ -341,30 +730,52 @@ def get_batch_tensors_and_boundary_info( # token_info_batch, word_info_batch, segment_info_batch y_list_batch = [] U_list_batch = [] - token_info_batch = [] - word_info_batch = [] - segment_info_batch = [] + utt_obj_batch = [] for i_line, line in enumerate(manifest_lines_batch): if align_using_pred_text: - gt_text_for_alignment = pred_text_batch[i_line] + gt_text_for_alignment = " ".join(pred_text_batch[i_line].split()) else: gt_text_for_alignment = line["text"] - y_utt, token_info_utt, word_info_utt, segment_info_utt = get_y_and_boundary_info_for_utt( - gt_text_for_alignment, model, separator + utt_obj = get_utt_obj( + gt_text_for_alignment, + model, + separator, + T_list_batch[i_line], + audio_filepaths_batch[i_line], + _get_utt_id(audio_filepaths_batch[i_line], audio_filepath_parts_in_utt_id), ) - y_list_batch.append(y_utt) - U_list_batch.append(len(y_utt)) - token_info_batch.append(token_info_utt) - word_info_batch.append(word_info_utt) - segment_info_batch.append(segment_info_utt) + # update utt_obj.pred_text or utt_obj.text + if align_using_pred_text: + utt_obj.pred_text = pred_text_batch[i_line] + if len(utt_obj.pred_text) == 0: + logging.info( + f"'pred_text' of utterance {utt_obj.utt_id} is empty - we will not generate" + " any output alignment files for this utterance" + ) + if "text" in line: + utt_obj.text = line["text"] # keep the text as we will save it in the output manifest + else: + utt_obj.text = line["text"] + if len(utt_obj.text) == 0: + logging.info( + f"'text' of utterance {utt_obj.utt_id} is empty - we will not generate" + " any output alignment files for this utterance" + ) + + y_list_batch.append(utt_obj.token_ids_with_blanks) + U_list_batch.append(len(utt_obj.token_ids_with_blanks)) + utt_obj_batch.append(utt_obj) # turn log_probs, y, T, U into dense tensors for fast computation during Viterbi decoding T_max = max(T_list_batch) U_max = max(U_list_batch) # V = the number of tokens in the vocabulary + 1 for the blank token. 
- V = len(model.decoder.vocabulary) + 1 + if hasattr(model, 'tokenizer'): + V = len(model.tokenizer.vocab) + 1 + else: + V = len(model.decoder.vocabulary) + 1 T_batch = torch.tensor(T_list_batch) U_batch = torch.tensor(U_list_batch) @@ -383,13 +794,40 @@ def get_batch_tensors_and_boundary_info( U_utt = U_batch[b] y_batch[b, :U_utt] = torch.tensor(y_utt) + # calculate output_timestep_duration if it is None + if output_timestep_duration is None: + if not 'window_stride' in model.cfg.preprocessor: + raise ValueError( + "Don't have attribute 'window_stride' in 'model.cfg.preprocessor' => cannot calculate " + " model_downsample_factor => stopping process" + ) + + if not 'sample_rate' in model.cfg.preprocessor: + raise ValueError( + "Don't have attribute 'sample_rate' in 'model.cfg.preprocessor' => cannot calculate start " + " and end time of segments => stopping process" + ) + + with sf.SoundFile(audio_filepaths_batch[0]) as f: + audio_dur = f.frames / f.samplerate + n_input_frames = audio_dur / model.cfg.preprocessor.window_stride + model_downsample_factor = round(n_input_frames / int(T_batch[0])) + + output_timestep_duration = ( + model.preprocessor.featurizer.hop_length * model_downsample_factor / model.cfg.preprocessor.sample_rate + ) + + logging.info( + f"Calculated that the model downsample factor is {model_downsample_factor}" + f" and therefore the ASR model output timestep duration is {output_timestep_duration}" + " -- will use this for all batches" + ) + return ( log_probs_batch, y_batch, T_batch, U_batch, - token_info_batch, - word_info_batch, - segment_info_batch, - pred_text_batch, + utt_obj_batch, + output_timestep_duration, ) diff --git a/tools/nemo_forced_aligner/utils/make_ass_files.py b/tools/nemo_forced_aligner/utils/make_ass_files.py new file mode 100644 index 000000000000..f1beea838573 --- /dev/null +++ b/tools/nemo_forced_aligner/utils/make_ass_files.py @@ -0,0 +1,428 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This file contains functions for make ASS-format subtitle files based on the generated alignment. +ASS files can be generated highlighting token-level alignments or word-level alignments. +In both cases, 'segment' boundaries will be used to determine which parts of the text will appear +at the same time. +For the token-level ASS files, the text will be highlighted token-by-token, with the timings determined +by the NFA alignments. +For the word-level ASS files, the text will be highlighted word-by-word, with the timings determined +by the NFA alignemtns. 
+""" + +import os + +from utils.constants import BLANK_TOKEN, SPACE_TOKEN +from utils.data_prep import Segment, Token, Word + +PLAYERRESX = 384 +PLAYERRESY = 288 +MARGINL = 10 +MARGINR = 10 + + +def seconds_to_ass_format(seconds_float): + seconds_float = float(seconds_float) + mm, ss_decimals = divmod(seconds_float, 60) + hh, mm = divmod(mm, 60) + + hh = str(round(hh)) + if len(hh) == 1: + hh = '0' + hh + + mm = str(round(mm)) + if len(mm) == 1: + mm = '0' + mm + + ss_decimals = f"{ss_decimals:.2f}" + if len(ss_decimals.split(".")[0]) == 1: + ss_decimals = "0" + ss_decimals + + srt_format_time = f"{hh}:{mm}:{ss_decimals}" + + return srt_format_time + + +def make_ass_files( + utt_obj, output_dir_root, ass_file_config, +): + + # don't try to make files if utt_obj.segments_and_tokens is empty, which will happen + # in the case of the ground truth text being empty or the number of tokens being too large vs audio duration + if not utt_obj.segments_and_tokens: + return utt_obj + + if ass_file_config.resegment_text_to_fill_space: + utt_obj = resegment_utt_obj(utt_obj, ass_file_config) + + utt_obj = make_word_level_ass_file(utt_obj, output_dir_root, ass_file_config,) + utt_obj = make_token_level_ass_file(utt_obj, output_dir_root, ass_file_config,) + + return utt_obj + + +def _get_word_n_chars(word): + n_chars = 0 + for token in word.tokens: + if token.text != BLANK_TOKEN: + n_chars += len(token.text) + return n_chars + + +def _get_segment_n_chars(segment): + n_chars = 0 + for word_or_token in segment.words_and_tokens: + if word_or_token.text == SPACE_TOKEN: + n_chars += 1 + elif word_or_token.text != BLANK_TOKEN: + n_chars += len(word_or_token.text) + return n_chars + + +def resegment_utt_obj(utt_obj, ass_file_config): + + # get list of just all words and tokens + all_words_and_tokens = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + all_words_and_tokens.extend(segment_or_token.words_and_tokens) + else: + all_words_and_tokens.append(segment_or_token) + + # figure out how many chars will fit into one 'slide' and thus should be the max + # size of a segment + approx_chars_per_line = (PLAYERRESX - MARGINL - MARGINR) / ( + ass_file_config.fontsize * 0.6 + ) # assume chars 0.6 as wide as they are tall + approx_lines_per_segment = (PLAYERRESY - ass_file_config.marginv) / ( + ass_file_config.fontsize * 1.15 + ) # assume line spacing is 1.15 + if approx_lines_per_segment > ass_file_config.max_lines_per_segment: + approx_lines_per_segment = ass_file_config.max_lines_per_segment + + max_chars_per_segment = int(approx_chars_per_line * approx_lines_per_segment) + + new_segments_and_tokens = [] + all_words_and_tokens_pointer = 0 + for word_or_token in all_words_and_tokens: + if type(word_or_token) is Token: + new_segments_and_tokens.append(word_or_token) + all_words_and_tokens_pointer += 1 + else: + break + + new_segments_and_tokens.append(Segment()) + + while all_words_and_tokens_pointer < len(all_words_and_tokens): + word_or_token = all_words_and_tokens[all_words_and_tokens_pointer] + if type(word_or_token) is Word: + + # if this is going to be the first word in the segment, we definitely want + # to add it to the segment + if not new_segments_and_tokens[-1].words_and_tokens: + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + + else: + # if not the first word, check what the new length of the segment will be + # if short enough - add this word to this segment; + # if too long - add to a new segment + this_word_n_chars = 
_get_word_n_chars(word_or_token) + segment_so_far_n_chars = _get_segment_n_chars(new_segments_and_tokens[-1]) + if this_word_n_chars + segment_so_far_n_chars < max_chars_per_segment: + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + else: + new_segments_and_tokens.append(Segment()) + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + + else: # i.e. word_or_token is a token + # currently this breaks the convention of tokens at the end/beginning + # of segments being listed as separate tokens in segment.word_and_tokens + # TODO: change code so we follow this convention + new_segments_and_tokens[-1].words_and_tokens.append(word_or_token) + + all_words_and_tokens_pointer += 1 + + utt_obj.segments_and_tokens = new_segments_and_tokens + + return utt_obj + + +def make_word_level_ass_file( + utt_obj, output_dir_root, ass_file_config, +): + + default_style_dict = { + "Name": "Default", + "Fontname": "Arial", + "Fontsize": str(ass_file_config.fontsize), + "PrimaryColour": "&Hffffff", + "SecondaryColour": "&Hffffff", + "OutlineColour": "&H0", + "BackColour": "&H0", + "Bold": "0", + "Italic": "0", + "Underline": "0", + "StrikeOut": "0", + "ScaleX": "100", + "ScaleY": "100", + "Spacing": "0", + "Angle": "0", + "BorderStyle": "1", + "Outline": "1", + "Shadow": "0", + "Alignment": "2", + "MarginL": str(MARGINL), + "MarginR": str(MARGINR), + "MarginV": str(ass_file_config.marginv), + "Encoding": "0", + } + + output_dir = os.path.join(output_dir_root, "ass", "words") + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"{utt_obj.utt_id}.ass") + + with open(output_file, 'w') as f: + default_style_top_line = "Format: " + ", ".join(default_style_dict.keys()) + default_style_bottom_line = "Style: " + ",".join(default_style_dict.values()) + + f.write( + ( + "[Script Info]\n" + "ScriptType: v4.00+\n" + f"PlayResX: {PLAYERRESX}\n" + f"PlayResY: {PLAYERRESY}\n" + "\n" + "[V4+ Styles]\n" + f"{default_style_top_line}\n" + f"{default_style_bottom_line}\n" + "\n" + "[Events]\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n" + ) + ) + + # write first set of subtitles for text before speech starts to be spoken + words_in_first_segment = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + first_segment = segment_or_token + + for word_or_token in first_segment.words_and_tokens: + if type(word_or_token) is Word: + words_in_first_segment.append(word_or_token) + break + + text_before_speech = r"{\c&c7c1c2&}" + " ".join([x.text for x in words_in_first_segment]) + r"{\r}" + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(0)},{seconds_to_ass_format(words_in_first_segment[0].t_start)},Default,,0,0,0,," + + text_before_speech.rstrip() + ) + + f.write(subtitle_text + '\n') + + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + + words_in_segment = [] + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + words_in_segment.append(word_or_token) + + for word_i, word in enumerate(words_in_segment): + + text_before = " ".join([x.text for x in words_in_segment[:word_i]]) + if text_before != "": + text_before += " " + text_before = r"{\c&H3d2e31&}" + text_before + r"{\r}" + + if word_i < len(words_in_segment) - 1: + text_after = " " + " ".join([x.text for x in words_in_segment[word_i + 1 :]]) + else: + text_after = "" + text_after = r"{\c&c7c1c2&}" + text_after + r"{\r}" + 
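+                    # Note on the inline tags used above and below (standard ASS override tags, not
+                    # defined in this file): {\c&H...&} switches the text colour (channels are in
+                    # blue-green-red order) and {\r} resets back to the Default style, so
+                    # already-spoken words, the word currently being spoken and upcoming words are
+                    # drawn in different colours.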
+ aligned_text = r"{\c&H09ab39&}" + word.text + r"{\r}" + aligned_text_off = r"{\c&H3d2e31&}" + word.text + r"{\r}" + + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(word.t_start)},{seconds_to_ass_format(word.t_end)},Default,,0,0,0,," + + text_before + + aligned_text + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + # add subtitles without word-highlighting for when words are not being spoken + if word_i < len(words_in_segment) - 1: + last_word_end = float(words_in_segment[word_i].t_end) + next_word_start = float(words_in_segment[word_i + 1].t_start) + if next_word_start - last_word_end > 0.001: + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(last_word_end)},{seconds_to_ass_format(next_word_start)},Default,,0,0,0,," + + text_before + + aligned_text_off + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + utt_obj.saved_output_files[f"words_level_ass_filepath"] = output_file + + return utt_obj + + +def make_token_level_ass_file( + utt_obj, output_dir_root, ass_file_config, +): + + default_style_dict = { + "Name": "Default", + "Fontname": "Arial", + "Fontsize": str(ass_file_config.fontsize), + "PrimaryColour": "&Hffffff", + "SecondaryColour": "&Hffffff", + "OutlineColour": "&H0", + "BackColour": "&H0", + "Bold": "0", + "Italic": "0", + "Underline": "0", + "StrikeOut": "0", + "ScaleX": "100", + "ScaleY": "100", + "Spacing": "0", + "Angle": "0", + "BorderStyle": "1", + "Outline": "1", + "Shadow": "0", + "Alignment": "2", + "MarginL": str(MARGINL), + "MarginR": str(MARGINR), + "MarginV": str(ass_file_config.marginv), + "Encoding": "0", + } + + output_dir = os.path.join(output_dir_root, "ass", "tokens") + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, f"{utt_obj.utt_id}.ass") + + with open(output_file, 'w') as f: + default_style_top_line = "Format: " + ", ".join(default_style_dict.keys()) + default_style_bottom_line = "Style: " + ",".join(default_style_dict.values()) + + f.write( + ( + "[Script Info]\n" + "ScriptType: v4.00+\n" + f"PlayResX: {PLAYERRESX}\n" + f"PlayResY: {PLAYERRESY}\n" + "ScaledBorderAndShadow: yes\n" + "\n" + "[V4+ Styles]\n" + f"{default_style_top_line}\n" + f"{default_style_bottom_line}\n" + "\n" + "[Events]\n" + "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n" + ) + ) + + # write first set of subtitles for text before speech starts to be spoken + tokens_in_first_segment = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + for word_or_token in segment_or_token.words_and_tokens: + if type(word_or_token) is Token: + if word_or_token.text != BLANK_TOKEN: + tokens_in_first_segment.append(word_or_token) + else: + for token in word_or_token.tokens: + if token.text != BLANK_TOKEN: + tokens_in_first_segment.append(token) + + break + + for token in tokens_in_first_segment: + token.text_cased = token.text_cased.replace( + "▁", " " + ) # replace underscores used in subword tokens with spaces + token.text_cased = token.text_cased.replace(SPACE_TOKEN, " ") # space token with actual space + + text_before_speech = r"{\c&c7c1c2&}" + "".join([x.text_cased for x in tokens_in_first_segment]) + r"{\r}" + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(0)},{seconds_to_ass_format(tokens_in_first_segment[0].t_start)},Default,,0,0,0,," + + text_before_speech.rstrip() + ) + + f.write(subtitle_text + '\n') + + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token 
+ + tokens_in_segment = [] # make list of (non-blank) tokens + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Token: + if word_or_token.text != BLANK_TOKEN: + tokens_in_segment.append(word_or_token) + else: + for token in word_or_token.tokens: + if token.text != BLANK_TOKEN: + tokens_in_segment.append(token) + + for token in tokens_in_segment: + token.text_cased = token.text_cased.replace( + "▁", " " + ) # replace underscores used in subword tokens with spaces + token.text_cased = token.text_cased.replace(SPACE_TOKEN, " ") # space token with actual space + + for token_i, token in enumerate(tokens_in_segment): + + text_before = "".join([x.text_cased for x in tokens_in_segment[:token_i]]) + text_before = r"{\c&H3d2e31&}" + text_before + r"{\r}" + + if token_i < len(tokens_in_segment) - 1: + text_after = "".join([x.text_cased for x in tokens_in_segment[token_i + 1 :]]) + else: + text_after = "" + text_after = r"{\c&c7c1c2&}" + text_after + r"{\r}" + + aligned_text = r"{\c&H09ab39&}" + token.text_cased + r"{\r}" + aligned_text_off = r"{\c&H3d2e31&}" + token.text_cased + r"{\r}" + + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(token.t_start)},{seconds_to_ass_format(token.t_end)},Default,,0,0,0,," + + text_before + + aligned_text + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + # add subtitles without word-highlighting for when words are not being spoken + if token_i < len(tokens_in_segment) - 1: + last_token_end = float(tokens_in_segment[token_i].t_end) + next_token_start = float(tokens_in_segment[token_i + 1].t_start) + if next_token_start - last_token_end > 0.001: + subtitle_text = ( + f"Dialogue: 0,{seconds_to_ass_format(last_token_end)},{seconds_to_ass_format(next_token_start)},Default,,0,0,0,," + + text_before + + aligned_text_off + + text_after.rstrip() + ) + f.write(subtitle_text + '\n') + + utt_obj.saved_output_files[f"tokens_level_ass_filepath"] = output_file + + return utt_obj diff --git a/tools/nemo_forced_aligner/utils/make_ctm_files.py b/tools/nemo_forced_aligner/utils/make_ctm_files.py new file mode 100644 index 000000000000..f0326c07cf8f --- /dev/null +++ b/tools/nemo_forced_aligner/utils/make_ctm_files.py @@ -0,0 +1,114 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import soundfile as sf +from utils.constants import BLANK_TOKEN, SPACE_TOKEN +from utils.data_prep import Segment, Word + + +def make_ctm_files( + utt_obj, output_dir_root, ctm_file_config, +): + """ + Function to save CTM files for all the utterances in the incoming batch. 
+ """ + + # don't try to make files if utt_obj.segments_and_tokens is empty, which will happen + # in the case of the ground truth text being empty or the number of tokens being too large vs audio duration + if not utt_obj.segments_and_tokens: + return utt_obj + + # get audio file duration if we will need it later + if ctm_file_config.minimum_timestamp_duration > 0: + with sf.SoundFile(utt_obj.audio_filepath) as f: + audio_file_duration = f.frames / f.samplerate + else: + audio_file_duration = None + + utt_obj = make_ctm("tokens", utt_obj, output_dir_root, audio_file_duration, ctm_file_config,) + utt_obj = make_ctm("words", utt_obj, output_dir_root, audio_file_duration, ctm_file_config,) + utt_obj = make_ctm("segments", utt_obj, output_dir_root, audio_file_duration, ctm_file_config,) + + return utt_obj + + +def make_ctm( + alignment_level, utt_obj, output_dir_root, audio_file_duration, ctm_file_config, +): + output_dir = os.path.join(output_dir_root, "ctm", alignment_level) + os.makedirs(output_dir, exist_ok=True) + + boundary_info_utt = [] + for segment_or_token in utt_obj.segments_and_tokens: + if type(segment_or_token) is Segment: + segment = segment_or_token + if alignment_level == "segments": + boundary_info_utt.append(segment) + + for word_or_token in segment.words_and_tokens: + if type(word_or_token) is Word: + word = word_or_token + if alignment_level == "words": + boundary_info_utt.append(word) + + for token in word.tokens: + if alignment_level == "tokens": + boundary_info_utt.append(token) + + else: + token = word_or_token + if alignment_level == "tokens": + boundary_info_utt.append(token) + + else: + token = segment_or_token + if alignment_level == "tokens": + boundary_info_utt.append(token) + + with open(os.path.join(output_dir, f"{utt_obj.utt_id}.ctm"), "w") as f_ctm: + for boundary_info_ in boundary_info_utt: # loop over every token/word/segment + + # skip if t_start = t_end = negative number because we used it as a marker to skip some blank tokens + if not (boundary_info_.t_start < 0 or boundary_info_.t_end < 0): + text = boundary_info_.text + start_time = boundary_info_.t_start + end_time = boundary_info_.t_end + + if ( + ctm_file_config.minimum_timestamp_duration > 0 + and ctm_file_config.minimum_timestamp_duration > end_time - start_time + ): + # make the predicted duration of the token/word/segment longer, growing it outwards equal + # amounts from the predicted center of the token/word/segment + token_mid_point = (start_time + end_time) / 2 + start_time = max(token_mid_point - ctm_file_config.minimum_timestamp_duration / 2, 0) + end_time = min( + token_mid_point + ctm_file_config.minimum_timestamp_duration / 2, audio_file_duration + ) + + if not ( + text == BLANK_TOKEN and ctm_file_config.remove_blank_tokens + ): # don't save blanks if we don't want to + # replace any spaces with so we dont introduce extra space characters to our CTM files + text = text.replace(" ", SPACE_TOKEN) + + f_ctm.write(f"{utt_obj.utt_id} 1 {start_time:.2f} {end_time - start_time:.2f} {text}\n") + + utt_obj.saved_output_files[f"{alignment_level}_level_ctm_filepath"] = os.path.join( + output_dir, f"{utt_obj.utt_id}.ctm" + ) + + return utt_obj diff --git a/tools/nemo_forced_aligner/utils/make_output_files.py b/tools/nemo_forced_aligner/utils/make_output_files.py deleted file mode 100644 index a2d8c80a6580..000000000000 --- a/tools/nemo_forced_aligner/utils/make_output_files.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
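# A short worked example of the timestamp-widening logic in make_ctm above (illustrative
# numbers, not taken from the code): with start_time = 1.23, end_time = 1.25 and
# minimum_timestamp_duration = 0.2, token_mid_point = 1.24, so the interval is widened to
# start_time = max(1.24 - 0.1, 0) = 1.14 and end_time = min(1.24 + 0.1, audio_file_duration) = 1.34
# (assuming the audio is at least 1.34 s long), and the CTM entry is written as:
#
#   <utt_id> 1 1.14 0.20 <text>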
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os -from pathlib import Path - -import soundfile as sf -from utils.constants import BLANK_TOKEN, SPACE_TOKEN - - -def _get_utt_id(audio_filepath, audio_filepath_parts_in_utt_id): - fp_parts = Path(audio_filepath).parts[-audio_filepath_parts_in_utt_id:] - utt_id = Path("_".join(fp_parts)).stem - utt_id = utt_id.replace(" ", "-") # replace any spaces in the filepath with dashes - return utt_id - - -def add_t_start_end_to_boundary_info(boundary_info_utt, alignment_utt): - """ - We use the list of alignments to add the timesteps where each token/word/segment is predicted to - start and end. - boundary_info_utt can be any one of the variables referred to as `token_info`, `word_info`, `segment_info` - in other parts of the code. - - e.g. the input boundary info could be - boundary_info_utt = [ - {'text': 'hi', 's_start': 1, 's_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15}, - {'text': 'hey', 's_start': 19, 's_end': 23}, - ] - - and the alignment could be - alignment_utt = [ 1, 1, 3, 3, 4, 5, 7, 7, 9, 10, 11, 12, 13, 15, 17, 17, 19, 21, 23, 23] - - in which case the output would be: - boundary_info_utt = [ - {'text': 'hi', 's_start': 1, 's_end': 3, 't_start': 0, 't_end': 3}, - {'text': 'world', 's_start': 7, 's_end': 15, 't_start': 6, 't_end': 13}, - {'text': 'hey', 's_start': 19, 's_end': 23, 't_start': 16, 't_end': 19}, - ] - """ - # first remove boundary_info of any items that are not in the alignment - # the only items we expect not to be in the alignment are blanks that the alignment chooses to skip - # we will iterate boundary_info in reverse order for this to make popping the items simple - s_in_alignment = set(alignment_utt) - for boundary_info_pointer in range(len(boundary_info_utt) - 1, -1, -1): - s_in_boundary_info = set( - range( - boundary_info_utt[boundary_info_pointer]["s_start"], - boundary_info_utt[boundary_info_pointer]["s_end"] + 1, - ) - ) - item_not_in_alignment = True - for s_ in s_in_boundary_info: - if s_ in s_in_alignment: - item_not_in_alignment = False - - if item_not_in_alignment: - boundary_info_utt.pop(boundary_info_pointer) - - # now update boundary_info with t_start and t_end - boundary_info_pointer = 0 - for t, s_at_t in enumerate(alignment_utt): - if s_at_t == boundary_info_utt[boundary_info_pointer]["s_start"]: - if "t_start" not in boundary_info_utt[boundary_info_pointer]: - # we have just reached the start of the word/token/segment in the alignment => update t_start - boundary_info_utt[boundary_info_pointer]["t_start"] = t - - if t < len(alignment_utt) - 1: # this if is to avoid accessing an index that is not in the list - if alignment_utt[t + 1] > boundary_info_utt[boundary_info_pointer]["s_end"]: - if "t_end" not in boundary_info_utt[boundary_info_pointer]: - boundary_info_utt[boundary_info_pointer]["t_end"] = t - - boundary_info_pointer += 1 - else: # i.e. t == len(alignment) - 1, i.e. 
we are a the final element in alignment - # add final t_end if we haven't already - if "t_end" not in boundary_info_utt[boundary_info_pointer]: - boundary_info_utt[boundary_info_pointer]["t_end"] = t - - if boundary_info_pointer == len(boundary_info_utt): - # we have finished populating boundary_info with t_start and t_end, - # but we might have some final remaining elements (blanks) in the alignment which we dont care about - # => break, so as not to cause issues trying to access boundary_info[boundary_info_pointer] - break - - return boundary_info_utt - - -def make_ctm( - boundary_info_batch, - alignments_batch, - manifest_lines_batch, - model, - model_downsample_factor, - output_dir, - remove_blank_tokens_from_ctm, - audio_filepath_parts_in_utt_id, - minimum_timestamp_duration, -): - """ - Function to save CTM files for all the utterances in the incoming batch. - """ - - assert len(boundary_info_batch) == len(alignments_batch) == len(manifest_lines_batch) - # we also assume that utterances are in the same order in boundary_info_batch, alignments_batch - # and manifest_lines_batch - this should be the case unless there is a strange bug upstream in the - # code - - os.makedirs(output_dir, exist_ok=True) - - # the ratio to convert from timesteps (the units of 't_start' and 't_end' in boundary_info_utt) - # to the number of samples ('samples' in the sense of 16000 'samples' per second) - timestep_to_sample_ratio = model.preprocessor.featurizer.hop_length * model_downsample_factor - - for boundary_info_utt, alignment_utt, manifest_line in zip( - boundary_info_batch, alignments_batch, manifest_lines_batch - ): - - boundary_info_utt = add_t_start_end_to_boundary_info(boundary_info_utt, alignment_utt) - - # get utt_id that will be used for saving CTM file as .ctm - utt_id = _get_utt_id(manifest_line['audio_filepath'], audio_filepath_parts_in_utt_id) - - # get audio file duration if we will need it later - if minimum_timestamp_duration > 0: - with sf.SoundFile(manifest_line["audio_filepath"]) as f: - audio_file_duration = f.frames / f.samplerate - - with open(os.path.join(output_dir, f"{utt_id}.ctm"), "w") as f_ctm: - for boundary_info_ in boundary_info_utt: # loop over every token/word/segment - text = boundary_info_["text"] - start_sample = boundary_info_["t_start"] * timestep_to_sample_ratio - end_sample = (boundary_info_["t_end"] + 1) * timestep_to_sample_ratio - 1 - - start_time = start_sample / model.cfg.sample_rate - end_time = end_sample / model.cfg.sample_rate - - if minimum_timestamp_duration > 0 and minimum_timestamp_duration > end_time - start_time: - # make the predicted duration of the token/word/segment longer, growing it outwards equal - # amounts from the predicted center of the token/word/segment - token_mid_point = (start_time + end_time) / 2 - start_time = max(token_mid_point - minimum_timestamp_duration / 2, 0) - end_time = min(token_mid_point + minimum_timestamp_duration / 2, audio_file_duration) - - if not (text == BLANK_TOKEN and remove_blank_tokens_from_ctm): # don't save blanks if we don't want to - # replace any spaces with so we dont introduce extra space characters to our CTM files - text = text.replace(" ", SPACE_TOKEN) - - f_ctm.write(f"{utt_id} 1 {start_time:.2f} {end_time - start_time:.2f} {text}\n") - - return None - - -def make_new_manifest( - output_dir, - original_manifest_filepath, - additional_ctm_grouping_separator, - audio_filepath_parts_in_utt_id, - pred_text_all_lines, -): - """ - Function to save a new manifest with the same info as the original 
manifest, but also the paths to the - CTM files for each utterance and the "pred_text" if it was used for the alignment. - """ - if pred_text_all_lines: - with open(original_manifest_filepath, 'r') as f: - num_lines_in_manifest = sum(1 for _ in f) - - if not num_lines_in_manifest == len(pred_text_all_lines): - raise RuntimeError( - f"Number of lines in the original manifest ({num_lines_in_manifest}) does not match " - f"the number of pred_texts we have ({len(pred_text_all_lines)}). Something has gone wrong." - ) - - tgt_manifest_name = str(Path(original_manifest_filepath).stem) + "_with_ctm_paths.json" - tgt_manifest_filepath = str(Path(output_dir) / tgt_manifest_name) - - with open(original_manifest_filepath, 'r') as fin, open(tgt_manifest_filepath, 'w') as fout: - for i_line, line in enumerate(fin): - data = json.loads(line) - - utt_id = _get_utt_id(data["audio_filepath"], audio_filepath_parts_in_utt_id) - - data["token_level_ctm_filepath"] = str(Path(output_dir) / "tokens" / f"{utt_id}.ctm") - data["word_level_ctm_filepath"] = str(Path(output_dir) / "words" / f"{utt_id}.ctm") - - if additional_ctm_grouping_separator: - data["additional_segment_level_ctm_filepath"] = str( - Path(output_dir) / "additional_segments" / f"{utt_id}.ctm" - ) - - if pred_text_all_lines: - data['pred_text'] = pred_text_all_lines[i_line] - - new_line = json.dumps(data) - - fout.write(f"{new_line}\n") diff --git a/tools/nemo_forced_aligner/utils/make_output_manifest.py b/tools/nemo_forced_aligner/utils/make_output_manifest.py new file mode 100644 index 000000000000..7ee3fc77f7ab --- /dev/null +++ b/tools/nemo_forced_aligner/utils/make_output_manifest.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + + +def write_manifest_out_line( + f_manifest_out, utt_obj, +): + + data = {"audio_filepath": utt_obj.audio_filepath} + if not utt_obj.text is None: + data["text"] = utt_obj.text + + if not utt_obj.pred_text is None: + data["pred_text"] = utt_obj.pred_text + + for key, val in utt_obj.saved_output_files.items(): + data[key] = val + + new_line = json.dumps(data) + f_manifest_out.write(f"{new_line}\n") + + return None diff --git a/tools/nemo_forced_aligner/utils/viterbi_decoding.py b/tools/nemo_forced_aligner/utils/viterbi_decoding.py index bc9a45dda527..78336f800e14 100644 --- a/tools/nemo_forced_aligner/utils/viterbi_decoding.py +++ b/tools/nemo_forced_aligner/utils/viterbi_decoding.py @@ -36,6 +36,7 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) Looks like: [[0, 0, 1, 2, 2, 3, 3, ..., ], ..., [0, 1, 2, 2, 2, 3, 4, ....]]. Each list inside alignments_batch is of length T_batch[location of utt in batch]. 
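    A minimal sketch of how the relative backpointers introduced in the hunks below are traced
    back (illustrative values only; `bp` stands in for backpointers_rel[b], where a stored value
    of 0 means "stay at the same u", 1 means "come from u-1" and 2 means "come from u-2"):

        bp = {(3, 3): 1, (2, 2): 0, (1, 2): 2}  # hypothetical backpointer entries
        current_u = 3                           # chosen final state for this utterance
        alignment = [current_u]
        for t in (3, 2, 1):                     # i.e. range(T_max - 1, 0, -1) with T_max = 4
            current_u = current_u - bp[(t, current_u)]
            alignment.insert(0, current_u)
        # alignment == [0, 2, 2, 3]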
""" + B, T_max, _ = log_probs_batch.shape U_max = y_batch.shape[1] @@ -50,15 +51,14 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) # make log_probs_padded tensor of shape (B, T_max, V +1 ) where all of # log_probs_padded[:,:,-1] is the 'V_NEGATIVE_NUM' log_probs_padded = torch.cat((log_probs_batch, padding_for_log_probs), dim=2) - # make log_probs_reordered tensor of shape (B, T_max, U_max) - # it contains the log_probs for only the tokens that are in the Ground Truth, and in the order - # that they occur - log_probs_reordered = torch.gather(input=log_probs_padded, dim=2, index=y_batch.unsqueeze(1).repeat(1, T_max, 1)) - # initialize tensors of viterbi probabilies and backpointers - v_matrix = V_NEGATIVE_NUM * torch.ones_like(log_probs_reordered) - backpointers = -999 * torch.ones_like(v_matrix) - v_matrix[:, 0, :2] = log_probs_reordered[:, 0, :2] + # initialize v_prev - tensor of previous timestep's viterbi probabilies, of shape (B, U_max) + v_prev = V_NEGATIVE_NUM * torch.ones((B, U_max), device=viterbi_device) + v_prev[:, :2] = torch.gather(input=log_probs_padded[:, 0, :], dim=1, index=y_batch[:, :2]) + + # initialize backpointers_rel - which contains values like 0 to indicate the backpointer is to the same u index, + # 1 to indicate the backpointer pointing to the u-1 index and 2 to indicate the backpointer is pointing to the u-2 index + backpointers_rel = -99 * torch.ones((B, T_max, U_max), dtype=torch.int8, device=viterbi_device) # Make a letter_repetition_mask the same shape as y_batch # the letter_repetition_mask will have 'True' where the token (including blanks) is the same @@ -70,24 +70,23 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) letter_repetition_mask[:, :2] = 1 # make sure dont apply mask to first 2 tokens letter_repetition_mask = letter_repetition_mask == 0 - # bp_absolute_template is a tensor we will need during the Viterbi decoding to convert our argmaxes from indices between 0 and 2, - # to indices in the range (0, U_max-1) indicating from which token the mostly path up to that point came from. - # it is a tensor of shape (B, U_max) that looks like - # bp_absolute_template = [ - # [0, 1, 2, ...,, U_max] - # [0, 1, 2, ...,, U_max] - # [0, 1, 2, ...,, U_max] - # ... 
rows repeated so there are B number of rows in total - # ] - bp_absolute_template = torch.arange(U_max, device=viterbi_device).unsqueeze(0).repeat(B, 1) - for t in range(1, T_max): # e_current is a tensor of shape (B, U_max) of the log probs of every possible token at the current timestep - e_current = log_probs_reordered[:, t, :] + e_current = torch.gather(input=log_probs_padded[:, t, :], dim=1, index=y_batch) + + # apply a mask to e_current to cope with the fact that we do not keep the whole v_matrix and continue + # calculating viterbi probabilities during some 'padding' timesteps + t_exceeded_T_batch = t >= T_batch - # v_prev is a tensor of shape (B, U_max) of the viterbi probabilities 1 timestep back and in the same token position - v_prev = v_matrix[:, t - 1, :] + U_can_be_final = torch.logical_or( + torch.arange(0, U_max, device=viterbi_device).unsqueeze(0) == (U_batch.unsqueeze(1) - 0), + torch.arange(0, U_max, device=viterbi_device).unsqueeze(0) == (U_batch.unsqueeze(1) - 1), + ) + + mask = torch.logical_not(torch.logical_and(t_exceeded_T_batch.unsqueeze(1), U_can_be_final,)).long() + + e_current = e_current * mask # v_prev_shifted is a tensor of shape (B, U_max) of the viterbi probabilities 1 timestep back and 1 token position back v_prev_shifted = torch.roll(v_prev, shifts=1, dims=1) @@ -111,26 +110,27 @@ def viterbi_decoding(log_probs_batch, y_batch, T_batch, U_batch, viterbi_device) # candidates_v_current are our candidate viterbi probabilities for every token position, from which # we will pick the max and record the argmax candidates_v_current = v_prev_dup + e_current.unsqueeze(2) - v_current, bp_relative = torch.max(candidates_v_current, dim=2) - - # convert our argmaxes from indices between 0 and 2, to indices in the range (0, U_max-1) indicating - # from which token the mostly path up to that point came from - bp_absolute = bp_absolute_template - bp_relative + # we straight away save results in v_prev instead of v_current, so that the variable v_prev will be ready for the + # next iteration of the for-loop + v_prev, bp_relative = torch.max(candidates_v_current, dim=2) - # update our tensors containing all the viterbi probabilites and backpointers - v_matrix[:, t, :] = v_current - backpointers[:, t, :] = bp_absolute + backpointers_rel[:, t, :] = bp_relative - # trace backpointers TODO: parallelize over batch_size + # trace backpointers alignments_batch = [] for b in range(B): T_b = int(T_batch[b]) U_b = int(U_batch[b]) - final_state = int(torch.argmax(v_matrix[b, T_b - 1, U_b - 2 : U_b])) + U_b - 2 - alignment_b = [final_state] - for t in range(T_b - 1, 0, -1): - alignment_b.insert(0, int(backpointers[b, t, alignment_b[0]])) + if U_b == 1: # i.e. 
we put only a blank token in the reference text because the reference text is empty + current_u = 0 # set initial u to 0 and let the rest of the code block run as usual + else: + current_u = int(torch.argmax(v_prev[b, U_b - 2 : U_b])) + U_b - 2 + alignment_b = [current_u] + for t in range(T_max - 1, 0, -1): + current_u = current_u - int(backpointers_rel[b, t, current_u]) + alignment_b.insert(0, current_u) + alignment_b = alignment_b[:T_b] alignments_batch.append(alignment_b) return alignments_batch From 2db352a67c38425062cea936d0623e405ae07bb1 Mon Sep 17 00:00:00 2001 From: Matvei Novikov Date: Sat, 10 Jun 2023 04:20:20 +0400 Subject: [PATCH 036/123] Added rouge monitoring support for T5 (#6737) * Added rouge monitoring support for t5 Signed-off-by: Matvei Novikov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Matvei Novikov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../models/language_modeling/megatron_finetune_model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py index 4ed71756e60e..32024deb19b4 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_finetune_model.py @@ -49,7 +49,6 @@ HAVE_MEGATRON_CORE = False - __all__ = ['MegatronT5FinetuneModel'] @@ -204,7 +203,7 @@ def on_train_epoch_start(self) -> None: return super().on_train_epoch_start() def cast_for_metric(self, pred, label, metric_name, class_labels=None, labels_are_strings=False): - if metric_name == 'exact_string_match': + if metric_name == 'exact_string_match' or 'rouge': return pred, label pred = pred.replace(' ', '') label = label.replace(' ', '') @@ -445,6 +444,8 @@ def inference_epoch_end(self, outputs, mode, data_cfg): self.val_metric[dataloader_idx] if mode == 'validation' else self.test_metric[dataloader_idx] ) metric = metric_object.compute() + if metric_name == 'rouge': + metric = metric['rouge1_fmeasure'] # Handle logging of GLUE/XNLI separately here. XNLI has a separate metric per language. if isinstance(metric, dict): # GLUE case: @@ -458,7 +459,8 @@ def inference_epoch_end(self, outputs, mode, data_cfg): if k != 'acc' and 'total' not in k: self.log(metric_log_key + f'_{k}', v, batch_size=1) logging.info(f"{mode} {metric_name} lang {k} : {v}") - metric = metric['acc'] + if metric_name != 'rouge': + metric = metric['acc'] else: self.log(metric_log_key, metric, batch_size=1) logging.info(f"{metric_log_key}: {metric}") From a87702a522387da0aac62dc1f90a88a8e0bfc7cc Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Mon, 12 Jun 2023 23:15:00 +0800 Subject: [PATCH 037/123] GPT extrapolatable position embedding (xpos/sandwich/alibi/kerple) and Flash Attention (#6666) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * move to nvidia megatron repo (#6465) (#6475) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * Megatron KERPLE positional embeddings (#6478) (#6480) * [TTS] FastPitch adapter fine-tune and conditional layer normalization (#6416) [TTS] FastPitch adapter fine-tune and conditional layer normalization (#6416) --------- * [TTS] whitelist broken path fix. 
(#6412) * [TTS] whitelist broken path fix. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- * [TTS] FastPitch speaker encoder (#6417) * Add initial codes * Remove wemb * Fix import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Restore aligner loss * Add ConditionalInput * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error and support pre-trained config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rename config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Change copyright and random weight test * Add initial codes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix import error * Add initial codes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix dataset error * Remove reference speaker embedding * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove SV encoder * Follow comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix length type * Fix append * Move error msg * Add look-up into speaker encoder * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add valueerror msg * Move lookup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove unused * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix error * Rebase and Fix error * Fix spk encoder * Rename n_speakers * Follow comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix n_speakers None error --------- * Sharded manifests for tarred datasets (#6395) * testing sharded manifests * compatibility * proper fixes * adding flag tot convert_to_tarred_audio_dataset * shard_manifests conf param * propagating the shard_manifests param * propagating the shard_manifests param * distributed checks * typo * typo * fixes * fixes * fixes * fixes * fixes * fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes based on PR comments and tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes to convert_to_tarred_audio_dataset.py * reversing manifest shards flag * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * excluding manifests from webdataset url expansion * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * expand manifest paths before attempting to cache from datastore * explicit use of UTF-8 for manifest i/o --------- * Update wfst_text_normalization.rst (#6374) Add Hungarian (incoming in NeMo-text-processing) * Support Swiglu in TP PP Conversion (#6437) (#6451) * Support Swiglu in TP PP Conversion * Guard activation * Guard activation --------- * Update NeMo_TTS_Primer.ipynb (#6436) * Update NeMo_TTS_Primer.ipynb Changed a mistake in line 782. 
Instead of frequency band (ie. pitch) we should write frequency bin. Note that frequency bins in FFT are not related to pitch. * Update NeMo_TTS_Primer.ipynb Corrected the description of spectrogram and mel spectrogram calculations in lines 782 & 783 and added a fourth point to the description and added a reference for more mathematical details at the end of this point. --------- * add rampup batch size support for Megatron GPT (#6424) * added rampup batch size support * added tests for rampup batch size * fixed the typos * added assertions * changed assertion rules * deleted unused imports * changed tests for rampup batch size * updated rampup batch size tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed styling * rampup batch size tests changes --------- * Meagtron encoder decoder fix for empty validation outputs (#6459) (#6461) * 1. Meagtron encoder decoder fix for empty validation outputs. * 1. Debugging. --------- * Code-Switching dataset creation - upgrading to aggregate tokenizer manifest format (#6448) * added functionality to create agg tokenizer compatible manifest for CS, flag to use this mode by default * updated README with the new agg_tokenizer_manifest flag * fixed typo in scripts/speech_recognition/code_switching/README.md * changed agg_tokenizer_manifest to is_lid_manifest --------- * Added/updated new Conformer configs (#6426) (#6467) * Update script for ngram rnnt and hat beam search decoding (#6370) * add rnnt ngram beamsearch script * add return encoding embedding option * update script * add rnnt and hat ngram decoding script * add some parameters * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add return_encoder_embeddings parameter to RNNTDecodingConfig * replace return_encoder_embeddings parameter * generalization of scipt behavior * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove return_encoder_embeddings parameter * remove return_encoder_embeddings parameter * add manual encoder_embeddings calculation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix beam_width value to 8 * fix rescoring description --------- * BERT pre-training mp fork to spawn (#6442) (#6454) * change bert fork to spawn * num_workers=0 fix --------- * fix replace_bos_with_pad not found (#6443) (#6450) * reduce workers on NMT CI (#6472) (#6474) * 1. Added KERPLE positional embeddings to encoder-decoder. * 1. Added a missing file. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * 1. Fixing commits. * 1. Debugging. * 1. Debugging. * 1. Debugging. * 1. Debugging. 
--------- Signed-off-by: hsiehjackson Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Dima Rekesh Signed-off-by: Jim O’Regan Signed-off-by: smajumdar Signed-off-by: Mostafa Ghorbandoost Signed-off-by: Dmytro Pykhtar Signed-off-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Micha Livne Signed-off-by: Kunal Dhawan Signed-off-by: andrusenkoau Signed-off-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Signed-off-by: Abhinav Khattar Co-authored-by: Micha Livne Co-authored-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Dima Rekesh Co-authored-by: Jim O’Regan Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Somshubra Majumdar Co-authored-by: Mostafa Ghorbandoost Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: Dmytro Pykhtar Co-authored-by: Eric Harper Co-authored-by: Micha Livne Co-authored-by: Kunal Dhawan Co-authored-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * Fix an invalid link in get_data.py of ljspeech (#6456) Usage of the link in line 63 leads to downloading a html file not a tsv file, so we need to change it to a raw link. Signed-off-by: Mostafa Ghorbandoost Signed-off-by: hsiehjackson * 1. Added external index sample. (#6462) (#6483) Signed-off-by: Micha Livne Co-authored-by: Micha Livne Signed-off-by: hsiehjackson * Update README to add core installation (#6488) (#6489) * update README for megatron-core * fix --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * Fix cache aware hybrid bugs (#6466) (#6484) Signed-off-by: hsiehjackson * Fix typos (#6494) (#6495) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: hsiehjackson * Add disclaimer about dataset for ASR (#6496) Signed-off-by: smajumdar Signed-off-by: hsiehjackson * fix (#6502) datastore_path_to_webdataset_url(p) if is_datastore_path(p) and is_tarred_path(p) else p NameError: name 'is_tarred_path' is not defined Co-authored-by: George Signed-off-by: hsiehjackson * fix broken links r1.18.0 (#6501) (#6504) * fix broken links * fix broken links --------- Signed-off-by: Evelina Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] Create functions for TTS preprocessing without dataloader (#6317) * [TTS] Create functions for TTS preprocessing without dataloader Signed-off-by: Ryan Signed-off-by: hsiehjackson * Cache aware streaming nfa (#6209) * add cache aware streaming to nemo aligner Signed-off-by: Slyne Deng Signed-off-by: hsiehjackson * [BugFix] Force _get_batch_preds() to keep logits in decoder timestamps generator (#6499) * [BugFix] _get_batch_preds() is forced to keep logits in decoder timestamps generators Signed-off-by: Taejin Park * Ingnore keep_logits boolean in FrameASRBatchLogits Signed-off-by: Taejin Park --------- Signed-off-by: Taejin Park Co-authored-by: Jagadeesh Balam <4916480+jbalam-nv@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] Fix FastPitch energy code (#6511) Signed-off-by: Ryan Signed-off-by: hsiehjackson * fix custom forward_torch_softmax (#6512) (#6517) Signed-off-by: Abhinav Khattar 
Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * [TTS] fixed broken path. (#6514) (#6518) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix normalization of impulse response in ImpulsePerturbation (#6505) Signed-off-by: Ante Jukić Signed-off-by: hsiehjackson * Add interleaved pp support (#6498) * Add support for Virtual Pipeline Parallel conversion Signed-off-by: smajumdar * Add support for Virtual Pipeline Parallel conversion Signed-off-by: smajumdar * Switch to megatron core Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix typos (#6523) * Fix typos Signed-off-by: smajumdar * Fix typos Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Signed-off-by: hsiehjackson * New noise_norm perturbation based on Riva work (#6445) * Initial commit for new noise_norm perturbation Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Minor fix to random seed in perturb Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated code to reflect feedback Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updates for feedback given by code reviewers Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updates in response to PR feedback Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added comment about ref_mic being None Signed-off-by: Daniel Egert * Updated perturb to use inspect module Signed-off-by: Daniel Egert --------- Signed-off-by: Daniel Egert Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] Add script for computing feature stats (#6508) * [TTS] Add script for computing feature stats Signed-off-by: Ryan * [TTS] Add overwrite config Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: hsiehjackson * Add Frame-VAD model and datasets (#6441) * add model, dataset, necessary utils and tests Signed-off-by: stevehuang52 * fix tarred data Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * update docstring Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * update pretrained model info Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Signed-off-by: hsiehjackson * Support dynamic length batches with GPT SFT (#6510) * Support synamic length with GPT SFT Signed-off-by: Abhinav Khattar * make branch functional Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Signed-off-by: hsiehjackson * added back the fast emit section to the configs. (#6540) (#6542) * added back the fast emit section to the configs. * added back the fast emit section to the configs. 
--------- Signed-off-by: Vahid Co-authored-by: Vahid Noroozi Signed-off-by: hsiehjackson * removing unnessary avoid_bfloat16_autocast_context (#6481) Signed-off-by: Dima Rekesh Signed-off-by: hsiehjackson * FC models in menu (#6473) * FC models in menu Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Dima Rekesh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] Add tutorials for FastPitch TTS speaker adaptation with adapters (#6431) * Add tts adapter tutorial Signed-off-by: hsiehjackson * Update main tutorial Signed-off-by: hsiehjackson * Add tts adapter tutorial Signed-off-by: hsiehjackson * Update main tutorial Signed-off-by: hsiehjackson * Update tutorial Signed-off-by: hsiehjackson * Follow comments Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow comments Signed-off-by: hsiehjackson * Fix load .nemo error Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Support multi-speaker fine-tune Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Follow comments Signed-off-by: hsiehjackson * Use .nemo Signed-off-by: hsiehjackson * Follow Comments Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * Add precomputed speaker emb Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix space Signed-off-by: hsiehjackson * Remove repeated argument Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * optional batch size Signed-off-by: hsiehjackson * Fix comments in notebook Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] Create initial TTS dataset feature processors (#6507) Signed-off-by: Ryan Signed-off-by: hsiehjackson * fix (#6529) (#6546) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * Add FastConformer Hybrid ASR models for EN, ES, IT, DE, PL, HR, UA, BY (#6549) (#6553) * Added fastconfomer hybrid asr models for en, es, it, de, pl, hr, ua, by * updated ASR docs with the fastconformer hybrid checkpoints * added the fastconformer RNNT and CTC models --------- Signed-off-by: KunalDhawan Co-authored-by: Kunal Dhawan Signed-off-by: hsiehjackson * Add scores for FastConformer models (#6557) (#6558) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: hsiehjackson * Fix fp16 (#6543) (#6544) Signed-off-by: MaximumEntropy Co-authored-by: Sandeep Subramanian Signed-off-by: hsiehjackson * Patch transcribe and support offline transcribe for hybrid model (#6550) (#6559) Signed-off-by: fayejf Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix notebook bad json (#6561) Signed-off-by: smajumdar Signed-off-by: hsiehjackson * Change Megatron Enc Dec model to use persistent_workers (#6548) (#6552) * persistent workers * fix --------- Signed-off-by: Abhinav Khattar Co-authored-by: 
Abhinav Khattar Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Make KenLM with PC for AggregateTokenizer and merge it (#6081) * do_lowercase, rm_punctuation Signed-off-by: Nikolay Karpov * support beam_strategy = beam Signed-off-by: Nikolay Karpov * black Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix config and^Cunctuation capitalization Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rm math Signed-off-by: Nikolay Karpov * update kenlm Signed-off-by: Nikolay Karpov * black Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add opengrm Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * mv install_beamsearch_decoders Signed-off-by: Nikolay Karpov * punctuation_to_preserve Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Only tikenizer opion Signed-off-by: Nikolay Karpov * Black Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * DEFAULT_TOKEN_OFFSET Signed-off-by: Nikolay Karpov * aggregate_tokenizer Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * install kenlm with more than 5gram Signed-off-by: Nikolay Karpov * install_beamsearch_decoders Signed-off-by: Nikolay Karpov * ngram_bin_path kenlm_bin_path Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * black Signed-off-by: Nikolay Karpov * fix greedy PC bug Signed-off-by: Nikolay Karpov * move global params Signed-off-by: Nikolay Karpov * fix description and perplexity Signed-off-by: Nikolay Karpov * fix description Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * NEMO_PATH Signed-off-by: Nikolay Karpov * nemo:23.01 Signed-off-by: Nikolay Karpov * License Signed-off-by: Nikolay Karpov * description Signed-off-by: Nikolay Karpov * isinstance Signed-off-by: Nikolay Karpov * refactor kenlm stdin Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * black Signed-off-by: Nikolay Karpov * add cmd arg Signed-off-by: Nikolay Karpov * use new iter_files Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * EncDecHybridRNNTCTCModel Signed-off-by: Nikolay Karpov * punctuation Signed-off-by: Nikolay Karpov * train_kenlm args Signed-off-by: Nikolay Karpov * add docstrings Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add ngram_merge docs Signed-off-by: Nikolay Karpov * ngram_prune Signed-off-by: Nikolay Karpov * rename to ngram_merge Signed-off-by: Nikolay Karpov * rename to ngram Signed-off-by: Nikolay Karpov * add comments Signed-off-by: Nikolay Karpov * Ngram Signed-off-by: Nikolay Karpov * nemo_model_file Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * install_opengrm_ngram Signed-off-by: Nikolay Karpov * install opengrm Signed-off-by: Nikolay Karpov * rename to install_opengrm.sh Signed-off-by: Nikolay Karpov * rm extra import Signed-off-by: Nikolay Karpov * train_paths Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * text_processing Signed-off-by: Nikolay Karpov * fix ngram_bin_path Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * DECODERS_PATH Signed-off-by: Nikolay Karpov * farcompile Signed-off-by: Nikolay Karpov * rm text processing Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * text_processing Signed-off-by: Nikolay Karpov * AggregateTokenizer.DummyTokenizer Signed-off-by: Nikolay Karpov * comments Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * TextProcessingConfig Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * typo Signed-off-by: Nikolay Karpov * doc Signed-off-by: Nikolay Karpov * types Signed-off-by: Nikolay Karpov * nemo_model_file Signed-off-by: Nikolay Karpov * rm assert Signed-off-by: Nikolay Karpov * import kenlm_utils Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * return None Signed-off-by: Nikolay Karpov * Copyright Signed-off-by: Nikolay Karpov * 2022 Signed-off-by: Nikolay Karpov * 2023 Signed-off-by: Nikolay Karpov --------- Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Co-authored-by: Nikolay Karpov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * fix for running on 1 GPU. 
Signed-off-by: hsiehjackson * temp rtd fix (#6568) (#6569) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * [TTS] Add script for mapping speaker names to indices (#6509) Signed-off-by: Ryan Signed-off-by: hsiehjackson * whitespace (#6574) Signed-off-by: Nikolay Karpov Signed-off-by: hsiehjackson * Update manifest.py for speedup (#6565) (#6573) * Update manifest.py Re-order the checks for faster processing audio filepaths that are already absolute paths * Update manifest.py --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: Vahid Noroozi Signed-off-by: hsiehjackson * More streaming conformer export fixes (#6567) (#6578) Signed-off-by: Greg Clark Co-authored-by: Greg Clark Co-authored-by: Vahid Noroozi Signed-off-by: hsiehjackson * user selected max_seq_len should be less than model's max_seq_len (#6333) (#6386) * user selection should not break model max limit * eval max seq length --------- Signed-off-by: arendu Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: Sandeep Subramanian Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Framework for PEFT via mixins (#6391) * init commit ptuning via mixin Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * gpt ptuning places virtual tokens on the left only Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * encoder input modified when pre_process is true Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * optimizer group and state dict updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adapter ptuning working for pp>1 Signed-off-by: arendu * adapter defaults Signed-off-by: arendu * adapter ptuining config defaults Signed-off-by: arendu * training works Signed-off-by: arendu * loading and saving adapter only params during training Signed-off-by: arendu * added checks and comments Signed-off-by: arendu * clean up Signed-off-by: arendu * checks for grad is None before calling all_reduce Signed-off-by: arendu * load adapter .nemo file working Signed-off-by: arendu * resume training for adapters Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * peft tuning Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor Signed-off-by: arendu * file not needed Signed-off-by: arendu * undo prompt learning dataset changes Signed-off-by: arendu * undo updates to gpt prompt learning model Signed-off-by: arendu * naming updates Signed-off-by: arendu * decoding Signed-off-by: arendu * predict_step in gpt_sft_model Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed inference from tuning config Signed-off-by: arendu * no test in peft training Signed-off-by: 
arendu * answer only loss and correct defaults for val_loss Signed-off-by: arendu * hybrid adapters and ptuning Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * eval working.. Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * prepending tokens for ptuning Signed-off-by: arendu * cleaned up eval config Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up Signed-off-by: arendu * update Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * default prompt template Signed-off-by: arendu * Lora added Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Support synamic length with GPT SFT Signed-off-by: Abhinav Khattar * make branch functional Signed-off-by: Abhinav Khattar * defaults to max_pad_length=False in GPT SFT dataset Signed-off-by: arendu * adapter parallel_adapters to support Lora Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added early stopping by default Signed-off-by: arendu * eval script for peft and eval config. bug fixes in predict step and added out_features to t5 adapter config Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docs Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * better defaults Signed-off-by: arendu * updates Signed-off-by: arendu * update Signed-off-by: arendu * docs Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Abhinav Khattar Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * cache and reuse inputs (#6422) (#6452) Co-authored-by: Sangkug Lym Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Add patches for Virtual Parallel conversion (#6589) * Add patches for Virtual Parllel conversion Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Pass `.scale` instead of scaler object to core (#6551) * pass .scale instead of scaler object to core (#6545) Signed-off-by: Abhinav Khattar Co-authored-by: Eric Harper * Update megatron_gpt_model.py Signed-off-by: Abhinav Khattar * scale changes for main Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Documentation for ASR-TTS models (#6594) (#6595) * Add docs about hybrid ASR-TTS models * Add docs about text-only datasets * Add docs about ASR-TTS checkpoints * Add docs about ASR-TTS configs and training * 
Clean up * ASR-TTS docs: add to api, fix imports * Clean up * Wrap optional import * Revert general ASR import --------- Signed-off-by: Vladimir Bataev Co-authored-by: Vladimir Bataev Signed-off-by: hsiehjackson * [TTS] Fix aligner nan loss in fp32 (#6435) * Fix nan loss in fp32 Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Update SDP docs (#6485) (#6596) * add info about SDP e.g. processor classes in docs * add link to SDP docs in README * address code review comments and add SDP overview diagram * Fix spelling typo --------- Signed-off-by: Elena Rastorgueva Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Signed-off-by: hsiehjackson * Bug/typo fixes (#6599) Signed-off-by: Igor Gitman Signed-off-by: hsiehjackson * Manual garbage collection with an interval (#6469) (#6482) * Manual garbage collection with an interval * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use trainer.global_step for tracking the interval of GC --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Make tensor split contiguous (#6580) (#6593) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * [ASR] Fix for old models in change_attention_model (#6608) * fixes Signed-off-by: sam1373 * done already Signed-off-by: sam1373 --------- Signed-off-by: sam1373 Signed-off-by: hsiehjackson * Update manifest.py to use os.path for get_full_path (#6598) * Update manifest.py to use os.path for get_full_path Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update manifest.py to get rid of pathlib Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update manifest.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update manifest.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vahid Noroozi Signed-off-by: hsiehjackson * Cherry pick commits in #6601 to main (#6611) * fix write Signed-off-by: fayejf * decoding ctc Signed-off-by: fayejf * temp set rnnt decoding return_best_hypothesis to true Signed-off-by: fayejf * add wer cal back to transcribe_speech as requested Signed-off-by: fayejf * add wer cal back to speech_to_text_buffered_infer_rnnt as requested Signed-off-by: fayejf * add wer cal back to speech_to_text_buffered_infer_ctc as requested Signed-off-by: fayejf * style fix Signed-off-by: fayejf * reflect change in asr_evaluator Signed-off-by: fayejf * reflect som and vahid comment Signed-off-by: fayejf * remove return_best_hy=true in transcribe_speech Signed-off-by: 
fayejf * no text skip Signed-off-by: fayejf * revert partial Signed-off-by: fayejf --------- Signed-off-by: fayejf Signed-off-by: hsiehjackson * Create dummy iters to satisfy len checks (#6600) (#6603) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * add GPT eval mode fix for interleaved to main (#6610) Signed-off-by: Abhinav Khattar Signed-off-by: hsiehjackson * Fix batch size reconf for T5 FT for multi-validation (#6582) (#6588) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Not doing CastToFloat by default (#6524) (#6563) * Not doing CastToFloat by default * Added docstring * Dummy commit --------- Signed-off-by: Boris Fomitchev Co-authored-by: Boris Fomitchev Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * Turn autocast off when precision is fp32 (#6576) * Turn autocast off when precision is fp32 (#6554) * Turn autocast off when precision is fp32 Signed-off-by: Abhinav Khattar * address review Signed-off-by: Abhinav Khattar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes Signed-off-by: Abhinav Khattar * merge Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper * correct auto-merge Signed-off-by: Abhinav Khattar * correct auto-merge Signed-off-by: Abhinav Khattar * add to GPT SFT Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * update core commit hash in readme (#6622) (#6623) Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * add hat image to docs (#6619) (#6621) Signed-off-by: andrusenkoau Co-authored-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Signed-off-by: hsiehjackson * Allow indices exchange via distributed (#6618) (#6624) Signed-off-by: Mikołaj Błaż Co-authored-by: mikolajblaz Signed-off-by: hsiehjackson * Offline and streaming inference support for hybrid model (#6570) * streaming buffered for hybrid + ctc Signed-off-by: fayejf * change default model_stride in eval.yaml Signed-off-by: fayejf * add fc model_stride Signed-off-by: fayejf * small fix Signed-off-by: fayejf * check whether model and decoding match Signed-off-by: fayejf * small fix Signed-off-by: fayejf * streaming buffered for hybrid + rnnt Signed-off-by: fayejf * style fix Signed-off-by: fayejf * fix yaml Signed-off-by: fayejf * reflect comment wip Signed-off-by: fayejf * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Signed-off-by: fayejf * refactor and verified Signed-off-by: fayejf * add get_full_path to buffered Signed-off-by: fayejf * small fix Signed-off-by: fayejf * add RNNTDecodingConfig Signed-off-by: fayejf * model name & instruction of changing decoding Signed-off-by: fayejf --------- Signed-off-by: fayejf Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Patch decoding for PC models (#6630) (#6631) * Patch decoding logic for PC models * Patch decoding logic for PC models --------- Signed-off-by:
smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: hsiehjackson * Fix wer.py where 'errors' variable was not set (#6633) (#6634) Fix wer.py where 'errors' variable was not set when both reference and hypothesis are empty strings Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Signed-off-by: hsiehjackson * Restore GPT support for interleaved pipeline parallelism (#6528) (#6613) * Restore logic for data-parallel communication with pipeline parallelism in GPT * Support dynamic attention masks in GPT * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Debug typos * Debug data iterator caching with interleaved pipeline parallelism Each model chunk accesses the data iterator multiple times, so we need to cache multiple samples. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Megatron-LM commit * Distinguish between list of data iterators and data iterator that is a list * Create dummy iters to satisfy len checks * Kludge while waiting for Megatron-LM update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * set transformers offline to avoid rate limiting --------- Signed-off-by: Tim Moon Signed-off-by: Eric Harper Signed-off-by: Abhinav Khattar Signed-off-by: ericharper Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * Add FA Signed-off-by: hsiehjackson * Fix XPOS Signed-off-by: hsiehjackson * Add warning Signed-off-by: hsiehjackson * Fix bugs Signed-off-by: hsiehjackson * Fix attention Signed-off-by: hsiehjackson * Fix comment Signed-off-by: hsiehjackson * Fix cast dtype Signed-off-by: hsiehjackson * Undo xpos Signed-off-by: hsiehjackson * bugfix (#6636) Signed-off-by: fayejf Signed-off-by: hsiehjackson * Disable interctc tests (#6638) Signed-off-by: Igor Gitman Signed-off-by: hsiehjackson * Add megatron_core to requirements (#6639) (#6640) * add megatron_core to requirements * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: ericharper Co-authored-by: Eric Harper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Remove from jenkins (#6642) * Remove from jenkins (#6641) * add megatron_core to requirements Signed-off-by: ericharper * remove from jenkins Signed-off-by: ericharper --------- Signed-off-by: ericharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove dup Signed-off-by: ericharper --------- Signed-off-by: ericharper Co-authored-by: Eric Harper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * sft model can use this script for eval (#6637) * sft model can use this script for eval Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * please fix me Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor Signed-off-by: arendu --------- Signed-off-by: arendu Co-authored-by:
pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] Fix TTS audio preprocessing bugs (#6628) Signed-off-by: Ryan Signed-off-by: hsiehjackson * Move black parameters to pyproject.toml (#6647) Signed-off-by: Vladimir Bataev Signed-off-by: hsiehjackson * ASR-TTS Models: Support hybrid RNNT-CTC, improve docs. (#6620) * ASR-TTS: support hybrid RNNT-CTC models * Do not warn on optional import * Explain adding options to config * Fix import guard docs * Add docs for ConcatDataset * Add explanation for sampling parameters * Initial docs for the enhancer model * Fix use_start_end_token parameter usage --------- Signed-off-by: Vladimir Bataev Signed-off-by: hsiehjackson * fix conversion and eval (#6648) * fix conversion and eval Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Confidence ensembles implementation (#6614) * Working version to train conf model + save ensemble class Signed-off-by: Igor Gitman * Working version Signed-off-by: Igor Gitman * Remove copy of transcribe_speech.py Signed-off-by: Igor Gitman * Move models parameter to config Signed-off-by: Igor Gitman * Add explicit parameters to transcribe Signed-off-by: Igor Gitman * Small cleanups Signed-off-by: Igor Gitman * Add temperature and integration tests Signed-off-by: Igor Gitman * Add more tests Signed-off-by: Igor Gitman * Add pc removal config Signed-off-by: Igor Gitman * Cleanup Signed-off-by: Igor Gitman * Fix typo Signed-off-by: Igor Gitman * Address review comments Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman Signed-off-by: hsiehjackson * Patch memory used for NeMo Megatron models (#6615) * Patch memory used for NeMo Megatron models Signed-off-by: smajumdar * Cleanup the dtype of embeddings Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor util function for parsing precision Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor util function for parsing precision Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Try patch for Megatron O2 Signed-off-by: smajumdar * Refactor to incorporate megatron amp 02 state Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor to incorporate megatron amp 02 state Signed-off-by: smajumdar * Correct indent Signed-off-by: smajumdar * Correct utils import Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * handle artifacts when path is dir (#6658) Signed-off-by: arendu Signed-off-by: hsiehjackson * remove upgrading setuptools in reinstall.sh (#6659) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: hsiehjackson * merge lora weights into base model (#6597) * merge lora weights into base model Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * typo fix 
Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * minor update Signed-off-by: arendu * update copyright Signed-off-by: arendu * eval needs to know the PEFT class Signed-off-by: arendu * add target class in training script so that we can use it in eval Signed-off-by: arendu * update Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update to work for tp1 Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * set restore model path Signed-off-by: arendu * peft can be none Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated merge script so that eval works easily Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * eval with peft or sft model Signed-off-by: arendu * keep sentences in jsonl format Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * convert sft using correct classpath Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated to force sft yaml to have the correct target Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updated docs Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix conversion and eval Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * upgrade to 23.04 (#6660) Signed-off-by: ericharper Signed-off-by: hsiehjackson * Merge r1.18.0 bugfixes and doc updates to main (#6655) * update branch Signed-off-by: ericharper * Remove from jenkins (#6641) * add megatron_core to requirements Signed-off-by: ericharper * remove from jenkins Signed-off-by: ericharper --------- Signed-off-by: ericharper * remove dup Signed-off-by: ericharper * update branch Signed-off-by: ericharper * [TTS] reformat NeMo versions in the tts logging messages to avoid batch process them when upgrading NeMo versions. 
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: ericharper Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: hsiehjackson * Confidence ensembles: fix issues and add tuning functionality (#6657) * Implement compute confidence to properly handle blanks Signed-off-by: Igor Gitman * Implement proper confidence for transducers Signed-off-by: Igor Gitman * Implement tuning logic Signed-off-by: Igor Gitman * Add tests for confidence tuning Signed-off-by: Igor Gitman * Remove unused imports Signed-off-by: Igor Gitman * Add types/docs Signed-off-by: Igor Gitman * Add comment about the main conf compute loop Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman Signed-off-by: hsiehjackson * [TTS] Implement new TextToSpeech dataset (#6575) * [TTS] Implement new TextToSpeech dataset Signed-off-by: Ryan * [TTS] Add unit tests Signed-off-by: Ryan * [TTS] Fix defaulting of use_log_energy Signed-off-by: Ryan * [TTS] Fix TTS export test Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: hsiehjackson * Dialogue dataset (#6654) * chatbot interface Signed-off-by: Yi Dong * latest gradio Signed-off-by: Yi Dong * default greedy Signed-off-by: Yi Dong * better chatbot Signed-off-by: Yi Dong * handle preamble Signed-off-by: Yi Dong * added chatbot training capability Signed-off-by: Yi Dong * added chatbot ui Signed-off-by: Yi Dong * remove debug code Signed-off-by: Yi Dong * default human Signed-off-by: Yi Dong * use special token for roles Signed-off-by: Yi Dong * special tokens Signed-off-by: Yi Dong * fix name Signed-off-by: Yi Dong * new chat dataset Signed-off-by: Yi Dong * fix the system token Signed-off-by: Yi Dong * upgrade gradio Signed-off-by: Yi Dong * save the chat history Signed-off-by: Yi Dong * update ui Signed-off-by: root * update chat interface Signed-off-by: Yi Dong * handles canonical form Signed-off-by: Yi Dong * new sft chatbot Signed-off-by: Yi Dong * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change format Signed-off-by: Yi Dong * check extra_id in the tokenizer Signed-off-by: Yi Dong * added vocab property check Signed-off-by: Yi Dong * added missing file Signed-off-by: Yi Dong --------- Signed-off-by: Yi Dong Signed-off-by: root Co-authored-by: root Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sandeep Subramanian Signed-off-by: hsiehjackson * Add support for RNNT/hybrid models to partial transcribe (#6609) * Add support for RNNT/hybrid models to partial transcribe Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_utils.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_speech.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_utils.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * eval_beamsearch_ngram.py with hybrid ctc (#6656) * separate_punctuation = false * ctc decoding
strategy = model.decoding * transcribe(files, logprobs=True) returns logprobs --------- Signed-off-by: Nikolay Karpov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * fix bucketing bug issue for picking new bucket (#6663) Signed-off-by: Nithin Rao Koluguri Co-authored-by: Nithin Rao Koluguri Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Add t5 flash-attention Signed-off-by: hsiehjackson * PE refactor (#6673) * PE refactor Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Add singleton alibi Signed-off-by: hsiehjackson * Fix FA mask Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * singleton PE Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Fix attn bias inference Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * fix eval Signed-off-by: Evelina Signed-off-by: hsiehjackson * [TTS] Add callback for saving audio during FastPitch training (#6665) * [TTS] Add callback for saving audio during FastPitch training Signed-off-by: Ryan * [TTS] Allow NGC model name for vocoder Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: hsiehjackson * update batch size recommendation to min 32 for 43b (#6675) * update batch size recommendation to min 32 for 43b Signed-off-by: Zhilin Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Zhilin Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Make Note usage consistent in adapter_mixins.py (#6678) Inconsistent usage of the word Note, which includes a broken reading in one case. I'm just doing some tidying -- not trying to be critical. 
Signed-off-by: Brian McBrayer Signed-off-by: hsiehjackson * Fix masking bug for TTS Aligner (#6677) Signed-off-by: Jocelyn Huang Signed-off-by: hsiehjackson * [ASR] Adding ssl config for fast-conformer (#6672) * adding ssl config for fast-conformer adding boolean flags for ssl losses Signed-off-by: Krishna Puvvada * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * renaming fast-conformer to fastconformer in config folder Signed-off-by: Krishna Puvvada --------- Signed-off-by: Krishna Puvvada Co-authored-by: Krishna Puvvada Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Fix xpos offset Signed-off-by: hsiehjackson * Fix sequence parallel Signed-off-by: hsiehjackson * Fix parallel Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Uncomment correct bias size Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Remove unused module Signed-off-by: hsiehjackson * Fix singleton tril Signed-off-by: hsiehjackson * Fix kerple/sandwich rename xpos Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * fix sandwich Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Add unit test Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Fix bug Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Add requirements Signed-off-by: hsiehjackson * Remove requirements Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Remove requirement flash-attn Signed-off-by: hsiehjackson * Fix FA causal for inference Signed-off-by: hsiehjackson * Add experimental PE Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Update all invalid tree references to blobs for NeMo samples (#6679) The tree is invalid as this points to a blob, and the links would not open in colab. Signed-off-by: Brian McBrayer Co-authored-by: Brian McBrayer Signed-off-by: hsiehjackson * Update README.rst about container (#6686) Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix a bug, use _ceil_to_nearest instead as _round_to_nearest is not defined (#6681) (#6682) Co-authored-by: Li Tao Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> Signed-off-by: hsiehjackson * Enable ONNX export of 5B GPT trained with TE FP8 modules (#6458) * add GPT FP8 ONNX export support Signed-off-by: Asfiya Baig * changes 1. Add dynamic axes for inputs 2.
Update model input_example to resolve size error by TE Signed-off-by: Asfiya Baig * Conform to Python style guidelines Signed-off-by: Asfiya Baig * refactor to avoid typecasting bf16 string Signed-off-by: Asfiya Baig * fix attribute error in export_utils Signed-off-by: Asfiya Baig * set constant_folding to False by default Signed-off-by: Asfiya Baig * refactor exportable wrapper into model class definition Signed-off-by: Asfiya Baig * remove conditional replacement of modules Signed-off-by: Asfiya Baig * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * set fp8_recipe to None by default Signed-off-by: Asfiya Baig * address all comments Signed-off-by: Asfiya Baig * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * typecast precision check for fp16 Signed-off-by: Asfiya Baig * rename export script Signed-off-by: Asfiya Baig --------- Signed-off-by: Asfiya Baig Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Boris Fomitchev Signed-off-by: hsiehjackson * [TTS] Add script for text preprocessing (#6541) * [TTS] Add script for text preprocessing Signed-off-by: Ryan * [TTS] Use Normalizer.input_case Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: hsiehjackson * [TTS] Fix adapter duration issue (#6697) * Fix duration issue Signed-off-by: hsiehjackson * Fix duration issue Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add scale aligner loss Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * karpnv/issues6690 (#6705) * add sudo Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * RUN Signed-off-by: Nikolay Karpov --------- Signed-off-by: Nikolay Karpov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Limit codeql scope (#6710) Signed-off-by: smajumdar Signed-off-by: hsiehjackson * eval fix (#6685) * allows usage of pre-extracted base model Signed-off-by: arendu * extracted model checking and loading Signed-off-by: arendu * style Signed-off-by: arendu * style Signed-off-by: arendu * update Signed-off-by: arendu * removed sft eval script, can use peft eval script for sft models Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: hsiehjackson * Fix k2 installation in Docker with CUDA 12 (#6707) (#6709) Signed-off-by: Vladimir Bataev Co-authored-by: Vladimir Bataev Signed-off-by: hsiehjackson * [TTS] Filter out silent audio files during preprocessing (#6716) Signed-off-by: Ryan Signed-off-by: hsiehjackson * not pinning version (#6680) Signed-off-by: Yi Dong Signed-off-by: hsiehjackson * Tutorial fixes (#6717) (#6718) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: hsiehjackson * preprocess squad in sft format (#6727) * preprocess squad in sft format Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix Codeql (#6731) Signed-off-by: smajumdar Signed-off-by: hsiehjackson * [TTS] fix inconsistent type hints for IpaG2p (#6733) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: hsiehjackson * VP Fixes for converter + Config management (#6698) * [Temp] VP Fixes Signed-off-by: smajumdar * Revert logging Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Signed-off-by: hsiehjackson * Graph RNNT: Grid- and Compose-Transducer. W-Transducer loss (#6168) * add GraphTransducerLossBase abstract class with the interface for Graph-based losses * add RNN-T implementation in GraphRnntLoss with tests * add W-Transducer implementation in GraphWTransducerLoss with tests * add GraphRnntLoss + GraphWTransducerLoss to RNN-T loss resolver --------- Signed-off-by: Vladimir Bataev Signed-off-by: hsiehjackson * Fix fastpitch test nightly (#6730) * fix test fastpitch nightly Signed-off-by: hsiehjackson * Reformat Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix if elif condition Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix for interctc test random failure (#6644) Signed-off-by: Igor Gitman Signed-off-by: hsiehjackson * check for first or last stage (#6708) (#6743) * check for first or last stage * remove redundant check * fix typo * add map_location --------- Signed-off-by: ericharper Co-authored-by: Eric Harper Signed-off-by: hsiehjackson * sharded manifests docs (#6751) Signed-off-by: Dima Rekesh Co-authored-by: Dima Rekesh Signed-off-by: hsiehjackson * [TTS] relax hardcoded prefix for phonemes and tones and infer phoneme set through dict (#6735) * [TTS] relax hardcoded prefix for phonemes and tones and infer phoneme set through dict. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * None checks for prefix. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: hsiehjackson * [TTS] corrected misleading deprecation warnings. (#6702) * [TTS] corrected misleading deprecation warnings. * deprecation warning is only triggered when old models applied old g2p.
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --------- Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: hsiehjackson * Bug fix to restore act ckpt (#6753) (#6755) * Bug fix to restore act ckpt * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Markel Sanz Ausin Co-authored-by: Markel Sanz Ausin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Bug fix to reset sequence parallelism (#6756) (#6770) * Bug fix to reset sequence parallelism * Update seq par reset/restore * Add nested loop --------- Signed-off-by: Markel Sanz Ausin Co-authored-by: Markel Sanz Ausin Signed-off-by: hsiehjackson * Fix TTS adapter tutorial (#6741) * Fix adapter tutorial Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix typos Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix checkpointed forward and add test for full activation checkpointing (#6744) (#6771) * fix checkpointed forward and add test for full activation checkpointing * add method * add method --------- Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar Signed-off-by: hsiehjackson * lora notebook (#6765) * lora training Signed-off-by: arendu * update Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: hsiehjackson * Fix Links (#6777) (#6778) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: hsiehjackson * Remove alibi tril Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Add flash-attn requirement Signed-off-by: hsiehjackson * revert sft dataset changes Signed-off-by: Evelina Signed-off-by: hsiehjackson * Move flash-attn requirement Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Add install Signed-off-by: hsiehjackson * peft eval directly from ckpt (#6785) * update to load from ckpt Signed-off-by: arendu * update Signed-off-by: arendu * load ckpt peft model Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update style Signed-off-by: arendu --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Add Frame-VAD examples and utils (#6463) * add model, dataset, necessary utils and tests Signed-off-by: stevehuang52 * fix tarred data Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * add fvad examples and update utils Signed-off-by: stevehuang52 * add copyright Signed-off-by: stevehuang52 * refactor and add tests Signed-off-by: stevehuang52 * update dataset Signed-off-by: stevehuang52 * update test Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * fix typos Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Co-authored-by: fayejf 
<36722593+fayejf@users.noreply.github.com> Co-authored-by: Taejin Park Signed-off-by: hsiehjackson * [TTS][zh] refine hardcoded lowercase for ASCII letters. (#6781) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: hsiehjackson * Revert evaluation Signed-off-by: hsiehjackson * Revert evaluation Signed-off-by: hsiehjackson * Fix Signed-off-by: hsiehjackson * Fix gpu Signed-off-by: hsiehjackson * Spellchecking ASR customization model (#6179) * bug fixes Signed-off-by: Alexandra Antonova * fix bugs, add preparation and evaluation scripts, add readme Signed-off-by: Alexandra Antonova * small fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add real coverage calculation, small fixes, more debug information Signed-off-by: Alexandra Antonova * add option to pass a filelist and output folder - to handle inference from multiple input files Signed-off-by: Alexandra Antonova * added preprocessing for yago wikipedia articles - finding yago entities and their subphrases Signed-off-by: Alexandra Antonova * yago wiki preprocessing, sampling, pseudonormalization Signed-off-by: Alexandra Antonova * more scripts for preparation of training examples Signed-off-by: Alexandra Antonova * bug fixes Signed-off-by: Alexandra Antonova * add some alphabet checks Signed-off-by: Alexandra Antonova * add bert on subwords, concatenate it to bert on characters Signed-off-by: Alexandra Antonova * add calculation of character_pos_to_subword_pos Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * tensor join bug fix Signed-off-by: Alexandra Antonova * double hidden_size in classifier Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * default index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * pad index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * remove pdb Signed-off-by: Alexandra Antonova * fix bugs, add creation of tarred dataset Signed-off-by: Alexandra Antonova * add possibility to change sequence len at inference Signed-off-by: Alexandra Antonova * change sampling of dummy candidates at inference, add candidate info file Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * update transcription now uses info Signed-off-by: Alexandra Antonova * write path Signed-off-by: Alexandra Antonova * 1. add tarred dataset support(untested). 2. 
fix bug with ban_ngrams in indexing Signed-off-by: Alexandra Antonova * skip short_sent if no real candidates Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * add braceexpand Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug in np.ones Signed-off-by: Alexandra Antonova * fix bug in collate Signed-off-by: Alexandra Antonova * change tensor type to long because of error in torch.gather Signed-off-by: Alexandra Antonova * fix for empty spans tensor Signed-off-by: Alexandra Antonova * same fixes in _collate_fn for tarred dataset Signed-off-by: Alexandra Antonova * fix bug from previous commit Signed-off-by: Alexandra Antonova * change int types to be shorter to minimize tar size Signed-off-by: Alexandra Antonova * refactoring of datasets and inference Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * tar by 100k examples, small fixes Signed-off-by: Alexandra Antonova * small fixes, add analytics script Signed-off-by: Alexandra Antonova * Add functions for dynamic programming comparison to get best path by ngrams Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * small fix Signed-off-by: Alexandra Antonova * fixes to support testing on SPGISpeech Signed-off-by: Alexandra Antonova * add preprocessing for userlibri Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * small refactoring before pr. Add bash-scripts reproducing evaluation Signed-off-by: Alexandra Antonova * style fix Signed-off-by: Alexandra Antonova * small fixes in inference Signed-off-by: Alexandra Antonova * bug fix - didn't move window on last symbol Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug - shuffle was before truncation of sorted candidates Signed-off-by: Alexandra Antonova * refactoring, fix some bugs Signed-off-by: Alexandra Antonova * various fixes. Add word_indices at inference Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add candidate positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Move data preparation and evaluation to other repo Signed-off-by: Alexandra Antonova * add infer_reproduce_paper.
Refactoring Signed-off-by: Alexandra Antonova * refactor inference using fragment indices Signed-off-by: Alexandra Antonova * add some helper functions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug with parameters order Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bugs Signed-off-by: Alexandra Antonova * refactoring, fix bug Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add multiple variants of adjusting start/end positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add unit tests, other fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix Signed-off-by: Alexandra Antonova * fix CodeQl warnings Signed-off-by: Alexandra Antonova * bug fixes Signed-off-by: Alexandra Antonova * fix bugs, add preparation and evaluation scripts, add readme Signed-off-by: Alexandra Antonova * small fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add real coverage calculation, small fixes, more debug information Signed-off-by: Alexandra Antonova * add option to pass a filelist and output folder - to handle inference from multiple input files Signed-off-by: Alexandra Antonova * added preprocessing for yago wikipedia articles - finding yago entities and their subphrases Signed-off-by: Alexandra Antonova * yago wiki preprocessing, sampling, pseudonormalization Signed-off-by: Alexandra Antonova * more scripts for preparation of training examples Signed-off-by: Alexandra Antonova * bug fixes Signed-off-by: Alexandra Antonova * add some alphabet checks Signed-off-by: Alexandra Antonova * add bert on subwords, concatenate it to bert on characters Signed-off-by: Alexandra Antonova * add calculation of character_pos_to_subword_pos Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * tensor join bug fix Signed-off-by: Alexandra Antonova * double hidden_size in classifier Signed-off-by: Alexandra Antonova * pdb Signed-off-by: Alexandra Antonova * default index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * pad index value 0 instead of -1 because index cannot be negative Signed-off-by: Alexandra Antonova * remove pdb Signed-off-by: Alexandra Antonova * fix bugs, add creation of tarred dataset Signed-off-by: Alexandra Antonova * add possibility to change sequence len at inference Signed-off-by: Alexandra Antonova * change sampling of dummy candidates at inference, add candidate info file Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * update transcription now uses info Signed-off-by: Alexandra Antonova * write path Signed-off-by: Alexandra Antonova * 1. add tarred dataset support(untested). 2. 
fix bug with ban_ngrams in indexing Signed-off-by: Alexandra Antonova * skip short_sent if no real candidates Signed-off-by: Alexandra Antonova * fix import Signed-off-by: Alexandra Antonova * add braceexpand Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * fix bug in np.ones Signed-off-by: Alexandra Antonova * fix bug in collate Signed-off-by: Alexandra Antonova * change tensor type to long because of error in torch.gather Signed-off-by: Alexandra Antonova * fix for empty spans tensor Signed-off-by: Alexandra Antonova * same fixes in _collate_fn for tarred dataset Signed-off-by: Alexandra Antonova * fix bug from previous commit Signed-off-by: Alexandra Antonova * change int types to be shorter to minimize tar size Signed-off-by: Alexandra Antonova * refactoring of datasets and inference Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * bug fix Signed-off-by: Alexandra Antonova * tar by 100k examples, small fixes Signed-off-by: Alexandra Antonova * small fixes, add analytics script Signed-off-by: Alexandra Antonova * Add functions for dynamic programming comparison to get best path by ngrams Signed-off-by: Alexandra Antonova * fixes Signed-off-by: Alexandra Antonova * small fix Signed-off-by: Alexandra Antonova * fixes to support testing on SPGISpeech Signed-off-by: Alexandra Antonova * add preprocessing for userlibri Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * some refactoring Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * move some functions to utils to reuse from other project Signed-off-by: Alexandra Antonova * small refactoring before pr. Add bash-scripts reproducing evaluation Signed-off-by: Alexandra Antonova * style fix Signed-off-by: Alexandra Antonova * small fixes in inference Signed-off-by: Alexandra Antonova * bug fix - didn't move window on last symbol Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug - shuffle was before truncation of sorted candidates Signed-off-by: Alexandra Antonova * refactoring, fix some bugs Signed-off-by: Alexandra Antonova * various fixes. Add word_indices at inference Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add candidate positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Move data preparation and evaluation to other repo Signed-off-by: Alexandra Antonova * add infer_reproduce_paper.
Refactoring Signed-off-by: Alexandra Antonova * refactor inference using fragment indices Signed-off-by: Alexandra Antonova * add some helper functions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bug with parameters order Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix bugs Signed-off-by: Alexandra Antonova * refactoring, fix bug Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add multiple variants of adjusting start/end positions Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more fixes Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add unit tests, other fixes Signed-off-by: Alexandra Antonova * fix Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix CodeQl warnings Signed-off-by: Alexandra Antonova * add script for full inference pipeline, refactoring Signed-off-by: Alexandra Antonova * add tutorial Signed-off-by: Alexandra Antonova * take example data from HuggingFace Signed-off-by: Alexandra Antonova * add docs Signed-off-by: Alexandra Antonova * fix comment Signed-off-by: Alexandra Antonova * fix bug Signed-off-by: Alexandra Antonova * small fixes for PR Signed-off-by: Alexandra Antonova * add some more tests Signed-off-by: Alexandra Antonova * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * try to fix tests adding with_downloads Signed-off-by: Alexandra Antonova * skip tests with tokenizer download Signed-off-by: Alexandra Antonova --------- Signed-off-by: Alexandra Antonova Signed-off-by: Alexandra Antonova Co-authored-by: Alexandra Antonova Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: hsiehjackson * Fix test Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Fix device Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: hsiehjackson * Revert Signed-off-by: hsiehjackson * clean Signed-off-by: hsiehjackson * Change device Signed-off-by: hsiehjackson * Change device Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add test FA Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add CI Signed-off-by: hsiehjackson * Fix yaml order Signed-off-by: hsiehjackson * Test random attention mask Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add install FA for tests Signed-off-by: hsiehjackson * cherry pick 6788 (#6816) * cherry pick 6788 Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * 
Support 2D mask Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add missing comp_att_mask arg Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix code ql Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Megatron MPT-7B Support (#6804) * Initial commit of MPT-7B functionality Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added various fixes requested by reviewers Signed-off-by: Daniel Egert * Added conversion script for mpt-7b to Nemo Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added small note about TP and PP values Signed-off-by: Daniel Egert * Replaced all print statements with Nemo logging Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Daniel Egert Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Fix test triton Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update FA in CI Signed-off-by: hsiehjackson * Fix Jenkin error Signed-off-by: hsiehjackson * Resume with FA Signed-off-by: hsiehjackson * Follow comments Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix README Signed-off-by: hsiehjackson * Fix README Signed-off-by: hsiehjackson * Remove torch.cuda Signed-off-by: hsiehjackson * Remove unused import Signed-off-by: hsiehjackson * kerple init Signed-off-by: hsiehjackson * Add TE comment Signed-off-by: hsiehjackson * Fix error when inference.compute_attention_mask=False Signed-off-by: hsiehjackson --------- Signed-off-by: Abhinav Khattar Signed-off-by: hsiehjackson Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Dima Rekesh Signed-off-by: Jim O’Regan Signed-off-by: smajumdar Signed-off-by: Mostafa Ghorbandoost Signed-off-by: Dmytro Pykhtar Signed-off-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Signed-off-by: Micha Livne Signed-off-by: Kunal Dhawan Signed-off-by: andrusenkoau Signed-off-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Signed-off-by: Evelina Signed-off-by: Ryan Signed-off-by: Taejin Park Signed-off-by: Ante Jukić Signed-off-by: Daniel Egert Signed-off-by: stevehuang52 Signed-off-by: Vahid Signed-off-by: KunalDhawan Signed-off-by: MaximumEntropy Signed-off-by: fayejf Signed-off-by: Nikolay Karpov Signed-off-by: Nikolay Karpov Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Signed-off-by: Greg Clark Signed-off-by: arendu Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Signed-off-by: Vladimir Bataev Signed-off-by: Elena Rastorgueva Signed-off-by: Igor Gitman Signed-off-by: Sangkug Lym Signed-off-by: sam1373 Signed-off-by: Boris Fomitchev Signed-off-by: Mikołaj Błaż Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Tim Moon Signed-off-by: Eric Harper Signed-off-by: ericharper Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Signed-off-by: Yi 
Dong Signed-off-by: root Signed-off-by: Nithin Rao Koluguri Signed-off-by: Zhilin Wang Signed-off-by: Brian McBrayer Signed-off-by: Jocelyn Huang Signed-off-by: Krishna Puvvada Signed-off-by: Asfiya Baig Signed-off-by: Dima Rekesh Signed-off-by: Markel Sanz Ausin Signed-off-by: Alexandra Antonova Signed-off-by: Alexandra Antonova Signed-off-by: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Abhinav Khattar Co-authored-by: Micha Livne Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Dima Rekesh Co-authored-by: Jim O’Regan Co-authored-by: Somshubra Majumdar Co-authored-by: Mostafa Ghorbandoost Co-authored-by: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Co-authored-by: Dmytro Pykhtar Co-authored-by: Eric Harper Co-authored-by: Micha Livne Co-authored-by: Kunal Dhawan Co-authored-by: Andrei Andrusenko <52885736+andrusenkoau@users.noreply.github.com> Co-authored-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: George Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: Ryan Langman Co-authored-by: Slyne Deng Co-authored-by: Taejin Park Co-authored-by: Jagadeesh Balam <4916480+jbalam-nv@users.noreply.github.com> Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Co-authored-by: trias702 <25867060+trias702@users.noreply.github.com> Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: Vahid Noroozi Co-authored-by: Sandeep Subramanian Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: Nikolay Karpov Co-authored-by: Nikolay Karpov Co-authored-by: Hoo Chang Shin Co-authored-by: Greg Clark Co-authored-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: Sangkug Lym Co-authored-by: Vladimir Bataev Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: Igor Gitman Co-authored-by: Samuel Kriman Co-authored-by: Boris Fomitchev Co-authored-by: mikolajblaz Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: Adi Renduchintala Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> Co-authored-by: root Co-authored-by: Nithin Rao Co-authored-by: Evelina Co-authored-by: Zhilin Wang Co-authored-by: Brian McBrayer Co-authored-by: Jocelyn Co-authored-by: Krishna Puvvada <93558329+krishnacpuvvada@users.noreply.github.com> Co-authored-by: Krishna Puvvada Co-authored-by: Brian McBrayer Co-authored-by: Li Tao Co-authored-by: asfiyab-nvidia <117682710+asfiyab-nvidia@users.noreply.github.com> Co-authored-by: Dima Rekesh Co-authored-by: Markel Sanz Ausin Co-authored-by: bene-ges Co-authored-by: Alexandra Antonova --- Dockerfile | 5 + Jenkinsfile | 346 ++++++++++++++++++ README.rst | 10 + .../conf/megatron_gpt_config.yaml | 5 +- .../conf/megatron_model_base_config.yaml | 3 +- .../conf/megatron_gpt_peft_eval_config.yaml | 3 +- .../language_modeling/megatron/gpt_model.py | 4 + .../language_modeling/megatron_base_model.py | 14 +- .../language_modeling/megatron_gpt_model.py | 10 +- .../megatron_gpt_prompt_learning_model.py | 1 + .../nlp/modules/common/megatron/attention.py | 328 +++++++++++------ .../modules/common/megatron/language_model.py | 95 ++++- 
.../modules/common/megatron/layer_norm_1p.py | 30 +- .../common/megatron/megatron_decoders.py | 4 + .../common/megatron/megatron_encoders.py | 4 + .../megatron/megatron_transformer_decoder.py | 4 + .../megatron/megatron_transformer_encoder.py | 14 +- .../megatron/position_embedding/__init__.py | 31 ++ .../alibi_relative_position_embedding.py | 50 ++- .../kerple_relative_position_embedding.py | 19 +- .../rotary_position_embedding.py} | 3 +- .../sandwich_relative_position_embedding.py | 75 ++++ .../t5_relative_position_embedding.py | 9 +- .../xpos_position_embedding.py | 78 ++++ .../common/megatron/retrieval_transformer.py | 2 +- .../megatron/token_level_encoder_decoder.py | 27 +- .../modules/common/megatron/transformer.py | 43 ++- .../nlp/modules/common/megatron/utils.py | 31 +- .../common/text_generation_strategy.py | 34 +- .../modules/common/text_generation_utils.py | 11 +- .../ngram_lm/create_lexicon_from_arpa.py | 153 ++++---- .../convert_mpt_7b_hf_to_nemo.py | 212 +++++++++++ tests/collections/nlp/test_flash_attention.py | 247 +++++++++++++ .../nlp/test_position_embedding.py | 211 +++++++++++ .../collections/nlp/test_retrieval_module.py | 2 +- .../nlp/test_retrieval_module_inference.py | 2 +- 36 files changed, 1843 insertions(+), 277 deletions(-) create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py rename nemo/collections/nlp/modules/common/megatron/{ => position_embedding}/alibi_relative_position_embedding.py (73%) rename nemo/collections/nlp/modules/common/megatron/{ => position_embedding}/kerple_relative_position_embedding.py (81%) rename nemo/collections/nlp/modules/common/megatron/{rotary_pos_embedding.py => position_embedding/rotary_position_embedding.py} (96%) create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py rename nemo/collections/nlp/modules/common/megatron/{ => position_embedding}/t5_relative_position_embedding.py (95%) create mode 100644 nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py create mode 100644 scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py create mode 100644 tests/collections/nlp/test_flash_attention.py create mode 100644 tests/collections/nlp/test_position_embedding.py diff --git a/Dockerfile b/Dockerfile index 82d16a561886..7722555357b2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -72,6 +72,11 @@ WORKDIR /tmp/nemo COPY requirements . RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-check --no-cache-dir -r $f; done +# install flash attention dependencies +RUN pip install flash-attn +# pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 +RUN pip install triton==2.0.0.dev20221202 + # install k2, skip if installation fails COPY scripts /tmp/nemo/scripts/ RUN INSTALL_MSG=$(/bin/bash /tmp/nemo/scripts/speech_recognition/k2/setup.sh); INSTALL_CODE=$?; \ diff --git a/Jenkinsfile b/Jenkinsfile index d16379cabb8a..d335378173f0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -65,6 +65,14 @@ pipeline { pip install -e .' 
} } + + stage('Flash Attention installation') { + steps { + // pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 + sh 'pip install flash-attn && \ + pip install triton==2.0.0.dev20221202' + } + } stage('PyTorch Lightning version') { steps { @@ -3144,6 +3152,88 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=1 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=6 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + exp_manager.resume_if_exists=True \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=2 \ + model.optim.sched.constant_steps=2 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + 
model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with Rope Pretraining and Resume Training TP=2') { when { anyOf { branch 'main' @@ -3229,6 +3319,262 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } } + stage('L2: Megatron GPT with Rope Pretraining using Flash Attention and Resume Training TP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=rope \ + model.rotary_percentage=0.5 \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ + model.use_flash_attention=True" + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=1 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=6 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + exp_manager.resume_if_exists=True \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=2 \ + model.optim.sched.constant_steps=2 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=rope \ + model.rotary_percentage=0.5 \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ 
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ + model.use_flash_attention=True" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with ALiBi Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=alibi \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=1 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=6 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + exp_manager.resume_if_exists=True \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=2 \ + model.optim.sched.constant_steps=2 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=alibi \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + 
model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } + stage('L2: Megatron GPT with KERPLE Pretraining and Resume Training TP=2') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + steps { + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=2 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=3 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=1 \ + model.optim.sched.constant_steps=1 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=kerple \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + trainer.devices=2 \ + trainer.accelerator=gpu \ + trainer.log_every_n_steps=1 \ + trainer.val_check_interval=2 \ + trainer.limit_val_batches=1 \ + trainer.accumulate_grad_batches=1 \ + trainer.max_steps=6 \ + trainer.precision=16 \ + trainer.gradient_clip_val=1.0 \ + exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + exp_manager.resume_if_exists=True \ + model.tensor_model_parallel_size=2 \ + model.optim.name=fused_adam \ + model.optim.lr=2e-4 \ + model.optim.sched.warmup_steps=2 \ + model.optim.sched.constant_steps=2 \ + model.optim.sched.min_lr=8e-5 \ + model.max_position_embeddings=128 \ + model.encoder_seq_length=128 \ + model.data.seq_length=128 \ + model.position_embedding_type=kerple \ + model.normalization=rmsnorm \ + model.bias=False \ + model.bias_activation_fusion=False \ + model.bias_dropout_add_fusion=False \ + model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + 
model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + model.num_layers=8 \ + model.hidden_size=256 \ + model.num_attention_heads=8 \ + model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ + model.activations_checkpoint_num_layers=1 \ + model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" + sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" + } + } stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { diff --git a/README.rst b/README.rst index b9ba7fce30f3..863b279b2be8 100644 --- a/README.rst +++ b/README.rst @@ -280,6 +280,16 @@ It is highly recommended to use the NVIDIA PyTorch or NeMo container if having i Transformer Engine requires PyTorch to be built with CUDA 11.8. + +Flash Attention +~~~~~~~~~~~~~~~~~~~~ +Transformer Engine already supports Flash Attention for GPT models. If you want to use Flash Attention for non-causal models or use with attention bias (introduced from position encoding, e.g. Alibi), please install `flash-attn `_. + +.. code-block:: bash + + pip install flash-attn + pip install triton==2.0.0.dev20221202 + NeMo Text Processing ~~~~~~~~~~~~~~~~~~~~ NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separate repository `https://github.com/NVIDIA/NeMo-text-processing `_. diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index d502f255bd8e..d1132a32349a 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -77,7 +77,7 @@ model: transformer_block_type: 'pre_ln' # Options ['pre_ln', 'post_ln', 'normformer'] openai_gelu: False # Use OpenAI's GELU instead of the default GeLU normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. - position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'rope'] + position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'rope', 'alibi', 'kerple' , 'xpos', 'sandwich'] xpos and sandwich are experimental. rotary_percentage: 1.0 # If using position_embedding_type=rope, then the per head dim is multiplied by this. attention_type: 'multihead' # Attention type. Options ['multihead'] share_embeddings_and_output_weights: True # Share embedding and output layer weights. @@ -167,6 +167,9 @@ model: reduce_amax: True # Perform reduction to sync amax tensors across GPUs after every iteration use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + ## Flash Attention + use_flash_attention: False # Use flash attention in self-attention module, this config does nothing when transformer_engine=True + data: # Path to data must be specified by the user. 
# Supports List, String and Dictionary diff --git a/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml b/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml index d3feb97ea9b4..e98ebae6da63 100644 --- a/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_model_base_config.yaml @@ -36,4 +36,5 @@ megatron_legacy: False # Whether to use the legacy Megatron model. This affects normalize_attention_scores: True # Whether to scale the output Q * K^T by 1 / sqrt(hidden_size_per_head). This arg is provided as a configuration option mostly for compatibility with models that have been weight-converted from HF. You almost always want to se this to True. num_moe_experts: 1 # When >1, FFNs are changed to MoE layers moe_frequency: 1 # every Nth ffn layer will be made MoE -moe_dropout: 0.0 # Dropout value for MoE layers \ No newline at end of file +moe_dropout: 0.0 # Dropout value for MoE layers +use_flash_attention: false # Use flash attention in self-attention module \ No newline at end of file diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml index 69dc17f244f5..8c21117969ab 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_eval_config.yaml @@ -129,4 +129,5 @@ inference: repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - outfile_path: output.txt \ No newline at end of file + outfile_path: output.txt + compute_attention_mask: True \ No newline at end of file diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index e890e6ae4807..b43dc98f2fe7 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -151,6 +151,7 @@ def __init__( gradient_accumulation_fusion=False, persist_layer_norm=False, openai_gelu=False, + megatron_legacy=False, onnx_safe=False, sequence_parallel=False, transformer_engine=False, @@ -163,6 +164,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + use_flash_attention=False, ): super(GPTModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -232,6 +234,7 @@ def __init__( persist_layer_norm=persist_layer_norm, openai_gelu=openai_gelu, onnx_safe=onnx_safe, + megatron_legacy=megatron_legacy, sequence_parallel=sequence_parallel, transformer_engine=transformer_engine, fp8=fp8, @@ -243,6 +246,7 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + use_flash_attention=use_flash_attention, ) if self.share_embeddings_and_output_weights: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 2568a14f8dbf..7be679376175 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -25,6 +25,7 @@ from 
pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.models.nlp_model import NLPModel +from nemo.collections.nlp.modules.common.megatron.attention import HAVE_FLASH_ATTENTION from nemo.collections.nlp.modules.common.megatron.clip_grads import ( clip_grad_norm_distributed_optimizer, clip_grad_norm_fp32, @@ -84,6 +85,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): if trainer is None: raise ValueError(f"Trainer cannot be None for Megatron-based models. Please provide a PTL trainer object.") + if cfg.get('use_flash_attention', False) and not HAVE_FLASH_ATTENTION: + raise ImportError( + "flash_attn was not found. Please see the installation instructions: https://github.com/HazyResearch/flash-attention." + "If you use flash_attn with triton. Please install triton==2.0.0.dev20221202." + ) + # this prevents base constructor from initializing tokenizer self.tokenizer = None @@ -205,9 +212,10 @@ def _build_tokenizer(self): self.tokenizer = get_nmt_tokenizer( library=self._cfg.tokenizer.library, model_name=self._cfg.tokenizer.type, - tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.model), - vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.vocab_file), - merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.merge_file), + tokenizer_model=self.register_artifact("tokenizer.model", self._cfg.tokenizer.get('model', None)), + vocab_file=self.register_artifact("tokenizer.vocab_file", self._cfg.tokenizer.get('vocab_file', None)), + merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.get('merge_file', None)), + use_fast=self.cfg.tokenizer.get('use_fast', False), delimiter=self.cfg.tokenizer.get('delimiter', None), legacy=legacy, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 8eff896cf9d8..853c637eb3b3 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -300,7 +300,7 @@ def get_inference_config(self): def model_provider_func(self, pre_process, post_process): """Model depends on pipeline paralellism.""" model = GPTModel( - vocab_size=self.padded_vocab_size, + vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size), hidden_size=self.cfg.hidden_size, max_position_embeddings=self.cfg.max_position_embeddings, num_layers=self.cfg.num_layers, @@ -357,6 +357,8 @@ def model_provider_func(self, pre_process, post_process): fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), reduce_amax=self.cfg.get('reduce_amax', True), use_emha=self.cfg.get('use_emha', False), + use_flash_attention=self.cfg.get('use_flash_attention', False), + megatron_legacy=self.cfg.get('megatron_legacy', False), ) return model @@ -765,7 +767,6 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ if self.get_attention_mask_from_fusion: required_keys.remove('attention_mask') batch = {key: val.cuda(non_blocking=True) if key in required_keys else None for key, val in batch.items()} - # Model forward pass output_tensor = model( batch['tokens'], @@ -822,9 +823,10 @@ def fwd_output_only_func(dataloader_iter, model): inference_max_sequence_len, ) = batch tokens = tokens.cuda() - attention_mask = attention_mask.cuda() position_ids = position_ids.cuda() - attention_mask = attention_mask[0:1] + if attention_mask is not 
None: + attention_mask = attention_mask.cuda() + attention_mask = attention_mask[0:1] extra_arg['set_inference_key_value_memory'] = set_inference_key_value_memory[0].item() extra_arg['inference_max_sequence_len'] = inference_max_sequence_len[0].item() output_tensor = model(tokens, position_ids, attention_mask, **extra_arg) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index 95448e67bd11..81ca1c283ad0 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -753,6 +753,7 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] "add_BOS": inference_config["add_BOS"], "all_probs": inference_config["all_probs"], "compute_logprob": inference_config["compute_logprob"], + "compute_attention_mask": inference_config.get("compute_attention_mask", True), } task_ids, processed_inputs = batch diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index 9c954b5e6313..b0d98e0c2fb1 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -27,8 +27,15 @@ ) from nemo.collections.nlp.modules.common.megatron.fused_softmax import MatchedScaleMaskSoftmax from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import apply_rotary_pos_emb -from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, attention_mask_func +from nemo.collections.nlp.modules.common.megatron.position_embedding import XPOSPositionEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding.rotary_position_embedding import ( + apply_rotary_pos_emb, +) +from nemo.collections.nlp.modules.common.megatron.utils import ( + ApexGuardDefaults, + _cast_if_autocast_enabled, + attention_mask_func, +) from nemo.collections.nlp.parts import utils_funcs from nemo.core import adapter_mixins @@ -55,6 +62,20 @@ HAVE_MEGATRON_CORE = False +try: + from flash_attn.bert_padding import pad_input, unpad_input + from flash_attn.flash_attn_interface import flash_attn_unpadded_func + from flash_attn.flash_attn_triton import flash_attn_func + + HAVE_FLASH_ATTENTION = True + +except (ImportError, ModuleNotFoundError): + + HAVE_FLASH_ATTENTION = False + + flash_attn_unpadded_func, flash_attn_func = None, None + unpad_input, pad_input = None, None + """ We use the following notation throughout this file: h: hidden size n: number of attention heads @@ -104,9 +125,9 @@ def __init__( sequence_parallel=False, gradient_accumulation_fusion=False, normalize_attention_scores=True, + use_flash_attention=False, ): super(ParallelAttention, self).__init__() - self.layer_number = max(1, layer_number) self.attention_type = attention_type self.attn_mask_type = attn_mask_type @@ -201,6 +222,8 @@ def __init__( multi_query_attention=multi_query_attention, sequence_parallel=sequence_parallel, normalize_attention_scores=normalize_attention_scores, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) # Output. 
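The flash-attention path added further down in this file (flash_attention_cuda) has to translate Megatron's boolean [b, 1, sq, sk] attention mask, where True marks a position as masked out, into flash-attn-style per-token padding masks where True means "attend". A minimal illustration of that torch.any(torch.eq(mask, False), dim=...) conversion, using assumed toy values rather than anything taken from the patch:

    import torch

    # Megatron-style mask, shape [b, 1, sq, sk]: True = do NOT attend.
    # Toy example: seq_len 3, causal, with position 2 being padding.
    megatron_mask = torch.tensor([[[[False, True,  True ],
                                    [False, False, True ],
                                    [True,  True,  True ]]]])

    # flash-attn padding masks, shape [b, s]: True = real token / attend.
    mask_q  = torch.any(torch.eq(megatron_mask, False), dim=3).squeeze(1)  # query row attends to something
    mask_kv = torch.any(torch.eq(megatron_mask, False), dim=2).squeeze(1)  # key column is attended by something

    print(mask_q)   # tensor([[ True,  True, False]])  -> padded query row dropped
    print(mask_kv)  # tensor([[ True,  True, False]])  -> padded key column dropped
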
@@ -292,14 +315,14 @@ def custom_forward(*inputs): return hidden_states - def _allocate_memory(self, inference_max_sequence_len, batch_size, dtype): + def _allocate_memory(self, inference_max_sequence_len, batch_size, dtype, device): return torch.empty( inference_max_sequence_len, batch_size, self.num_attention_heads_per_partition, self.hidden_size_per_attention_head, dtype=dtype, - device=torch.cuda.current_device(), + device=device, ) def _transpose_last_dim(self, mixed_layer, num_splits, num_splits_first): @@ -357,10 +380,10 @@ def forward( if set_inference_key_value_memory: assert inference_max_sequence_len and inference_max_sequence_len > 0 self.inference_key_memory = self._allocate_memory( - inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype + inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype, hidden_states.device ) self.inference_value_memory = self._allocate_memory( - inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype + inference_max_sequence_len, hidden_states.size(1), hidden_states.dtype, hidden_states.device ) self.inference_current_sequence_len = 0 @@ -469,7 +492,8 @@ def forward( key_layer = self.inference_key_memory[:end, ...] value_layer = self.inference_value_memory[:end, ...] # Adjust attention mask - attention_mask = attention_mask[..., start:end, :end] + if attention_mask is not None: + attention_mask = attention_mask[..., start:end, :end] # adjust the key rotary positional embedding if rotary_pos_emb is not None: q_pos_emb, k_pos_emb = rotary_pos_emb @@ -711,6 +735,8 @@ def __init__( sequence_parallel=False, normalize_attention_scores=True, multi_query_attention=False, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): super(CoreAttention, self).__init__() @@ -723,6 +749,7 @@ def __init__( elif int(precision) == 16: self.fp16 = True self.multi_query_attention = multi_query_attention + self.position_embedding_type = position_embedding_type self.apply_query_key_layer_scaling = apply_query_key_layer_scaling self.attention_softmax_in_fp32 = False @@ -772,8 +799,17 @@ def __init__( # Dropout. Note that for a single iteration, this layer will generate # different outputs on different number of parallel partitions but # on average it should not be partition dependent. + self.attention_dropout_p = attention_dropout self.attention_dropout = torch.nn.Dropout(attention_dropout) + if use_flash_attention: + self.attn_fn = self.flash_attention + else: + self.attn_fn = self.torch_attention + + if position_embedding_type.lower() == 'xpos': + self.xpos = XPOSPositionEmbedding(kv_channels) + def forward( self, query_layer, @@ -786,19 +822,43 @@ def forward( relative_position_bias=None, headscale_tensor=None, ): + b, np, sq, sk, hn = ( + query_layer.size(1), + query_layer.size(2), + query_layer.size(0), + key_layer.size(0), + query_layer.size(3), + ) - # =================================== - # Raw attention scores. [b, np, s, s] - # =================================== + # ================================================== + # Update attention mask for inference. 
[b, np, sq, sk] + # ================================================== + if get_key_value: + with torch.no_grad(): + if layer_past is not None: + attention_mask = attention_mask[..., sq - 1, :sk].unsqueeze(2) + else: + attention_mask = attention_mask[..., :sq, :sk] - # [b, np, sq, sk] - output_size = (query_layer.size(1), query_layer.size(2), query_layer.size(0), key_layer.size(0)) + # ================================================== + # Update attention bias. [b, np, sq, sk] + # ================================================== + if relative_position_bias is not None: + relative_position_bias = relative_position_bias[ + :, + self.num_attention_heads_partition_offset : self.num_attention_heads_partition_offset + + self.num_attention_heads_per_partition, + -sq:, + -sk:, + ] + # ================================================== + # Update query_layer, key_layer, value_layer + # ================================================== # TODO: figure out how to do this # apply relative positional encoding (rotary embedding) if rotary_pos_emb is not None: q_pos_emb, k_pos_emb = rotary_pos_emb - query_layer = apply_rotary_pos_emb(query_layer, q_pos_emb) key_layer = apply_rotary_pos_emb(key_layer, k_pos_emb) # TODO, can apply positional embedding to value_layer so it has @@ -806,86 +866,67 @@ def forward( # otherwise, only relative positional embedding takes effect # value_layer = apply_rotary_pos_emb(value_layer, k_pos_emb) - if self.multi_query_attention: - # [sq, b, np, hn] -> [b, np * sq, hn] - query_layer = query_layer.permute([1, 2, 0, 3]).reshape( - output_size[0], output_size[1] * output_size[2], -1 - ) + if self.position_embedding_type.lower() == 'xpos': + query_layer = self.xpos(query_layer, offset=key_layer.shape[-2] - query_layer.shape[-2], downscale=False) + key_layer = self.xpos(key_layer, offset=0, downscale=True) - # [sk, b, 1, hn] -> [b, hn, sk] - key_layer = key_layer.squeeze(2).permute(1, 2, 0) + # ================================================== + # query_layer [sq, b, np, hn] + # key_layer [sk, b, np, hn] + # value_layer [sk, b, np, hn] + # attention_mask [b, 1, sq, sk] or [b, s] + # relative_position_bias [b, np, sq, sk] + # context_layer [b, np, sq, hn] + # ================================================== + context_layer = self.attn_fn(query_layer, key_layer, value_layer, attention_mask, relative_position_bias) - # preallocting input tensor: [b * np, sq, sk] - matmul_input_buffer = torch.empty( - output_size[0] * output_size[1], - output_size[2], - output_size[3], - dtype=query_layer.dtype, - device=torch.cuda.current_device(), - ) + if headscale_tensor is not None: + context_layer = context_layer * headscale_tensor - # Raw attention scores. [b * np, sq, sk] - matmul_result = torch.baddbmm( - matmul_input_buffer, - query_layer, # [b * np, sq, hn] - key_layer, # [b * np, hn, sk] - beta=0.0, - alpha=(1.0 / self.norm_factor), - ) - else: - # [sq, b, np, hn] -> [sq, b * np, hn] - query_layer = query_layer.view(output_size[2], output_size[0] * output_size[1], -1) - # [sk, b, np, hn] -> [sk, b * np, hn] - key_layer = key_layer.view(output_size[3], output_size[0] * output_size[1], -1) - - # preallocting input tensor: [b * np, sq, sk] - matmul_input_buffer = torch.empty( - output_size[0] * output_size[1], - output_size[2], - output_size[3], - dtype=query_layer.dtype, - device=torch.cuda.current_device(), - ) + # [b, np, sq, hn] --> [sq, b, np, hn] + context_layer = context_layer.permute(2, 0, 1, 3).contiguous() - # Raw attention scores. 
[b * np, sq, sk] - matmul_result = torch.baddbmm( - matmul_input_buffer, - query_layer.transpose(0, 1), # [b * np, sq, hn] - key_layer.transpose(0, 1).transpose(1, 2), # [b * np, hn, sk] - beta=0.0, - alpha=(1.0 / self.norm_factor) if self.normalize_attention_scores else 1.0, - ) + # [sq, b, np, hn] --> [sq, b, hp] + new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,) + context_layer = context_layer.view(*new_context_layer_shape) - # change view to [b, np, sq, sk] - attention_scores = matmul_result.view(*output_size) + return context_layer - if relative_position_bias is not None: - attention_scores += relative_position_bias[ - :, - self.num_attention_heads_partition_offset : self.num_attention_heads_partition_offset - + self.num_attention_heads_per_partition, - : attention_scores.size(2), - : attention_scores.size(3), - ] + def torch_attention(self, query_layer, key_layer, value_layer, attention_mask, attention_bias): + sq, b, np, hn = query_layer.shape + sk = key_layer.shape[0] - # ================================================== - # Update attention mask for inference. [b, np, sq, sk] - # ================================================== + if self.multi_query_attention: + query_layer = rearrange(query_layer, 'sq b np hn -> b (np sq) hn') + key_layer = rearrange(key_layer, 'sk b 1 hn -> b hn sk') + value_layer = rearrange(value_layer, 'sv b np hn -> (b np) sv hn') + else: + query_layer = rearrange(query_layer, 'sq b np hn -> (b np) sq hn') + key_layer = rearrange(key_layer, 'sk b np hn -> (b np) hn sk') + value_layer = rearrange(value_layer, 'sv b np hn -> (b np) sv hn') + + matmul_input_buffer = torch.empty( + query_layer.shape[0], + query_layer.shape[1], + key_layer.shape[2], + dtype=query_layer.dtype, + device=query_layer.device, + ) - if get_key_value: - with torch.no_grad(): - if layer_past is not None: - attention_mask = attention_mask[ - ..., attention_scores.size(3) - 1, : attention_scores.size(3) - ].unsqueeze(2) - else: - attention_mask = attention_mask[..., : attention_scores.size(3), : attention_scores.size(3)] + matmul_result = torch.baddbmm( + matmul_input_buffer, + query_layer, + key_layer, + beta=0.0, + alpha=(1.0 / self.norm_factor) if self.normalize_attention_scores else 1.0, + ) + + # change view to [b, np, sq, sk] + attention_scores = matmul_result.view(b, np, sq, sk) - # =========================== - # Attention probs and dropout - # =========================== + if attention_bias is not None: + attention_scores += attention_bias - # attention scores and attention mask [b, np, sq, sk] attention_probs = self.scale_mask_softmax(attention_scores, attention_mask) # This is actually dropping out entire tokens to attend to, which might @@ -897,36 +938,111 @@ def forward( else: attention_probs = self.attention_dropout(attention_probs) - # ========================= - # Context layer. [sq, b, hp] - # ========================= + # change view [b * np, sq, sk] + attention_probs = rearrange(attention_probs, 'b np sq sk -> (b np) sq sk') - # value_layer -> context layer. 
- # [sk, b, np, hn] --> [b, np, sq, hn] + # matmul: [b * np, sq, hn] + context_layer = torch.bmm(attention_probs, value_layer) - # context layer shape: [b, np, sq, hn] - output_size = (value_layer.size(1), value_layer.size(2), query_layer.size(0), value_layer.size(3)) + # change view [b, np, sq, hn] + context_layer = rearrange(context_layer, '(b np) sq hn -> b np sq hn', np=np) - # change view [sk, b * np, hn] - value_layer = value_layer.view(value_layer.size(0), output_size[0] * output_size[1], -1) + return context_layer - # change view [b * np, sq, sk] - attention_probs = attention_probs.view(output_size[0] * output_size[1], output_size[2], -1) + def flash_attention(self, query_layer, key_layer, value_layer, attention_mask, attention_bias): + query_layer = rearrange(query_layer, 'sq b np hn -> b sq np hn') + key_layer = rearrange(key_layer, 'sk b np hn -> b sk np hn') + value_layer = rearrange(value_layer, 'sv b np hn -> b sv np hn') - # matmul: [b * np, sq, hn] - context_layer = torch.bmm(attention_probs, value_layer.transpose(0, 1)) + # Use to ensure dtype cast to fp16 or bf16 + query_layer = _cast_if_autocast_enabled(query_layer) + key_layer = _cast_if_autocast_enabled(key_layer) + value_layer = _cast_if_autocast_enabled(value_layer) + attention_mask = _cast_if_autocast_enabled(attention_mask) + attention_bias = _cast_if_autocast_enabled(attention_bias) - # change view [b, np, sq, hn] - context_layer = context_layer.view(*output_size) + if attention_bias is not None: + return self.flash_attention_triton(query_layer, key_layer, value_layer, attention_mask, attention_bias,) + else: + return self.flash_attention_cuda(query_layer, key_layer, value_layer, attention_mask,) + + def reset_is_causal(self, query_length, key_length, causal): + if query_length != key_length: + if query_length == 1: + return False + raise NotImplementedError( + "Flash attention does not support query and key with different number of tokens, unless number of query tokens is 1." 
+ ) + return causal + + def flash_attention_cuda(self, query_layer, key_layer, value_layer, attention_mask): + batch_size, seqlen, nheads, _ = query_layer.shape + + # True: attend / False: not attend + if attention_mask is None: + attention_mask_q = torch.ones(batch_size, query_layer.shape[1], device=query_layer.device).bool() + attention_mask_kv = torch.ones(batch_size, key_layer.shape[1], device=query_layer.device).bool() + elif len(attention_mask.shape) == 4: + # [b, 1, sq, sk] -> [b, sq] / [b, sk] + attention_mask_q = torch.any(torch.eq(attention_mask, False), dim=3).squeeze(1) + attention_mask_kv = torch.any(torch.eq(attention_mask, False), dim=2).squeeze(1) + else: + assert len(attention_mask.shape) == 2 + attention_mask_q = attention_mask + attention_mask_kv = attention_mask + + q, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input(query_layer, attention_mask_q) + k, _, cu_seqlens_k, max_seqlen_k = unpad_input(key_layer, attention_mask_kv) + v, _, _, _ = unpad_input(value_layer, attention_mask_kv) + causal = self.reset_is_causal( + query_layer.shape[1], key_layer.shape[1], self.attn_mask_type == AttnMaskType.causal + ) + context_layer = flash_attn_unpadded_func( + q, + k, + v, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + dropout_p=self.attention_dropout_p if self.training else 0.0, + causal=causal, + ) - if headscale_tensor is not None: - context_layer = context_layer * headscale_tensor + # [b, sq, np, hn] + context_layer = pad_input(context_layer, indices_q, batch_size, seqlen) - # [b, np, sq, hn] --> [sq, b, np, hn] - context_layer = context_layer.permute(2, 0, 1, 3).contiguous() + # [b, sq, np, hn] -> [b, np, sq, hn] + context_layer = context_layer.permute(0, 2, 1, 3) + return context_layer - # [sq, b, np, hn] --> [sq, b, hp] - new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,) - context_layer = context_layer.view(*new_context_layer_shape) + def flash_attention_triton(self, query_layer, key_layer, value_layer, attention_mask, attention_bias): + if self.attention_dropout_p > 0.0: + raise NotImplementedError(f'attention_dropout not implemented for flash_attention with attention bias') + + if attention_mask is not None: + if len(attention_mask.shape) == 4: + # [b, 1, sq, sk] -> [b, 1, sq, 1] / [b, 1, 1, sk] + attention_mask_q = torch.any(torch.eq(attention_mask, False), dim=3).unsqueeze(3) + attention_mask_kv = torch.any(torch.eq(attention_mask, False), dim=2).unsqueeze(2) + else: + # [b, s] -> [b, 1, s, 1] / [b, 1, 1, s] + assert len(attention_mask.shape) == 2 + attention_mask_q = attention_mask.unsqueeze(1).unsqueeze(3) + attention_mask_kv = attention_mask.unsqueeze(1).unsqueeze(2) + + attention_bias = attention_bias.masked_fill(~attention_mask_q, torch.finfo(query_layer.dtype).min) + attention_bias = attention_bias.masked_fill(~attention_mask_kv, torch.finfo(query_layer.dtype).min) + + causal = self.reset_is_causal( + query_layer.shape[1], key_layer.shape[1], self.attn_mask_type == AttnMaskType.causal + ) + context_layer = flash_attn_func(query_layer, key_layer, value_layer, attention_bias, causal) + + # [b, sq, np, hn] -> [b, np, sq, hn] + context_layer = context_layer.permute(0, 2, 1, 3) + + if attention_mask is not None: + context_layer = context_layer * attention_mask_q return context_layer diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index b8b12cf0caec..2d10576dc7d0 100755 --- 
a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -21,7 +21,12 @@ ) from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding import ( + ALiBiRelativePositionEmbedding, + KERPLERelativePositionEmbedding, + RotaryEmbedding, + SandwichRelativePositionEmbedding, +) from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer from nemo.collections.nlp.modules.common.megatron.utils import ( ApexGuardDefaults, @@ -116,6 +121,7 @@ def get_language_model( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -191,6 +197,7 @@ def get_language_model( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + use_flash_attention=use_flash_attention, ) # key used for checkpoints. language_model_key = 'language_model' @@ -497,6 +504,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + use_flash_attention=False, ): super(TransformerLanguageModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -518,7 +526,6 @@ def __init__( self.share_embeddings_and_output_weights = share_embeddings_and_output_weights self.sequence_parallel = sequence_parallel self.dtype = utils_funcs.dtype_from_precision(precision, megatron_amp_O2) - if kv_channels is None: assert ( @@ -551,6 +558,40 @@ def __init__( rotary_dim = int(rotary_dim * rotary_percentage) self.rotary_pos_emb = RotaryEmbedding(rotary_dim) + elif position_embedding_type == 'alibi': + # TODO: If this is used for encoder-decodemax_position_embeddingsr model, implement proper logic and following + # addition for decoder. Currently it is only used for decoder model only. + # Encoder-decoder model, such as T5 is implemented in token_level_encoder_decoder.py + self.encoder_relative_position_embedding = ALiBiRelativePositionEmbedding( + bidirectional=encoder_attn_mask_type != AttnMaskType.causal, + num_attention_heads=num_attention_heads, + layer_type=LayerType.encoder, + num_attention_heads_alibi=None, + max_seq_len=max_position_embeddings, + ) + + elif position_embedding_type == 'kerple': + # TODO: If this is used for encoder-decodemax_position_embeddingsr model, implement proper logic and following + # addition for decoder. Currently it is only used for decoder model only. 
+ # Encoder-decoder model, such as T5 is implemented in token_level_encoder_decoder.py + self.encoder_relative_position_embedding = KERPLERelativePositionEmbedding( + bidirectional=encoder_attn_mask_type != AttnMaskType.causal, + num_attention_heads=num_attention_heads, + layer_type=LayerType.encoder, + num_attention_heads_kerple=None, + max_seq_len=max_position_embeddings, + ) + assert use_flash_attention == False # flash-attention not supported with kerple at this point + + elif position_embedding_type == 'sandwich': + self.encoder_relative_position_embedding = SandwichRelativePositionEmbedding( + bidirectional=encoder_attn_mask_type != AttnMaskType.causal, + num_attention_heads=num_attention_heads, + layer_type=LayerType.encoder, + hidden_size=self.hidden_size // num_attention_heads if kv_channels is None else kv_channels, + max_seq_len=max_position_embeddings, + ) + # Transformer. self.encoder = ParallelTransformer( init_method=self.init_method, @@ -602,6 +643,8 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._encoder_key = 'encoder' @@ -642,6 +685,8 @@ def __init__( activations_checkpoint_granularity=activations_checkpoint_granularity, activations_checkpoint_layers_per_pipeline=activations_checkpoint_layers_per_pipeline, transformer_engine=transformer_engine, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._decoder_key = 'decoder' @@ -713,26 +758,35 @@ def forward( pass # enc_attn_mask: [1, 1, s, s] - - if self.position_embedding_type == 'rope': - if inference_max_sequence_len is not None: - rotary_pos_emb = self.rotary_pos_emb(inference_max_sequence_len) - elif self.encoder.input_tensor is not None: - if self.sequence_parallel: - rotary_pos_emb = self.rotary_pos_emb( - self.encoder.input_tensor.size(0) * parallel_state.get_tensor_model_parallel_world_size() - ) - else: - rotary_pos_emb = self.rotary_pos_emb(self.encoder.input_tensor.size(0)) + if inference_max_sequence_len is not None: + enc_seq_length = inference_max_sequence_len + elif self.encoder.input_tensor is not None: + if self.sequence_parallel: + enc_seq_length = ( + self.encoder.input_tensor.size(0) * parallel_state.get_tensor_model_parallel_world_size() + ) else: - if self.sequence_parallel: - rotary_pos_emb = self.rotary_pos_emb( - encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() - ) - else: - rotary_pos_emb = self.rotary_pos_emb(encoder_input.size(0)) + enc_seq_length = self.encoder.input_tensor.size(0) else: - rotary_pos_emb = None + if self.sequence_parallel: + enc_seq_length = encoder_input.size(0) * parallel_state.get_tensor_model_parallel_world_size() + else: + enc_seq_length = encoder_input.size(0) + + rotary_pos_emb = None + encoder_self_attention_relative_position_bias = None + if self.position_embedding_type == 'rope': + rotary_pos_emb = self.rotary_pos_emb(enc_seq_length) + elif ( + self.position_embedding_type == 'alibi' + or self.position_embedding_type == 'sandwich' + or self.position_embedding_type == 'kerple' + ): + encoder_self_attention_relative_position_bias = self.encoder_relative_position_embedding( + query_seq_length=enc_seq_length, key_seq_length=enc_seq_length, + ) + # causal attention bias: [1, head, 1, k] + # non-causal attention bias: [1, head, q, k] # encoder. 
if enc_hidden_states is None: @@ -747,6 +801,7 @@ def forward( rotary_pos_emb=(rotary_pos_emb, None, None) if rotary_pos_emb is not None else None, # This assumes that this being used as a GPT/BERT model only (no cross-attention) + self_attention_relative_position_bias=encoder_self_attention_relative_position_bias, ) else: encoder_output = enc_hidden_states.to(encoder_input.dtype) diff --git a/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py b/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py index ca59bcc8850a..4a94b37aae7b 100644 --- a/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py +++ b/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from torch import nn +import torch +from nemo.collections.nlp.modules.common.megatron.utils import _cast_if_autocast_enabled try: from apex.contrib.layer_norm.layer_norm import FastLayerNorm as OrigFastLayerNorm @@ -35,8 +36,8 @@ def __init__(self, *args, **kwargs): ), 'LayerNorm1P implemented only as an apex.contrib.layer_norm.FastLayerNorm extension' def reset_parameters(self): - nn.init.zeros_(self.weight) - nn.init.zeros_(self.bias) + torch.nn.init.zeros_(self.weight) + torch.nn.init.zeros_(self.bias) def forward(self, x): return _fast_layer_norm(x, self.weight + 1, self.bias, self.epsilon) @@ -44,6 +45,27 @@ def forward(self, x): else: - class LayerNorm1P(nn.Module): + class LayerNorm1P(torch.nn.Module): def __init__(self, *args, **kwargs): raise NotImplementedError('LayerNorm1P available only with apex installed') + + +class LPLayerNorm(torch.nn.LayerNorm): + def __init__(self, normalized_shape, eps=1e-05, elementwise_affine=True, device=None, dtype=None): + super().__init__( + normalized_shape=normalized_shape, + eps=eps, + elementwise_affine=elementwise_affine, + device=device, + dtype=dtype, + ) + + def forward(self, x): + module_device = x.device + downcast_x = _cast_if_autocast_enabled(x) + downcast_weight = _cast_if_autocast_enabled(self.weight) if self.weight is not None else self.weight + downcast_bias = _cast_if_autocast_enabled(self.bias) if self.bias is not None else self.bias + with torch.autocast(enabled=False, device_type=module_device.type): + return torch.nn.functional.layer_norm( + downcast_x, self.normalized_shape, downcast_weight, downcast_bias, self.eps + ) diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py index 28eb39e630fc..ca2000842fe4 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py @@ -88,6 +88,8 @@ def get_decoder_model( moe_dropout=0.0, turn_off_rop=False, # turn off the RoP positional embedding version=1, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -145,6 +147,8 @@ def get_decoder_model( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) elif arch == "retro": decoder = MegatronRetrievalTransformerDecoderModule( diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py index 4005ffbd879e..9f5d917e2077 100644 --- 
a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py @@ -90,6 +90,8 @@ def get_encoder_model( moe_dropout=0.0, turn_off_rop=False, # turn off the RoP positional embedding version=1, # model version + position_embedding_type='learned_absolute', + use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -147,6 +149,8 @@ def get_encoder_model( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) elif arch == "retro": encoder = MegatronRetrievalTransformerEncoderModule( diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py index c3cb1fd05c3b..f2c42597eb83 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py @@ -85,6 +85,8 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): super(MegatronTransformerDecoderModule, self).__init__() @@ -149,6 +151,8 @@ def __init__( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._model_key = 'model' diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py index 2eacf8aad672..60c347338105 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py @@ -82,6 +82,8 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + position_embedding_type='learned_absolute', + use_flash_attention=False, ): super(MegatronTransformerEncoderModule, self).__init__() @@ -96,6 +98,7 @@ def __init__( self.parent_model_type = parent_model_type self.normalization = normalization self.transformer_block_type = transformer_block_type + self.use_flash_attention = use_flash_attention if kv_channels is None: @@ -147,6 +150,8 @@ def __init__( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + position_embedding_type=position_embedding_type, + use_flash_attention=use_flash_attention, ) self._model_key = 'model' @@ -163,9 +168,12 @@ def forward( enc_self_attention_relative_position_bias=None, ): # convert to Megatron mask - enc_attn_mask_3d = build_attention_mask_3d( - source_mask=enc_attn_mask, target_mask=enc_attn_mask, attn_mask_type=self.model_attn_mask_type, - ) + if self.use_flash_attention: + enc_attn_mask_3d = enc_attn_mask < 0.5 + else: + enc_attn_mask_3d = build_attention_mask_3d( + source_mask=enc_attn_mask, target_mask=enc_attn_mask, attn_mask_type=self.model_attn_mask_type, + ) # transformer encoder enc_output = self.model( diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py new file mode 100644 index 000000000000..fdbbed86cb2c --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/__init__.py @@ -0,0 +1,31 @@ +# coding=utf-8 +# Copyright (c) 2023, 
NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.collections.nlp.modules.common.megatron.position_embedding.alibi_relative_position_embedding import ( + ALiBiRelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.kerple_relative_position_embedding import ( + KERPLERelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.rotary_position_embedding import RotaryEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding.sandwich_relative_position_embedding import ( + SandwichRelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.t5_relative_position_embedding import ( + T5RelativePositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.xpos_position_embedding import ( + XPOSPositionEmbedding, +) diff --git a/nemo/collections/nlp/modules/common/megatron/alibi_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/alibi_relative_position_embedding.py similarity index 73% rename from nemo/collections/nlp/modules/common/megatron/alibi_relative_position_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/alibi_relative_position_embedding.py index 4f5abd96743b..6425e288f277 100644 --- a/nemo/collections/nlp/modules/common/megatron/alibi_relative_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/alibi_relative_position_embedding.py @@ -42,20 +42,31 @@ def build_slopes(num_attention_heads, num_attention_heads_alibi): """ Builds a slopes tensor. 
""" - slopes = torch.Tensor( - get_slopes(num_attention_heads_alibi) + [0] * (num_attention_heads - num_attention_heads_alibi) - ).cuda() - return slopes.unsqueeze(-1).unsqueeze(-1) + slopes = ( + torch.Tensor(get_slopes(num_attention_heads_alibi) + [0] * (num_attention_heads - num_attention_heads_alibi)) + .unsqueeze(-1) + .unsqueeze(-1) + ) + if torch.cuda.is_available(): + slopes = slopes.to(torch.cuda.current_device()) -def build_relative_position(query_length, key_length, num_attention_heads): - context_position = torch.arange(query_length)[:, None].cuda() - memory_position = torch.arange(key_length)[None, :].cuda() - # shape (query_length, key_length, num_heads) - relative_position = memory_position - context_position + return slopes + + +def build_relative_position(max_seq_len, full=True): + """ + full=True: shape (max_seq_len, max_seq_len) + full=False: shape (max_seq_len) + """ + relative_position = torch.arange(1 - max_seq_len, 1)[None, :].mul(-1) # (1, max_seq_len) + + if full: + memory_position = torch.arange(1 - max_seq_len, 1)[:, None].mul(-1) + relative_position = torch.abs(memory_position - relative_position) # (max_seq_len, max_seq_len) - # shape (num_attention_heads, max_seq_len, max_seq_len) - relative_position = torch.abs(relative_position).unsqueeze(0).expand(num_attention_heads, -1, -1) + if torch.cuda.is_available(): + relative_position = relative_position.to(torch.cuda.current_device()) return relative_position @@ -68,7 +79,7 @@ class ALiBiRelativePositionEmbedding(torch.nn.Module): """ def __init__( - self, bidirectional, num_attention_heads, layer_type, num_attention_heads_alibi=None, max_seq_len=512 + self, bidirectional, num_attention_heads, layer_type, num_attention_heads_alibi=None, max_seq_len=512, ): """ Args: @@ -101,20 +112,25 @@ def __init__( # cache the slopes self.slopes = build_slopes(num_attention_heads, num_attention_heads_alibi) # cache the relative position bias. 
shape (num_attention_heads, max_seq_len, max_seq_len) - self.relative_position = build_relative_position(max_seq_len, max_seq_len, num_attention_heads) + # if we use causal attention (not bidrectional), we can use singleton relative position + self.relative_position = ( + build_relative_position(max_seq_len, full=bidirectional).unsqueeze(0).expand(num_attention_heads, -1, -1) + ) def forward(self, query_seq_length, key_seq_length): # used cached relative position if possible max_seq_len = max(query_seq_length, key_seq_length) if max_seq_len > self.max_seq_len: - relative_position = build_relative_position(max_seq_len, max_seq_len, self.num_attention_heads) + relative_position = ( + build_relative_position(max_seq_len, full=self.bidirectional) + .unsqueeze(0) + .expand(self.num_attention_heads, -1, -1) + ) else: relative_position = self.relative_position # shape (num_attention_heads, query_seq_length, key_seq_length) - relative_position = relative_position[:, :query_seq_length, :key_seq_length] + relative_position = relative_position[:, -query_seq_length:, -key_seq_length:] # if not bidirectional, mask out the future positions - if not self.bidirectional: - relative_position = torch.tril(relative_position) # shape (1, num_heads, query_length, key_length) return -relative_position.unsqueeze(0) * self.slopes diff --git a/nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py similarity index 81% rename from nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py index 54276d6fa21e..fc0c837da556 100644 --- a/nemo/collections/nlp/modules/common/megatron/kerple_relative_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/kerple_relative_position_embedding.py @@ -17,7 +17,7 @@ import torch -from nemo.collections.nlp.modules.common.megatron.alibi_relative_position_embedding import ( +from nemo.collections.nlp.modules.common.megatron.position_embedding.alibi_relative_position_embedding import ( build_relative_position, build_slopes, ) @@ -33,7 +33,7 @@ class KERPLERelativePositionEmbedding(torch.nn.Module): """ def __init__( - self, bidirectional, num_attention_heads, layer_type, num_attention_heads_kerple=None, max_seq_len=512 + self, bidirectional, num_attention_heads, layer_type, num_attention_heads_kerple=None, max_seq_len=512, ): """ Args: @@ -65,21 +65,26 @@ def __init__( # initialize the slopes self.kerple_b = torch.nn.Parameter(build_slopes(num_attention_heads, num_attention_heads_kerple)) - self.kerple_a = torch.zeros_like(self.kerple_b) - self.kerple_p = torch.ones_like(self.kerple_b) + self.kerple_a = torch.nn.Parameter(torch.ones_like(self.kerple_b)) + self.kerple_p = torch.nn.Parameter(torch.ones_like(self.kerple_b)) # cache the relative position bias. 
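# Illustrative sketch, not part of the patch: a standalone rerun of the arithmetic
# now used by build_relative_position. full=True yields the symmetric |i - j|
# distance matrix; full=False (enough for the causal ALiBi case) yields a single
# row of distances to the last position, which is why the forward pass can slice
# from the right with [..., -query_len:, -key_len:].
import torch

def toy_relative_position(max_seq_len, full=True):
    rel = torch.arange(1 - max_seq_len, 1)[None, :].mul(-1)      # (1, L): [L-1, ..., 1, 0]
    if full:
        mem = torch.arange(1 - max_seq_len, 1)[:, None].mul(-1)  # (L, 1)
        rel = torch.abs(mem - rel)                               # (L, L): |i - j|
    return rel

print(toy_relative_position(4, full=True))
# tensor([[0, 1, 2, 3],
#         [1, 0, 1, 2],
#         [2, 1, 0, 1],
#         [3, 2, 1, 0]])
print(toy_relative_position(4, full=False))
# tensor([[3, 2, 1, 0]])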
shape (num_attention_heads, max_seq_len, max_seq_len) - self.relative_position = build_relative_position(max_seq_len, max_seq_len, num_attention_heads) + # if we use causal attention (not bidrectional), we can use singleton relative position + self.relative_position = ( + build_relative_position(max_seq_len, full=True).unsqueeze(0).expand(num_attention_heads, -1, -1) + ) def forward(self, query_seq_length, key_seq_length): # used cached relative position if possible max_seq_len = max(query_seq_length, key_seq_length) if max_seq_len > self.max_seq_len: - relative_position = build_relative_position(max_seq_len, max_seq_len, self.num_attention_heads) + relative_position = ( + build_relative_position(max_seq_len, full=True).unsqueeze(0).expand(self.num_attention_heads, -1, -1) + ) else: relative_position = self.relative_position # shape (num_attention_heads, query_seq_length, key_seq_length) - relative_position = relative_position[:, :query_seq_length, :key_seq_length] + relative_position = relative_position[:, -query_seq_length:, -key_seq_length:] # if not bidirectional, mask out the future positions if not self.bidirectional: relative_position = torch.tril(relative_position) diff --git a/nemo/collections/nlp/modules/common/megatron/rotary_pos_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py similarity index 96% rename from nemo/collections/nlp/modules/common/megatron/rotary_pos_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py index 191601054ef8..5a8d6d7dd333 100644 --- a/nemo/collections/nlp/modules/common/megatron/rotary_pos_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py @@ -38,7 +38,8 @@ def forward(self, max_seq_len, offset=0): def _rotate_half(x): """ - change sign so the last dimension becomes [-odd, +even] + change sign so the last dimension + [A, B, C, D] -> [-C, -D, A, B] """ x = rearrange(x, '... (j d) -> ... j d', j=2) x1, x2 = x.unbind(dim=-2) diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py new file mode 100644 index 000000000000..0e2dfd7d2ef6 --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/sandwich_relative_position_embedding.py @@ -0,0 +1,75 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
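# Illustrative sketch, not part of the patch: the behaviour described by the
# clarified _rotate_half docstring, [A, B, C, D] -> [-C, -D, A, B], shown with
# plain torch ops instead of the einops rearrange used in the module.
import torch

def toy_rotate_half(x):
    x1, x2 = x.chunk(2, dim=-1)          # split the last dimension in half
    return torch.cat((-x2, x1), dim=-1)  # negate the second half and swap

print(toy_rotate_half(torch.tensor([1., 2., 3., 4.])))
# tensor([-3., -4.,  1.,  2.])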
+ +import torch + +from nemo.collections.nlp.modules.common.megatron.position_embedding.alibi_relative_position_embedding import ( + build_relative_position, +) +from nemo.utils.decorators import experimental + +__all__ = ['SandwichRelativePositionEmbedding'] + + +@experimental +class SandwichRelativePositionEmbedding(torch.nn.Module): + """ + Dissecting Transformer Length Extrapolation via the Lens of Receptive Field Analysis + Based on https://arxiv.org/abs/2212.10356 + """ + + def __init__( + self, bidirectional, num_attention_heads, layer_type, hidden_size, max_seq_len=512, + ): + """ + Args: + num_attention_heads: Number of attention heads + hidden_size: Hidden size per attention head + """ + super().__init__() + self.bidirectional = bidirectional + self.layer_type = layer_type + self.num_attention_heads = num_attention_heads + self.hidden_size = hidden_size + self.max_seq_len = max_seq_len + self.relative_position = build_relative_position(max_seq_len, full=True) + + def forward(self, query_seq_length, key_seq_length): + # used cached relative position if possible + max_seq_len = max(query_seq_length, key_seq_length) + if max_seq_len > self.max_seq_len: + relative_position = build_relative_position(max_seq_len, full=True) + else: + relative_position = self.relative_position + + # shape (query_seq_length, key_seq_length) + relative_position = relative_position[-query_seq_length:, -key_seq_length:] + # if not bidirectional, mask out the future positions + if not self.bidirectional: + relative_position = torch.tril(relative_position) + + inv_freq = 1.0 / ( + 10000 + ** (2 * torch.arange(1, self.hidden_size / 2 + 1, device=relative_position.device) / self.hidden_size) + ) + + _bias = torch.sum((relative_position[:, :, None].repeat(1, 1, len(inv_freq)) * inv_freq).cos(), axis=2) + bias = _bias.repeat(self.num_attention_heads, 1, 1) + + _bias_scales = torch.arange(1, self.num_attention_heads + 1, 1, device=relative_position.device) + bias_scales = _bias_scales[:, None, None] + + scaled_bias = (bias - self.hidden_size / 2) / (bias_scales * 8 / self.num_attention_heads).unsqueeze(0) + + return scaled_bias diff --git a/nemo/collections/nlp/modules/common/megatron/t5_relative_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/t5_relative_position_embedding.py similarity index 95% rename from nemo/collections/nlp/modules/common/megatron/t5_relative_position_embedding.py rename to nemo/collections/nlp/modules/common/megatron/position_embedding/t5_relative_position_embedding.py index c2a0c8661acf..4566d9aa7876 100644 --- a/nemo/collections/nlp/modules/common/megatron/t5_relative_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/t5_relative_position_embedding.py @@ -43,9 +43,7 @@ def __init__( # Relative position Embedding # Relative Position embedding (all attention layers). 
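# Illustrative sketch, not part of the patch: the core of the sandwich bias is a
# sum of cosines of the |i - j| distance over a bank of inverse frequencies, then
# rescaled per attention head. The sizes below are made up; only the arithmetic
# mirrors SandwichRelativePositionEmbedding.forward.
import torch

num_heads, hidden_size, seq_len = 2, 8, 5
distance = torch.abs(torch.arange(seq_len)[:, None] - torch.arange(seq_len)[None, :]).float()

inv_freq = 1.0 / (10000 ** (2 * torch.arange(1, hidden_size // 2 + 1) / hidden_size))
bias = torch.sum((distance[:, :, None] * inv_freq).cos(), dim=2)   # (seq, seq)
bias = bias.repeat(num_heads, 1, 1)                                # (heads, seq, seq)

bias_scales = torch.arange(1, num_heads + 1)[:, None, None]
scaled_bias = (bias - hidden_size / 2) / (bias_scales * 8 / num_heads).unsqueeze(0)
print(scaled_bias.shape)   # torch.Size([1, 2, 5, 5])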
- self.relative_position_embedding = torch.nn.Embedding( - self.relative_position_num_buckets, num_attention_heads - ).to(torch.cuda.current_device()) + self.relative_position_embedding = torch.nn.Embedding(self.relative_position_num_buckets, num_attention_heads) self._relative_position_embedding_key = 'relative_position_embedding' init_method(self.relative_position_embedding.weight) @@ -104,8 +102,9 @@ def _compute_relative_position_bucket(self, query_length, key_length): """ """Compute binned relative position bias""" - context_position = torch.arange(query_length, dtype=torch.long, device=torch.cuda.current_device())[:, None] - memory_position = torch.arange(key_length, dtype=torch.long, device=torch.cuda.current_device())[None, :] + device = self.relative_position_embedding.weight.device + context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None] + memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :] relative_position = memory_position - context_position # shape (query_length, key_length) relative_position_bucket_tensor = self._relative_position_bucket( diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py new file mode 100644 index 000000000000..ef59234790c5 --- /dev/null +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/xpos_position_embedding.py @@ -0,0 +1,78 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.nn as nn +from einops import rearrange +from nemo.utils.decorators import experimental + + +def fixed_pos_embedding(x): + seq_len, dim = x.shape + inv_freq = 1.0 / (10000 ** (torch.arange(0, dim) / dim)) + sinusoid_inp = torch.einsum("i , j -> i j", torch.arange(0, seq_len, dtype=torch.float), inv_freq).to(x) + return torch.sin(sinusoid_inp), torch.cos(sinusoid_inp) + + +def rotate_every_two(x): + x1 = x[:, :, ::2] + x2 = x[:, :, 1::2] + x = torch.stack((-x2, x1), dim=-1) + return x.flatten(-2) # in einsum notation: rearrange(x, '... d j -> ... (d j)')\ + + +def duplicate_interleave(m): + """ + A simple version of `torch.repeat_interleave` for duplicating a matrix while interleaving the copy. 
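# Illustrative sketch, not part of the patch: the T5 relative position embedding
# now takes its device from the embedding weight instead of assuming CUDA, so the
# same bucket computation also runs on CPU-only setups. The sizes are arbitrary.
import torch

emb = torch.nn.Embedding(32, 4)          # stays on CPU in this toy example
device = emb.weight.device
context_position = torch.arange(5, dtype=torch.long, device=device)[:, None]
memory_position = torch.arange(5, dtype=torch.long, device=device)[None, :]
relative_position = memory_position - context_position   # (query_len, key_len)
print(relative_position.device)          # cpu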
+ """ + dim0 = m.shape[0] + m = m.view(-1, 1) # flatten the matrix + m = m.repeat(1, 2) # repeat all elements into the 2nd dimension + m = m.view(dim0, -1) # reshape into a matrix, interleaving the copy + return m + + +def apply_rotary_pos_emb(x, sin, cos, scale=1): + sin, cos = map(lambda t: duplicate_interleave(t * scale), (sin, cos)) + # einsum notation for lambda t: repeat(t[offset:x.shape[1]+offset,:], "n d -> () n () (d j)", j=2) + return (x * cos) + (rotate_every_two(x) * sin) + + +@experimental +class XPOSPositionEmbedding(nn.Module): + def __init__(self, head_dim, scale_base=2048): + super().__init__() + self.head_dim = head_dim + self.scale_base = scale_base + self.register_buffer("scale", (torch.arange(0, head_dim, 2) + 0.4 * head_dim) / (1.4 * head_dim)) + + def forward(self, x, offset=0, downscale=False): + length, b = x.shape[0], x.shape[1] + x = rearrange(x, 's b np hn -> (b np) s hn') + min_pos = -(length + offset) // 2 + max_pos = length + offset + min_pos + scale = self.scale ** torch.arange(min_pos, max_pos, 1).to(self.scale).div(self.scale_base)[:, None] + sin, cos = fixed_pos_embedding(scale) + + if scale.shape[0] > length: + scale = scale[-length:] + sin = sin[-length:] + cos = cos[-length:] + + if downscale: + scale = 1 / scale + + x = apply_rotary_pos_emb(x, sin, cos, scale) + x = rearrange(x, '(b np) s hn -> s b np hn', b=b) + return x diff --git a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py index 73c41cee6c6f..83dea362c3e1 100644 --- a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py @@ -19,7 +19,7 @@ from einops import rearrange, repeat from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding +from nemo.collections.nlp.modules.common.megatron.position_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.transformer import ParallelTransformer from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, build_attention_mask_3d diff --git a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py index 229a9af48048..fc16295020fb 100644 --- a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py +++ b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py @@ -15,12 +15,6 @@ import torch from omegaconf import DictConfig -from nemo.collections.nlp.modules.common.megatron.alibi_relative_position_embedding import ( - ALiBiRelativePositionEmbedding, -) -from nemo.collections.nlp.modules.common.megatron.kerple_relative_position_embedding import ( - KERPLERelativePositionEmbedding, -) from nemo.collections.nlp.modules.common.megatron.language_model import Embedding from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_decoders import get_decoder_model @@ -29,7 +23,11 @@ ) from nemo.collections.nlp.modules.common.megatron.megatron_encoders import get_encoder_model from nemo.collections.nlp.modules.common.megatron.module import MegatronModule -from nemo.collections.nlp.modules.common.megatron.t5_relative_position_embedding import T5RelativePositionEmbedding +from 
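# Illustrative sketch, not part of the patch: the xPos scale schedule in isolation.
# Each rotary frequency gets a base scale below 1 that is raised to a power growing
# with position; the downscale flag inverts it, so one side of the attention
# product can be attenuated relative to the other. Positions here start at 0 for
# simplicity, whereas the module centres them around the sequence midpoint.
import torch

head_dim, scale_base, seq_len = 8, 2048, 6
scale = (torch.arange(0, head_dim, 2) + 0.4 * head_dim) / (1.4 * head_dim)  # per-frequency base
positions = torch.arange(seq_len, dtype=torch.float)[:, None]
per_pos_scale = scale ** (positions / scale_base)                            # (seq, head_dim // 2)
inverse_scale = 1.0 / per_pos_scale                                          # downscale=True path
print(per_pos_scale.shape, inverse_scale.shape)   # torch.Size([6, 4]) torch.Size([6, 4])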
nemo.collections.nlp.modules.common.megatron.position_embedding import ( + ALiBiRelativePositionEmbedding, + KERPLERelativePositionEmbedding, + T5RelativePositionEmbedding, +) from nemo.collections.nlp.modules.common.megatron.utils import ( ApexGuardDefaults, build_position_ids, @@ -197,6 +195,11 @@ def __init__( else: self.encoder_relative_position_embedding = None + if encoder_cfg.get('use_flash_attention', False) and encoder_cfg.get( + 'position_embedding_type', 'learned_absolute' + ) in ['relative', 'kerple']: + raise ValueError('flash-attention not supported with relative or kerple at this point') + encoder = get_encoder_model( arch=encoder_cfg.arch, hidden_size=encoder_cfg.hidden_size, @@ -243,6 +246,8 @@ def __init__( num_moe_experts=encoder_cfg.get('num_moe_experts', 1), moe_frequency=encoder_cfg.get('moe_frequency', 1), moe_dropout=encoder_cfg.get('moe_dropout', 0.0), + position_embedding_type=encoder_cfg.get('position_embedding_type', 'learned_absolute'), + use_flash_attention=encoder_cfg.get('use_flash_attention', False), ) if add_decoder: @@ -307,6 +312,7 @@ def __init__( ): self.decoder_cross_attention_relative_position_embeddings_weight().data.fill_(0) self.decoder_cross_attention_relative_position_embeddings_weight().shared = True + elif self.decoder_cfg.get('position_embedding_type', 'learned_absolute') == 'alibi': self.decoder_relative_position_embedding = ALiBiRelativePositionEmbedding( bidirectional=False, @@ -328,6 +334,11 @@ def __init__( else: self.decoder_relative_position_embedding = None + if decoder_cfg.get('use_flash_attention', False) and decoder_cfg.get( + 'position_embedding_type', 'learned_absolute' + ) in ['relative', 'kerple']: + raise ValueError('flash-attention not supported with relative or kerple at this point') + decoder = get_decoder_model( arch=decoder_cfg.arch, hidden_size=decoder_cfg.hidden_size, @@ -373,6 +384,8 @@ def __init__( num_moe_experts=decoder_cfg.get('num_moe_experts', 1), moe_frequency=decoder_cfg.get('moe_frequency', 1), moe_dropout=decoder_cfg.get('moe_dropout', 0.0), + position_embedding_type=decoder_cfg.get('position_embedding_type', 'learned_absolute'), + use_flash_attention=decoder_cfg.get('use_flash_attention', False), ) self.enc_dec_model = MegatronTransformerEncoderDecoderModule( diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index f5dfbcabcd0e..8a0b22b4d289 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -18,6 +18,7 @@ from typing import Any, Callable, Optional import torch +import torch.nn as nn from einops import rearrange from nemo.collections.common.parts.adapter_modules import LinearAdapterConfig @@ -33,7 +34,7 @@ dropout_add, ) from nemo.collections.nlp.modules.common.megatron.fused_layer_norm import get_layer_norm -from nemo.collections.nlp.modules.common.megatron.layer_norm_1p import LayerNorm1P +from nemo.collections.nlp.modules.common.megatron.layer_norm_1p import LayerNorm1P, LPLayerNorm from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.mlp import ParallelMLP, SwitchMLP from nemo.collections.nlp.modules.common.megatron.module import MegatronModule @@ -115,6 +116,12 @@ def _dropout_add(x, bias, residual, prob): return _dropout_add +def remove_bias_from_layernorm(layer): + for module in layer.modules(): + if hasattr(module, 'bias') and isinstance(module.bias, 
nn.Parameter): + module.register_parameter('bias', None) + + class ParallelTransformerLayer_(MegatronModule, adapter_mixins.AdapterModuleMixin): """A single transformer layer. @@ -164,6 +171,7 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + use_flash_attention=False, ): super(ParallelTransformerLayer_, self).__init__() @@ -187,7 +195,9 @@ def __init__( 'bias_dropout_add_fusion=True requires bias=True, found bias=False. Either set both to True or both to False.' ) - if normalization not in ['layernorm', 'layernorm1p', 'rmsnorm']: + # the low_precision_layernorm does not require a bias term, whereas layernorm1p from apex + # does require a bias, so it cannot be used for bias-less low precision LN such as in MPT-7B + if normalization not in ['layernorm', 'layernorm1p', 'rmsnorm', 'low_precision_layernorm']: raise ValueError(f'normalization must be "layernorm", "layernorm1p" or "rmsnorm", found {normalization}') if transformer_block_type not in ['pre_ln', 'post_ln', 'normformer']: @@ -212,8 +222,16 @@ def __init__( self.input_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.input_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.input_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + # for architectures such as MPT, there is no bias term even on the layernorms + # this code allows us to remove the bias terms from the layernorm module + # so that we can support MPT. However, certain apex-based LNs don't support + # removing bias, so we also have to check for that + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.input_layernorm) self.self_attention = ParallelAttention( init_method=init_method, @@ -240,6 +258,7 @@ def __init__( sequence_parallel=sequence_parallel, gradient_accumulation_fusion=gradient_accumulation_fusion, normalize_attention_scores=normalize_attention_scores, + use_flash_attention=use_flash_attention, ) if transformer_block_type == 'normformer': @@ -261,8 +280,12 @@ def __init__( self.post_attention_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.post_attention_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.post_attention_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.post_attention_layernorm) if self.layer_type == LayerType.decoder_pre_mlp: # skip MLP and cross attention @@ -280,8 +303,12 @@ def __init__( self.post_attention_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.post_attention_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.post_attention_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.post_attention_layernorm) if self.layer_type == LayerType.decoder or self.layer_type == LayerType.retrieval_encoder: self.inter_attention = ParallelAttention( @@ -669,6 +696,7 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + use_flash_attention=False, ): super(ParallelTransformerLayer, self).__init__( init_method=init_method, @@ -711,6 +739,7 @@ def 
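# Illustrative sketch, not part of the patch: what remove_bias_from_layernorm does,
# demonstrated on a plain torch.nn.LayerNorm instead of the Megatron/apex norms.
# After the call the norm has no bias parameter, matching MPT-style bias-less layers.
import torch.nn as nn

def toy_remove_bias_from_layernorm(layer: nn.Module) -> None:
    for module in layer.modules():
        if hasattr(module, "bias") and isinstance(module.bias, nn.Parameter):
            module.register_parameter("bias", None)

ln = nn.LayerNorm(16)
toy_remove_bias_from_layernorm(ln)
print(ln.bias)                                       # None
print([name for name, _ in ln.named_parameters()])   # ['weight']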
__init__( num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + use_flash_attention=use_flash_attention, ) # Dtype for forward pass - ignore amp O2 @@ -924,6 +953,7 @@ def __init__( num_moe_experts=1, moe_frequency=1, moe_dropout=0.0, + use_flash_attention=False, ): super(ParallelTransformer, self).__init__() @@ -1104,6 +1134,7 @@ def build_layer(layer_number): num_moe_experts=num_moe_experts, moe_frequency=moe_frequency, moe_dropout=moe_dropout, + use_flash_attention=use_flash_attention, ) if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: @@ -1154,8 +1185,16 @@ def build_layer(layer_number): self.final_layernorm = LayerNorm1P( hidden_size, layernorm_epsilon, sequence_parallel_enabled=sequence_parallel ) + elif normalization == 'low_precision_layernorm': + self.final_layernorm = LPLayerNorm(hidden_size, layernorm_epsilon) else: self.final_layernorm = MixedFusedRMSNorm(hidden_size, layernorm_epsilon) + # for architectures such as MPT, there is no bias term even on the layernorms + # this code allows us to remove the bias terms from the layernorm module + # so that we can support MPT. However, certain apex-based LNs don't support + # removing bias, so we also have to check for that + if not bias and normalization not in ['layernorm', 'layernorm1p']: + remove_bias_from_layernorm(self.final_layernorm) def _get_layer(self, layer_number): return self.layers[layer_number] diff --git a/nemo/collections/nlp/modules/common/megatron/utils.py b/nemo/collections/nlp/modules/common/megatron/utils.py index 8ef46c10d49b..7c7a428fa43f 100644 --- a/nemo/collections/nlp/modules/common/megatron/utils.py +++ b/nemo/collections/nlp/modules/common/megatron/utils.py @@ -179,7 +179,9 @@ def average_losses_across_data_parallel_group(losses): return averaged_losses -def get_ltor_masks_and_position_ids(data, eod_token, reset_position_ids, reset_attention_mask, eod_mask_loss): +def get_ltor_masks_and_position_ids( + data, eod_token, reset_position_ids, reset_attention_mask, eod_mask_loss, compute_attention_mask=True +): """Build masks and position id for left to right model.""" # Extract batch size and sequence length. @@ -190,9 +192,12 @@ def get_ltor_masks_and_position_ids(data, eod_token, reset_position_ids, reset_a att_mask_batch = micro_batch_size else: att_mask_batch = 1 - attention_mask = torch.tril(torch.ones((att_mask_batch, seq_length, seq_length), device=data.device)).view( - att_mask_batch, 1, seq_length, seq_length - ) + + attention_mask = None + if compute_attention_mask: + attention_mask = torch.tril(torch.ones((att_mask_batch, seq_length, seq_length), device=data.device)).view( + att_mask_batch, 1, seq_length, seq_length + ) # Loss mask. 
loss_mask = torch.ones(data.size(), dtype=torch.float, device=data.device) @@ -228,8 +233,9 @@ def get_ltor_masks_and_position_ids(data, eod_token, reset_position_ids, reset_a position_ids[b, (i + 1) :] -= i + 1 - prev_index prev_index = i + 1 - # Convert attention mask to binary: - attention_mask = attention_mask < 0.5 + if compute_attention_mask: + # Convert attention mask to binary: + attention_mask = attention_mask < 0.5 return attention_mask, loss_mask, position_ids @@ -381,3 +387,16 @@ def get_iterator_k_split(batch: List[torch.Tensor], num_microbatches: int) -> It microbatches = [[elem[i] for elem in split_batch] for i in range(num_microbatches)] return itertools.chain(microbatches) + + +def _cast_if_autocast_enabled(tensor): + if torch.is_autocast_enabled(): + if isinstance(tensor, torch.Tensor): + if tensor.device.type == 'cuda': + dtype = torch.get_autocast_gpu_dtype() + elif tensor.device.type == 'cpu': + dtype = torch.get_autocast_cpu_dtype() + else: + raise NotImplementedError() + return tensor.to(dtype=dtype) + return tensor diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 310065fc3523..8608c0c9a680 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -97,10 +97,11 @@ def clip_max_len(self, maxlen: int) -> int: pass @abc.abstractclassmethod - def init_batch(self, context_tokens: torch.Tensor, context_length: int): + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): """initialize the batch data before the inference steps. It will save the intermediate results as object attributes context_length (int): the context token length + compute_attention_mask: bool: set to True to compute attention mask (not needed for FA) Args: context_tokens (torch.Tensor): The padded context tokens including the space for tokens to be generated """ @@ -187,7 +188,7 @@ def clip_max_len(self, maxlen: int) -> int: maxlen = self.model.cfg.encoder_seq_length + 1 return maxlen - def init_batch(self, context_tokens: torch.Tensor, context_length: int): + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): """initialize the batch data before the inference steps.""" # Move to GPU. 
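# Illustrative sketch, not part of the patch: the effect of the new
# compute_attention_mask flag in a trimmed-down stand-in for
# get_ltor_masks_and_position_ids. When flash attention handles causality itself,
# the dense [1, 1, seq, seq] triangular mask is skipped and None is returned.
import torch

def toy_causal_mask(seq_length, compute_attention_mask=True):
    if not compute_attention_mask:
        return None
    mask = torch.tril(torch.ones((1, seq_length, seq_length))).view(1, 1, seq_length, seq_length)
    return mask < 0.5  # convert to binary: True = masked out, as in the patch

print(toy_causal_mask(4).shape)                           # torch.Size([1, 1, 4, 4])
print(toy_causal_mask(4, compute_attention_mask=False))   # None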
tokenizer = self.model.tokenizer @@ -199,10 +200,17 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int): self.model.cfg.get('reset_position_ids', False), self.model.cfg.get('reset_attention_mask', False), self.model.cfg.get('eod_mask_loss', False), + compute_attention_mask=compute_attention_mask, ) def prepare_batch_at_step( - self, tokens: torch.Tensor, maxlen: int, micro_batch_size: int, step: int, context_length: int + self, + tokens: torch.Tensor, + maxlen: int, + micro_batch_size: int, + step: int, + context_length: int, + compute_attention_mask: bool = True, ) -> Tuple[List[torch.Tensor], List[int]]: """ generate the batch used in inference for each of the steps @@ -226,7 +234,10 @@ def prepare_batch_at_step( # types2use = type_ids[:, context_length - 1].view(batch_size, -1) """Prepare batch for each of the inference steps""" - attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + attention_mask_repeat = None + if compute_attention_mask: + attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + setkey_value_array = torch.tensor( [set_inference_key_value_memory] * micro_batch_size, device=torch.cuda.current_device() ) @@ -243,7 +254,7 @@ def __init__(self, model, task_ids): self.task_ids = task_ids self.forward_model = self.model - def init_batch(self, context_tokens: torch.Tensor, context_length: int): + def init_batch(self, context_tokens: torch.Tensor, context_length: int, compute_attention_mask: bool): """initialize the batch data before the inference steps.""" # Move to GPU. tokenizer = self.model.tokenizer @@ -255,6 +266,7 @@ def init_batch(self, context_tokens: torch.Tensor, context_length: int): self.model.cfg.get('reset_position_ids', False), self.model.cfg.get('reset_attention_mask', False), self.model.cfg.get('eod_mask_loss', False), + compute_attention_mask=compute_attention_mask, ) def clip_max_len(self, maxlen: int) -> int: @@ -264,7 +276,13 @@ def clip_max_len(self, maxlen: int) -> int: return maxlen def prepare_batch_at_step( - self, tokens: torch.Tensor, maxlen: int, micro_batch_size: int, step: int, context_length: int + self, + tokens: torch.Tensor, + maxlen: int, + micro_batch_size: int, + step: int, + context_length: int, + compute_attention_mask: bool, ) -> Tuple[List[torch.Tensor], List[int]]: # types2use = None if step == 0: @@ -285,7 +303,9 @@ def prepare_batch_at_step( # types2use = type_ids[:, context_length - 1].view(batch_size, -1) """Prepare batch for each of the inference steps""" - attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) + attention_mask_repeat = None + if compute_attention_mask: + attention_mask_repeat = torch.concat([self.attention_mask for _ in range(micro_batch_size)]) setkey_value_array = torch.tensor( [set_inference_key_value_memory] * micro_batch_size, device=torch.cuda.current_device() ) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index a56304970bdc..6417f887c0cd 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -105,7 +105,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], min_tokens_to_generate=length_params['min_length'], - **strategy_args, + 
compute_attention_mask=sampling_params.get("compute_attention_mask", True) ** strategy_args, ) compute_prob_response = get_computeprob_response(tokenizer, response, inputs) return compute_prob_response @@ -376,6 +376,7 @@ def synced_generate( top_k=0, top_p=0.0, greedy=False, + compute_attention_mask=True, compute_logprob=False, repetition_penalty=1.2, min_tokens_to_generate=0, @@ -401,6 +402,7 @@ def synced_generate( context_length_tensor, tokens_to_generate, all_probs, + compute_attention_mask=compute_attention_mask, compute_logprob=compute_logprob, temperature=temperature, end_strings=end_strings, @@ -469,6 +471,7 @@ def generate( top_k=0, top_p=0.0, greedy=False, + compute_attention_mask=True, compute_logprob=False, repetition_penalty=1.0, min_tokens_to_generate=0, @@ -550,6 +553,7 @@ def generate( tokens_to_generate, all_probs, temperature, + compute_attention_mask=compute_attention_mask, compute_logprob=compute_logprob, top_k=top_k, top_p=top_p, @@ -635,6 +639,7 @@ def sample_sequence_batch( context_lengths, tokens_to_generate, all_probs=False, + compute_attention_mask=True, compute_logprob=False, type_ids=None, temperature=None, @@ -666,7 +671,7 @@ def sample_sequence_batch( # initialize the batch with torch.no_grad(): context_length = context_lengths.min().item() - inference_strategy.init_batch(context_tokens, context_length) + inference_strategy.init_batch(context_tokens, context_length, compute_attention_mask) # added eos_id to support the function generate_samples_eval that passes # eos_id as an argument and needs termination when that id id found. eod_id = tokenizer.eos_id @@ -685,7 +690,7 @@ def sample_sequence_batch( lengths = torch.ones([batch_size]).long().cuda() * maxlen while context_length < maxlen: batch, tensor_shape = inference_strategy.prepare_batch_at_step( - tokens, maxlen, micro_batch_size, counter, context_length + tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask ) output = inference_strategy.forward_step(batch, tensor_shape) diff --git a/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py b/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py index 6e992f5348ae..22c657b25613 100644 --- a/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py +++ b/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py @@ -1,76 +1,77 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Use this file to create a lexicon file for Flashlight decoding from an existing KenLM arpa file -# A lexicon file is required for Flashlight decoding in most cases, as it acts as a map from the words -# in you arpa file to the representation used by your ASR AM. 
-# For more details, see: https://github.com/flashlight/flashlight/tree/main/flashlight/app/asr#data-preparation -# -# Usage: python create_lexicon_from_arpa.py --arpa /path/to/english.arpa --model /path/to/model.nemo --lower -# -# - - -import argparse -import os -import re - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Utility script for generating lexicon file from a KenLM arpa file") - parser.add_argument("--arpa", required=True, help="path to your arpa file") - parser.add_argument("--dst", help="directory to store generated lexicon", default=None) - parser.add_argument("--lower", action='store_true', help="Whether to lowercase the arpa vocab") - parser.add_argument("--model", default=None, help="path to Nemo model for its tokeniser") - - args = parser.parse_args() - - if not os.path.exists(args.arpa): - print("ARPA file not detected on disk, aborting!", flush=True) - exit(255) - - if args.dst is not None: - save_path = args.dst - else: - save_path = os.path.dirname(args.arpa) - os.makedirs(save_path, exist_ok=True) - - tokenizer = None - if args.model is not None: - from nemo.collections.asr.models import ASRModel - - model = ASRModel.restore_from(restore_path=args.model, map_location='cpu') - if hasattr(model, 'tokenizer'): - tokenizer = model.tokenizer - else: - print('WARNING: supplied Nemo model does not contain a tokenizer', flush=True) - - lex_file = os.path.join(save_path, os.path.splitext(os.path.basename(args.arpa))[0] + '.lexicon') - print(f"Writing Lexicon file - {lex_file}...", flush=True) - with open(lex_file, "w", encoding='utf_8', newline='\n') as f: - with open(args.arpa, "r", encoding='utf_8') as arpa: - for line in arpa: - # verify if the line corresponds to unigram - if not re.match(r"[-]*[0-9\.]+\t\S+\t*[-]*[0-9\.]*$", line): - continue - word = line.split("\t")[1] - word = word.strip().lower() if args.lower else word.strip() - if word == "" or word == "" or word == "" or word == "": - continue - - if tokenizer is None: - f.write("{w}\t{s}\n".format(w=word, s=" ".join(word))) - else: - f.write("{w}\t{s}\n".format(w=word, s=" ".join(tokenizer.text_to_tokens(word)))) - - print("Done!", flush=True) +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Use this file to create a lexicon file for Flashlight decoding from an existing KenLM arpa file +# A lexicon file is required for Flashlight decoding in most cases, as it acts as a map from the words +# in you arpa file to the representation used by your ASR AM. 
+# For more details, see: https://github.com/flashlight/flashlight/tree/main/flashlight/app/asr#data-preparation +# +# Usage: python create_lexicon_from_arpa.py --arpa /path/to/english.arpa --model /path/to/model.nemo --lower +# +# + + +import argparse +import os +import re + +from nemo.utils import logging + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Utility script for generating lexicon file from a KenLM arpa file") + parser.add_argument("--arpa", required=True, help="path to your arpa file") + parser.add_argument("--dst", help="directory to store generated lexicon", default=None) + parser.add_argument("--lower", action='store_true', help="Whether to lowercase the arpa vocab") + parser.add_argument("--model", default=None, help="path to Nemo model for its tokeniser") + + args = parser.parse_args() + + if not os.path.exists(args.arpa): + logging.critical(f"ARPA file [ {args.arpa} ] not detected on disk, aborting!") + exit(255) + + if args.dst is not None: + save_path = args.dst + else: + save_path = os.path.dirname(args.arpa) + os.makedirs(save_path, exist_ok=True) + + tokenizer = None + if args.model is not None: + from nemo.collections.asr.models import ASRModel + + model = ASRModel.restore_from(restore_path=args.model, map_location='cpu') + if hasattr(model, 'tokenizer'): + tokenizer = model.tokenizer + else: + logging.warning('Supplied Nemo model does not contain a tokenizer') + + lex_file = os.path.join(save_path, os.path.splitext(os.path.basename(args.arpa))[0] + '.lexicon') + + logging.info(f"Writing Lexicon file to: {lex_file}...") + with open(lex_file, "w", encoding='utf_8', newline='\n') as f: + with open(args.arpa, "r", encoding='utf_8') as arpa: + for line in arpa: + # verify if the line corresponds to unigram + if not re.match(r"[-]*[0-9\.]+\t\S+\t*[-]*[0-9\.]*$", line): + continue + word = line.split("\t")[1] + word = word.strip().lower() if args.lower else word.strip() + if word == "" or word == "" or word == "" or word == "": + continue + + if tokenizer is None: + f.write("{w}\t{s}\n".format(w=word, s=" ".join(word))) + else: + f.write("{w}\t{s}\n".format(w=word, s=" ".join(tokenizer.text_to_tokens(word)))) diff --git a/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py b/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py new file mode 100644 index 000000000000..14d7b6ae54ea --- /dev/null +++ b/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py @@ -0,0 +1,212 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +A script to convert the Mosaic MPT-7B checkpoint on HuggingFace to Megatron GPTModel +This script is hardcoded specifically for the MPT-7B pretrained model only, and is not +generalisable to any other models. + +This script will load and convert the model entirely on CPU for OOM safety, but there +is an option to put the model onto GPU before the save down, which sets the map_location +to cuda for the restore_from call. 
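# Illustrative sketch, not part of the patch: what the unigram regex in
# create_lexicon_from_arpa.py keeps -- ARPA 1-gram lines of the form
# "<logprob>\t<word>[\t<backoff>]". The sample lines below are made up.
import re

pattern = r"[-]*[0-9\.]+\t\S+\t*[-]*[0-9\.]*$"
lines = ["-2.35\thello\t-0.12", "-3.1\tworld", "\\1-grams:", "ngram 1=42"]
print([line for line in lines if re.match(pattern, line)])
# ['-2.35\thello\t-0.12', '-3.1\tworld']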
You can do this by adding --cuda to this script call. + +This script requires that you have downloaded the 2 .bin weight files for MPT-7B from +HuggingFace located here: https://huggingface.co/mosaicml/mpt-7b/tree/main +These files MUST have the following file names and be saved somewhere where this script +can read them: + pytorch_model-00001-of-00002.bin + pytorch_model-00002-of-00002.bin + +This script will generate a Megatron model with TP=1 and PP=1. If you need different TP/PP +values, then after running this script, please use the script located below to set whatever +TP/PP values you want: + NeMo/examples/nlp/language_modeling/megatron_change_num_partitions.py + + +Here is an example usage command: + +```python +python scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py -i /path/to/mpt_7b -o /path/to/save +``` + +""" + + +import argparse +import os + +import pytorch_lightning as pl +import torch +from omegaconf import OmegaConf + +from nemo.collections.nlp.models.language_modeling.megatron import GPTModel +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.utils import logging + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '-i', '--input', required=True, type=str, help='path to the two MPT-7B .bin weight files from HuggingFace' + ) + parser.add_argument( + '-o', '--output', required=False, default=None, type=str, help='path to dir where to store output .nemo file' + ) + parser.add_argument('--cuda', action='store_true', help='put Nemo model onto GPU prior to savedown') + + args = parser.parse_args() + + if not os.path.exists(args.input): + logging.critical(f'Input directory [ {args.input} ] does not exist or cannot be found. Aborting.') + exit(255) + + model_dict = { + 'micro_batch_size': 4, + 'global_batch_size': 8, + 'rampup_batch_size': None, + 'tensor_model_parallel_size': 1, + 'pipeline_model_parallel_size': 1, + 'virtual_pipeline_model_parallel_size': None, + 'megatron_amp_O2': True, + 'transformer_engine': False, + 'use_cpu_initialization': True, + 'hidden_size': 4096, + 'max_position_embeddings': 2048, + 'num_layers': 32, + 'num_attention_heads': 32, + 'ffn_hidden_size': 4 * 4096, + 'precision': 'bf16', + 'pre_process': True, + 'post_process': True, + 'num_tokentypes': 0, + 'apply_query_key_layer_scaling': False, + 'parallel_output': False, + 'bias': False, + 'bias_dropout_add_fusion': False, + 'bias_activation_fusion': False, + 'transformer_block_type': 'pre_ln', + 'normalization': 'low_precision_layernorm', + 'fp32_residual_connection': False, + 'hidden_dropout': 0, + 'attention_dropout': 0, + 'ffn_dropout': 0, + 'megatron_legacy': True, + 'share_embeddings_and_output_weights': True, + 'sequence_parallel': False, + 'position_embedding_type': 'alibi', + 'normalize_attention_scores': True, + 'use_flash_attention': False, + 'override_vocab_size': 50432, + } + tokeniser_dict = { + 'library': 'huggingface', + 'type': 'EleutherAI/gpt-neox-20b', + 'use_fast': True, + } + optim_dict = { + 'name': 'fused_adam', + 'lr': 2e-4, + 'weight_decay': 0.01, + } + trainer_dict = { + 'devices': 1, + 'num_nodes': 1, + 'accelerator': 'gpu' if args.cuda else 'cpu', + 'precision': 'bf16', + 'logger': False, # logger provided by exp_manager + 'enable_checkpointing': False, + 'replace_sampler_ddp': False, + 'max_epochs': -1, # PTL default. In practice, max_steps will be reached first. 
+ 'max_steps': 100000, # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches + 'log_every_n_steps': 10, + 'val_check_interval': 100, + 'limit_val_batches': 50, + 'limit_test_batches': 500, + 'accumulate_grad_batches': 1, + 'gradient_clip_val': 1.0, + 'benchmark': False, + 'enable_model_summary': False, + } + + model_dict['tokenizer'] = tokeniser_dict + model_dict['optim'] = optim_dict + + omega_cfg = OmegaConf.create(model_dict) + + trainer = pl.Trainer(**trainer_dict) + + model = MegatronGPTModel(omega_cfg, trainer) + + model_keys = list(model.state_dict().keys()) + model_dtypes = list(set([model.state_dict()[x].dtype for x in model_keys])) + + if not (len(model_dtypes) == 1 and model_dtypes[0] is torch.bfloat16): + model = model.bfloat16() + + if args.cuda: + model = model.cuda() + + mpt_1 = torch.load(os.path.join(args.input, 'pytorch_model-00001-of-00002.bin'), map_location="cpu") + mpt_2 = torch.load(os.path.join(args.input, 'pytorch_model-00002-of-00002.bin'), map_location="cpu") + mpt_dict = {**mpt_1, **mpt_2} + del mpt_1, mpt_2 + + def convert_state_dict(state_dict, amp=False): + def get_new_key(old_key): + if old_key == 'transformer.wte.weight': + return 'language_model.embedding.word_embeddings.weight' + elif old_key == 'transformer.norm_f.weight': + return 'language_model.encoder.final_layernorm.weight' + else: + p1 = old_key.replace('transformer.blocks.', 'language_model.encoder.layers.') + p2 = p1.replace('norm_1.weight', 'input_layernorm.weight') + p3 = p2.replace('attn.Wqkv.weight', 'self_attention.query_key_value.weight') + p4 = p3.replace('attn.out_proj.weight', 'self_attention.dense.weight') + p5 = p4.replace('norm_2.weight', 'post_attention_layernorm.weight') + p6 = p5.replace('ffn.up_proj.weight', 'mlp.dense_h_to_4h.weight') + p7 = p6.replace('ffn.down_proj.weight', 'mlp.dense_4h_to_h.weight') + + return p7 + + new_dict = {} + + for old_key, val in state_dict.items(): + new_key = get_new_key(old_key) + if amp: + new_key = 'module.' + new_key + + new_dict[new_key] = val + + return new_dict + + convert_dict = convert_state_dict(mpt_dict, amp=model_dict['megatron_amp_O2']) + + if model_dict['megatron_amp_O2']: + missing_keys, unexpected_keys = model.model.load_state_dict(convert_dict, strict=True) + else: + missing_keys, unexpected_keys = super(GPTModel, model.model).load_state_dict(convert_dict, strict=True) + + if len(missing_keys) > 0: + logging.critical('Missing keys were detected during the load, something has gone wrong. Aborting.') + logging.critical(f'Missing keys: \n{missing_keys}') + exit(255) + + if len(unexpected_keys) > 0: + logging.warning('Unexpected keys were detected which should not happen. Please investigate.') + logging.warning(f'Unexpected keys: \n{unexpected_keys}') + + if args.output is None: + args.output = os.path.dirname(os.path.abspath(__file__)) + + model.save_to(os.path.join(args.output, 'megatron_mpt_7b_base_tp1_pp1.nemo')) diff --git a/tests/collections/nlp/test_flash_attention.py b/tests/collections/nlp/test_flash_attention.py new file mode 100644 index 000000000000..cead91ff312a --- /dev/null +++ b/tests/collections/nlp/test_flash_attention.py @@ -0,0 +1,247 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
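# Illustrative sketch, not part of the patch: the HuggingFace -> NeMo parameter
# renaming performed by convert_state_dict in the MPT-7B converter, applied to a
# couple of sample keys. The substitutions are copied from the script; the input
# keys are just examples.
def toy_rename_mpt_key(old_key: str) -> str:
    if old_key == "transformer.wte.weight":
        return "language_model.embedding.word_embeddings.weight"
    if old_key == "transformer.norm_f.weight":
        return "language_model.encoder.final_layernorm.weight"
    key = old_key.replace("transformer.blocks.", "language_model.encoder.layers.")
    key = key.replace("norm_1.weight", "input_layernorm.weight")
    key = key.replace("attn.Wqkv.weight", "self_attention.query_key_value.weight")
    key = key.replace("attn.out_proj.weight", "self_attention.dense.weight")
    key = key.replace("norm_2.weight", "post_attention_layernorm.weight")
    key = key.replace("ffn.up_proj.weight", "mlp.dense_h_to_4h.weight")
    key = key.replace("ffn.down_proj.weight", "mlp.dense_4h_to_h.weight")
    return key

print(toy_rename_mpt_key("transformer.blocks.0.attn.Wqkv.weight"))
# language_model.encoder.layers.0.self_attention.query_key_value.weight
print(toy_rename_mpt_key("transformer.blocks.31.ffn.down_proj.weight"))
# language_model.encoder.layers.31.mlp.dense_4h_to_h.weight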
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random + +import pytest +import torch +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.modules.common.megatron.attention import CoreAttention +from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.megatron.utils import build_attention_mask_3d +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy + +try: + from apex.transformer.enums import AttnMaskType + + HAVE_APEX = True +except (ImportError, ModuleNotFoundError): + HAVE_APEX = False + +try: + import flash_attn + + HAVE_FA = True +except (ImportError, ModuleNotFoundError): + HAVE_FA = False + +try: + import triton + + HAVE_TRITON = True +except (ImportError, ModuleNotFoundError): + HAVE_TRITON = False + +import pynvml + + +def HAVE_AMPERE_GPU(): + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + device_arch = pynvml.nvmlDeviceGetArchitecture(handle) + pynvml.nvmlShutdown() + return device_arch == pynvml.NVML_DEVICE_ARCH_AMPERE + + +@pytest.mark.run_only_on('GPU') +@pytest.mark.skipif(not HAVE_APEX, reason="apex is not installed") +class TestFlashAttention: + @classmethod + def setup_class(cls): + if not torch.cuda.is_available(): + return + + GPUS = 1 + TP_SIZE = GPUS + PP_SIZE = 1 + MB_SIZE = 4 + GB_SIZE = 8 + SEED = 1234 + trainer = Trainer(strategy=NLPDDPStrategy(), devices=GPUS, accelerator='gpu', num_nodes=1, logger=None,) + + initialize_model_parallel_for_nemo( + world_size=trainer.world_size, + global_rank=trainer.global_rank, + local_rank=trainer.local_rank, + tensor_model_parallel_size=TP_SIZE, + pipeline_model_parallel_size=PP_SIZE, + micro_batch_size=MB_SIZE, + global_batch_size=GB_SIZE, + seed=SEED, + apex_transformer_log_level=30, + ) + + @pytest.fixture() + def cfg(self): + cfg = { + 'bz': random.randint(1, 7), + 'sl': random.randint(1, 7), + 'head': random.randint(1, 7), + 'device': torch.cuda.current_device(), + } + # flash attention requires head dimensions are multiples of 8 + head_dim = random.randint(1, 7) * 8 + cfg['hidden'] = cfg['head'] * head_dim + + return cfg + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.unit + def test_flash_attention(self, cfg): + device = cfg['device'] + bz, sl, np, h = cfg['bz'], cfg['sl'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sl, bz, np, hn, device=device).half() + k = torch.rand(sl, bz, np, hn, device=device).half() + v = torch.rand(sl, bz, np, hn, device=device).half() + + attention_mask_2d = torch.arange(sl, device=device).unsqueeze(0) < torch.randint( + 1, sl, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention_mask_causal_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.causal + ).unsqueeze(1) + + # Non-causal + attention = CoreAttention( + layer_number=1, + num_attention_heads=np, + 
hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + ) + + attention_fa = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + out_fa = attention_fa(q, k, v, attention_mask_2d) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + + # Causal + attention = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + ) + + attention_fa = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_causal_3d) + out_fa = attention_fa(q, k, v, attention_mask_causal_3d) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + out_fa = attention_fa(q, k, v, attention_mask_2d) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.skipif(not HAVE_TRITON, reason="triton is not installed") + @pytest.mark.skipif( + not HAVE_AMPERE_GPU(), + reason="should only run on AMPERE GPU. Please see https://github.com/HazyResearch/flash-attention/issues/245", + ) + @pytest.mark.unit + def test_flash_attention_triton(self, cfg): + device = cfg['device'] + bz, sl, np, h = cfg['bz'], cfg['sl'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sl, bz, np, hn, device=device).half() + k = torch.rand(sl, bz, np, hn, device=device).half() + v = torch.rand(sl, bz, np, hn, device=device).half() + + attention_mask_2d = torch.arange(sl, device=device).unsqueeze(0) < torch.randint( + 1, sl, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention_mask_causal_3d = build_attention_mask_3d( + source_mask=attention_mask_2d, target_mask=attention_mask_2d, attn_mask_type=AttnMaskType.causal + ).unsqueeze(1) + + attention_bias = torch.rand(bz, np, sl, sl, device=device) + + # Non-causal + attention = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + ) + + attention_fa = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + out_fa = attention_fa(q, k, v, attention_mask_2d, relative_position_bias=attention_bias) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + + # Causal + attention = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + ) + + attention_fa = CoreAttention( + layer_number=1, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.causal, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = 
attention(q, k, v, attention_mask_causal_3d, relative_position_bias=attention_bias) + out_fa = attention_fa(q, k, v, attention_mask_causal_3d, relative_position_bias=attention_bias) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + out_fa = attention_fa(q, k, v, attention_mask_2d, relative_position_bias=attention_bias) + assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) diff --git a/tests/collections/nlp/test_position_embedding.py b/tests/collections/nlp/test_position_embedding.py new file mode 100644 index 000000000000..263ca8669d81 --- /dev/null +++ b/tests/collections/nlp/test_position_embedding.py @@ -0,0 +1,211 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random + +import pytest +import torch + +from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType +from nemo.collections.nlp.modules.common.megatron.position_embedding import ( + ALiBiRelativePositionEmbedding, + KERPLERelativePositionEmbedding, + RotaryEmbedding, + SandwichRelativePositionEmbedding, + T5RelativePositionEmbedding, + XPOSPositionEmbedding, +) +from nemo.collections.nlp.modules.common.megatron.position_embedding.rotary_position_embedding import ( + apply_rotary_pos_emb, +) +from nemo.collections.nlp.modules.common.megatron.utils import init_method_normal + + +@pytest.fixture() +def cfg(): + cfg = { + 'max_seq_len': 8, + 'num_attention_heads': 2, + 'layer_type': LayerType.encoder, + 'hidden_size': 4, + 'rpe_init_method_std': 0.02, + 'rpe_num_buckets': 6, + 'rpe_max_distance': 16, + } + return cfg + + +@pytest.mark.unit +def test_alibi(cfg): + # non-causal + PE_nc = ALiBiRelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + # causal + PE_c = ALiBiRelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.equal(bias_nc, bias_nc.transpose(2, 3)) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], 1, k_len) + assert torch.equal(bias_c, bias_nc[:, :, -1:, :]) + + +@pytest.mark.unit +def test_sandwich(cfg): + # non-causal + PE_nc = SandwichRelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + hidden_size=cfg['hidden_size'], + ) + + # causal + PE_c = SandwichRelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + hidden_size=cfg['hidden_size'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert 
bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.equal(bias_nc, bias_nc.transpose(2, 3)) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.all(torch.triu(bias_c, diagonal=0) == 0) + + +@pytest.mark.unit +def test_kerple(cfg): + # non-causal + PE_nc = KERPLERelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + # causal + PE_c = KERPLERelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + max_seq_len=cfg['max_seq_len'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.equal(bias_nc, bias_nc.transpose(2, 3)) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert torch.all(torch.triu(bias_c, diagonal=0) == 0) + + +@pytest.mark.unit +def test_t5relative(cfg): + # non-causal + PE_nc = T5RelativePositionEmbedding( + bidirectional=True, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + init_method=init_method_normal(cfg['rpe_init_method_std']), + relative_position_num_buckets=cfg['rpe_num_buckets'], + relative_position_max_distance=cfg['rpe_max_distance'], + ) + + # causal + PE_c = T5RelativePositionEmbedding( + bidirectional=False, + num_attention_heads=cfg['num_attention_heads'], + layer_type=cfg['layer_type'], + init_method=init_method_normal(cfg['rpe_init_method_std']), + relative_position_num_buckets=cfg['rpe_num_buckets'], + relative_position_max_distance=cfg['rpe_max_distance'], + ) + + q_len = k_len = random.randint(1, cfg['max_seq_len'] * 2) + + bias_nc = PE_nc(q_len, k_len) + assert bias_nc.shape == (1, cfg['num_attention_heads'], q_len, k_len) + + bias_c = PE_c(q_len, k_len) + assert bias_c.shape == (1, cfg['num_attention_heads'], q_len, k_len) + assert ( + len(torch.triu(bias_c, diagonal=0).unique()) == cfg['num_attention_heads'] + 1 + if q_len > 1 + else cfg['num_attention_heads'] + ) + + +@pytest.mark.unit +def test_rotary(cfg): + PE = RotaryEmbedding(dim=cfg['hidden_size']) + rotary_embedding = PE(cfg['max_seq_len']) + + x = torch.rand(cfg['max_seq_len'], 1, cfg['num_attention_heads'], cfg['hidden_size']) + x_rotary = apply_rotary_pos_emb(x, rotary_embedding) + assert x_rotary.shape == x.shape + + hd = cfg['hidden_size'] // 2 + x_rotary_test = torch.cat( + ( + x[..., :hd] * rotary_embedding[..., :hd].cos() + x[..., hd:] * rotary_embedding[..., hd:].sin() * -1, + x[..., :hd] * rotary_embedding[..., :hd].sin() + x[..., hd:] * rotary_embedding[..., hd:].cos(), + ), + dim=-1, + ) + assert torch.equal(x_rotary, x_rotary_test) + + offset = random.choice(range(1, cfg['max_seq_len'])) + rotary_embedding_offset = PE(cfg['max_seq_len'], offset=offset) + x_rotary = apply_rotary_pos_emb(x[: offset + 1], rotary_embedding[: offset + 1]) + x_rotary_offset = apply_rotary_pos_emb(x[offset : offset + 1], rotary_embedding_offset[:1]) + assert torch.equal(x_rotary[-1], x_rotary_offset[0]) + + +@pytest.mark.unit +def test_xpos(cfg): + PE = XPOSPositionEmbedding(head_dim=cfg['hidden_size']) + x = torch.rand(cfg['max_seq_len'], 1, cfg['num_attention_heads'], cfg['hidden_size']) + + x_rotary = PE(x) + assert x_rotary.shape == x.shape + + offset = random.choice(range(1, cfg['max_seq_len'])) + 
x_rotary = PE(x[: offset + 1]) + x_rotary_offset = PE(x[offset : offset + 1], offset=offset) + assert torch.equal(x_rotary[-1], x_rotary_offset[0]) diff --git a/tests/collections/nlp/test_retrieval_module.py b/tests/collections/nlp/test_retrieval_module.py index 3a2d46f4fed2..08425964e566 100644 --- a/tests/collections/nlp/test_retrieval_module.py +++ b/tests/collections/nlp/test_retrieval_module.py @@ -21,6 +21,7 @@ from nemo.collections.nlp.modules.common.megatron.attention import ParallelChunkedCrossAttention from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.megatron.position_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder import ( MegatronRetrievalTokenLevelEncoderDecoderModule, ) @@ -28,7 +29,6 @@ MegatronRetrievalTransformerDecoderModule, MegatronRetrievalTransformerEncoderModule, ) -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.utils import ( build_attention_mask_3d, init_method_normal, diff --git a/tests/collections/nlp/test_retrieval_module_inference.py b/tests/collections/nlp/test_retrieval_module_inference.py index 16e7e556bd10..a9aa002815b2 100644 --- a/tests/collections/nlp/test_retrieval_module_inference.py +++ b/tests/collections/nlp/test_retrieval_module_inference.py @@ -22,6 +22,7 @@ from nemo.collections.nlp.modules.common.megatron.attention import ParallelChunkedCrossAttention from nemo.collections.nlp.modules.common.megatron.layer_type import LayerType from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo +from nemo.collections.nlp.modules.common.megatron.position_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder import ( MegatronRetrievalTokenLevelEncoderDecoderModule, ) @@ -29,7 +30,6 @@ MegatronRetrievalTransformerDecoderModule, MegatronRetrievalTransformerEncoderModule, ) -from nemo.collections.nlp.modules.common.megatron.rotary_pos_embedding import RotaryEmbedding from nemo.collections.nlp.modules.common.megatron.utils import ( build_attention_mask_3d, init_method_normal, From b54e7fd6b0a8197270a6e33e262d40083d213254 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 09:53:48 -0700 Subject: [PATCH 038/123] fix (#6842) (#6843) Signed-off-by: Yi Dong Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> --- tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index e1aa32f7bbf1..b7ae11ef3f5d 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -586,7 +586,7 @@ "outputs": [], "source": [ "CHECKPONT_FILE_NAME = megatron_gpt--val_loss=1.17-step=10047-consumed_samples=80376.0-last.ckpt # change it to your checkpoint file name\n", - "!python -m torch.distributed.launch --nproc_per_node=1 megatron_ckpt_to_nemo.py \\\n", + "!torchrun --nproc_per_node=1 megatron_ckpt_to_nemo.py \\\n", " 
--checkpoint_folder=gpt_creditcard_results/megatron_gpt/checkpoints/ \\\n", " --checkpoint_name={CHECKPONT_FILE_NAME} \\\n", " --nemo_file_path=tabular.nemo \\\n", From 02c30689a9bfc3ca3950e010525610557979c646 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Tue, 13 Jun 2023 13:28:19 -0400 Subject: [PATCH 039/123] Add Frame-VAD to ASR+VAD pipeline (#6464) * add model, dataset, necessary utils and tests Signed-off-by: stevehuang52 * fix tarred data Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * add fvad examples and update utils Signed-off-by: stevehuang52 * add copyright Signed-off-by: stevehuang52 * add frame-vad to ASR+VAD pipeline, add drop-frame mode Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * fix masking Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * slight refactor Signed-off-by: stevehuang52 * fix rnnt output Signed-off-by: stevehuang52 * add support for hybrid model Signed-off-by: stevehuang52 * update tutorial Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * merge frame- and segment-vad scripts Signed-off-by: stevehuang52 * update tutorial Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- examples/asr/asr_vad/README.md | 27 ++- .../asr/asr_vad/speech_to_text_with_vad.py | 116 +++++++---- .../conf/vad/frame_vad_infer_postprocess.yaml | 2 +- .../speech_classification/frame_vad_infer.py | 6 +- .../speech_to_frame_label.py | 9 + nemo/collections/asr/data/feature_to_label.py | 185 ++++++++++++++++-- .../asr/data/feature_to_label_dataset.py | 26 ++- nemo/collections/asr/data/feature_to_text.py | 91 ++++++--- .../asr/data/feature_to_text_dataset.py | 4 + .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 14 +- tutorials/asr/Voice_Activity_Detection.ipynb | 23 ++- 11 files changed, 414 insertions(+), 89 deletions(-) diff --git a/examples/asr/asr_vad/README.md b/examples/asr/asr_vad/README.md index 03c7efa146b9..9385b96a79ea 100644 --- a/examples/asr/asr_vad/README.md +++ b/examples/asr/asr_vad/README.md @@ -25,23 +25,30 @@ To run the code with ASR+VAD default settings: ```bash python speech_to_text_with_vad.py \ manifest_filepath=/PATH/TO/MANIFEST.json \ - vad_model=vad_multilingual_marblenet \ + vad_model=vad_multilingual_frame_marblenet \ asr_model=stt_en_conformer_ctc_large \ - vad_config=../conf/vad/vad_inference_postprocess.yaml + vad_config=../conf/vad/frame_vad_infer_postprocess.yaml ``` -To use only ASR and disable VAD, set `vad_model=None` and `use_rttm=False`. +- To use only ASR and disable VAD, set `vad_model=None` and `use_rttm=False`. -To use only VAD, set `asr_model=None` and specify both `vad_model` and `vad_config`. +- To use only VAD, set `asr_model=None` and specify both `vad_model` and `vad_config`. -To enable profiling, set `profiling=True`, but this will significantly slow down the program. +- To enable profiling, set `profiling=True`, but this will significantly slow down the program. -To use or disable feature masking, set `use_rttm` to `True` or `False`. +### Using RTTM to handle non-speech audio segments +- To use or disable RTTM usage, set `use_rttm` to `True` or `False`. 
There are two options to use RTTM files, as specified by the parameter `rttm_mode`, which must be one of `mask` or `drop`. For `mask`, the RTTM file will be used to mask the non-speech features. For `drop`, the RTTM file will be used to drop the non-speech features. -To normalize feature before masking, set `normalize=pre_norm`, -and set `normalize=post_norm` for masking before normalization. +- It's recommended that for `rttm_mode='drop'`, use larger `pad_onset` and `pad_offset` to avoid dropping speech features. -To use a specific value for feature masking, set `feat_mask_val` to the desired value. +- To use a specific value for feature masking, set `feat_mask_val` to the desired value. Default is `feat_mask_val=None`, where -16.530 (zero log mel-spectrogram value) will be used for `post_norm` and 0 (same as SpecAugment) will be used for `pre_norm`. -See more options in the `InferenceConfig` class. +- To normalize feature before masking, set `normalize=pre_norm`, and set `normalize=post_norm` for masking before normalization. + +### Frame-VAD and Segment-VAD +- By default, `speech_to_text_with_vad.py` and `vad_config=../conf/vad/frame_vad_infer_postprocess.yaml` will use a frame-VAD model, which generates a speech/non-speech prediction for each audio frame of 20ms. +- To use segment-VAD, use `speech_to_text_with_vad.py vad_type='segment' vad_config=../conf/vad/vad_inference_postprocessing.yaml` instead. In segment-VAD, the audio is split into segments and VAD is performed on each segment. The segments are then stitched together to form the final output. The segment size and stride can be specified by `window_length_in_sec` and `shift_length_in_sec` in the VAD config (e.g., `../conf/vad/vad_inference_postprocessing.yaml`) respectively. The default values are 0.63 seconds and 0.08 seconds respectively. + +### More options +- See more options in the `InferenceConfig` data class. diff --git a/examples/asr/asr_vad/speech_to_text_with_vad.py b/examples/asr/asr_vad/speech_to_text_with_vad.py index b22ff709c344..ecdfac42f665 100644 --- a/examples/asr/asr_vad/speech_to_text_with_vad.py +++ b/examples/asr/asr_vad/speech_to_text_with_vad.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,9 +29,9 @@ ```bash python speech_to_text_with_vad.py \ manifest_filepath=/PATH/TO/MANIFEST.json \ - vad_model=vad_multilingual_marblenet \ + vad_model=vad_multilingual_frame_marblenet\ asr_model=stt_en_conformer_ctc_large \ - vad_config=../conf/vad/vad_inference_postprocess.yaml + vad_config=../conf/vad/frame_vad_inference_postprocess.yaml ``` To use only ASR and disable VAD, set `vad_model=None` and `use_rttm=False`. @@ -40,13 +40,15 @@ To enable profiling, set `profiling=True`, but this will significantly slow down the program. -To use or disable feature masking, set `use_rttm` to `True` or `False`. +To use or disable feature masking/droping based on RTTM files, set `use_rttm` to `True` or `False`. +There are two ways to use RTTM files, either by masking the features (`rttm_mode=mask`) or by dropping the features (`rttm_mode=drop`). +For audios that have long non-speech audios between speech segments, dropping frames is recommended. To normalize feature before masking, set `normalize=pre_norm`, and set `normalize=post_norm` for masking before normalization. 
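(Illustrative aside, not part of this script's API: a minimal sketch of what `rttm_mode='mask'` vs. `rttm_mode='drop'` do to a [D, T] log-mel feature tensor, assuming -16.635 as the zero-signal mask value used by default.)

    import torch
    feats = torch.randn(80, 100)                # hypothetical [D, T] log-mel features
    speech = torch.zeros(100, dtype=torch.bool)
    speech[20:60] = True                        # frames covered by RTTM speech segments
    masked = feats.clone()
    masked[:, ~speech] = -16.635                # rttm_mode='mask': overwrite non-speech frames
    dropped = feats[:, speech]                  # rttm_mode='drop': keep only speech frames
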
To use a specific value for feature masking, set `feat_mask_val` to the desired value. -Default is `feat_mask_val=None`, where -16.530 will be used for `post_norm` and 0 will be used for `pre_norm`. +Default is `feat_mask_val=None`, where -16.635 will be used for `post_norm` and 0 will be used for `pre_norm`. See more options in the `InferenceConfig` class. """ @@ -72,10 +74,10 @@ from nemo.collections.asr.models import ASRModel, EncDecClassificationModel from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest from nemo.collections.asr.parts.utils.vad_utils import ( - extract_audio_features, generate_overlap_vad_seq, generate_vad_segment_table, get_vad_stream_status, + init_frame_vad_model, init_vad_model, ) from nemo.core.config import hydra_runner @@ -97,15 +99,16 @@ class InferenceConfig: vad_model: Optional[str] = None # Path to a .nemo file or a pretrained NeMo model on NGC vad_config: Optional[str] = None # Path to a yaml file containing VAD post-processing configs manifest_filepath: Optional[str] = None # Path to dataset's JSON manifest - audio_dir: Optional[str] = None + audio_dir: Optional[str] = None # Path to a directory containing audio files, use this if no manifest is provided use_rttm: bool = True # whether to use RTTM + rttm_mode: str = "mask" # how to use RTTM files, choices=[`mask`, `drop`] feat_mask_val: Optional[float] = None # value used to mask features based on RTTM, set None to use defaults normalize: Optional[ str - ] = "post_norm" # whether and where to normalize feature, choices=[None, `pre_norm`, `post_norm`] + ] = "post_norm" # whether and where to normalize audio feature, choices=[None, `pre_norm`, `post_norm`] normalize_type: str = "per_feature" # how to determine mean and std used for normalization - use_pure_noise: bool = False # whether input is pure noise or not. + normalize_audio_db: Optional[float] = None # set to normalize RMS DB of audio before extracting audio features profiling: bool = False # whether to enable pytorch profiling @@ -113,13 +116,13 @@ class InferenceConfig: batch_size: int = 1 # batch size for ASR. Feature extraction and VAD only support single sample per batch. num_workers: int = 8 sample_rate: int = 16000 - frame_unit_time_secs: float = 0.01 # unit time per frame in seconds, equal to `window_stride` in ASR configs. + frame_unit_time_secs: float = 0.01 # unit time per frame in seconds, equal to `window_stride` in ASR configs, typically 10ms. audio_type: str = "wav" # Output settings, no need to change output_dir: Optional[str] = None # will be automatically set by the program output_filename: Optional[str] = None # will be automatically set by the program - pred_name_postfix: Optional[str] = None # If you need to use another model name, rather than standard one. + pred_name_postfix: Optional[str] = None # If you need to use another model name, other than the standard one. 
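    # Editor's aside (illustrative only, not part of this config class): the fields above are
    # plain dataclass defaults, so a setup equivalent to the Hydra overrides
    # `use_rttm=true rttm_mode=drop normalize=post_norm` could be expressed roughly as:
    #   cfg = InferenceConfig(
    #       asr_model="stt_en_conformer_ctc_large",        # pretrained ASR model name from the README
    #       vad_model="vad_multilingual_frame_marblenet",  # pretrained frame-VAD model name from the README
    #       manifest_filepath="/path/to/manifest.json",    # hypothetical path
    #       use_rttm=True,
    #       rttm_mode="drop",
    #       normalize="post_norm",
    #   )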
# Set to True to output language ID information compute_langs: bool = False @@ -130,6 +133,9 @@ class InferenceConfig: # Decoding strategy for RNNT models rnnt_decoding: RNNTDecodingConfig = RNNTDecodingConfig(fused_batch_size=-1) + # VAD model type + vad_type: str = "frame" # which type of VAD to use, choices=[`frame`, `segment`] + @hydra_runner(config_name="InferenceConfig", schema=InferenceConfig) def main(cfg): @@ -243,7 +249,10 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C out_dir.mkdir(parents=True, exist_ok=True) torch.set_grad_enabled(False) - vad_model = EncDecClassificationModel.from_pretrained("vad_multilingual_marblenet") + if cfg.vad_model: + vad_model = init_frame_vad_model(cfg.vad_model) + else: + vad_model = EncDecClassificationModel.from_pretrained("vad_multilingual_marblenet") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") vad_model = vad_model.to(device) vad_model.eval() @@ -256,6 +265,7 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C 'labels': ['infer',], 'num_workers': cfg.num_workers, 'shuffle': False, + 'normalize_audio_db': cfg.normalize_audio_db, } ) @@ -284,7 +294,13 @@ def extract_audio_features(manifest_filepath: str, cfg: DictConfig, record_fn: C def run_vad_inference(manifest_filepath: str, cfg: DictConfig, record_fn: Callable) -> str: logging.info("Start VAD inference pipeline...") - vad_model = init_vad_model(cfg.vad_model) + if cfg.vad_type == "segment": + vad_model = init_vad_model(cfg.vad_model) + elif cfg.vad_type == "frame": + vad_model = init_frame_vad_model(cfg.vad_model) + else: + raise ValueError(f"Unknown VAD type: {cfg.vad_type}, supported types: ['segment', 'frame']") + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") vad_model = vad_model.to(device) vad_model.eval() @@ -358,8 +374,6 @@ def run_vad_inference(manifest_filepath: str, cfg: DictConfig, record_fn: Callab logging.info(f"Generating segment tables with postprocessing params: {vad_cfg.vad.parameters.postprocessing}") segment_dir_name = "vad_rttm" for key, val in vad_cfg.vad.parameters.postprocessing.items(): - if key == "use_rttm": - continue segment_dir_name = segment_dir_name + "-" + str(key) + str(val) segment_dir = Path(cfg.output_dir) / Path(segment_dir_name) @@ -368,13 +382,13 @@ def run_vad_inference(manifest_filepath: str, cfg: DictConfig, record_fn: Callab else: segment_dir.mkdir(parents=True) t0 = time.time() - vad_cfg.vad.parameters.postprocessing.use_rttm = True segment_dir = generate_vad_segment_table( vad_pred_dir=pred_dir, postprocessing_params=vad_cfg.vad.parameters.postprocessing, frame_length_in_sec=frame_length_in_sec, num_workers=cfg.num_workers, out_dir=segment_dir, + use_rttm=True, ) t1 = time.time() logging.info(f"Time elapsed: {t1 - t0: .2f} seconds") @@ -432,9 +446,14 @@ def generate_vad_frame_pred( with record_fn("vad_infer_other"): probs = torch.softmax(log_probs, dim=-1) + if len(probs.shape) == 3: + # squeeze the batch dimension, since batch size is 1 + probs = probs.squeeze(0) # [1,T,C] -> [T,C] pred = probs[:, 1] - if status[i] == 'start': + if window_length_in_sec == 0: + to_save = pred + elif status[i] == 'start': to_save = pred[:-trunc] elif status[i] == 'next': to_save = pred[trunc:-trunc_l] @@ -443,11 +462,13 @@ def generate_vad_frame_pred( else: to_save = pred + to_save = to_save.cpu().tolist() all_len += len(to_save) + outpath = os.path.join(out_dir, data[i] + ".frame") with open(outpath, "a", encoding='utf-8') as fout: - for f in 
range(len(to_save)): - fout.write('{0:0.4f}\n'.format(to_save[f])) + for p in to_save: + fout.write(f'{p:0.4f}\n') del test_batch if status[i] == 'end' or status[i] == 'single': @@ -476,18 +497,30 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: # Setup decoding strategy decode_function = None - if hasattr(asr_model, 'change_decoding_strategy'): - # Check if ctc or rnnt model - if hasattr(asr_model, 'joint'): # RNNT model + decoder_type = cfg.get("decoder_type", None) + if not hasattr(asr_model, 'change_decoding_strategy'): + raise ValueError(f"ASR model {cfg.asr_model} does not support decoding strategy.") + if decoder_type is not None: # Hybrid model + if decoder_type == 'rnnt': cfg.rnnt_decoding.fused_batch_size = -1 cfg.rnnt_decoding.compute_langs = cfg.compute_langs - asr_model.change_decoding_strategy(cfg.rnnt_decoding) + asr_model.change_decoding_strategy(cfg.rnnt_decoding, decoder_type=decoder_type) decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor - else: - asr_model.change_decoding_strategy(cfg.ctc_decoding) + elif decoder_type == 'ctc': + asr_model.change_decoding_strategy(cfg.ctc_decoding, decoder_type=decoder_type) decode_function = asr_model.decoding.ctc_decoder_predictions_tensor + else: + raise ValueError( + f"Unknown decoder type for hybrid model: {decoder_type}, supported types: ['rnnt', 'ctc']" + ) + elif hasattr(asr_model, 'joint'): # RNNT model + cfg.rnnt_decoding.fused_batch_size = -1 + cfg.rnnt_decoding.compute_langs = cfg.compute_langs + asr_model.change_decoding_strategy(cfg.rnnt_decoding) + decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor else: - raise ValueError(f"Only support CTC or RNNT models that have `change_decoding_strategy()` implemented.") + asr_model.change_decoding_strategy(cfg.ctc_decoding) + decode_function = asr_model.decoding.ctc_decoder_predictions_tensor # Compute output filename if cfg.output_filename is None: @@ -499,7 +532,10 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: if cfg.use_rttm: vad_tag = Path(manifest_filepath).stem vad_tag = vad_tag[len("temp_manifest_vad_rttm_") :] - tag += f"-mask{cfg.feat_mask_val}-{vad_tag}" + if cfg.rttm_mode == "mask": + tag += f"-mask{cfg.feat_mask_val}-{vad_tag}" + else: + tag += f"-dropframe-{vad_tag}" cfg.output_filename = cfg.manifest_filepath.replace('.json', f'-{Path(cfg.asr_model).stem}-{tag}.json') cfg.output_filename = Path(cfg.output_dir) / Path(cfg.output_filename).name @@ -509,10 +545,12 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: "normalize": cfg.normalize, "normalize_type": cfg.normalize_type, "use_rttm": cfg.use_rttm, + "rttm_mode": cfg.rttm_mode, "feat_mask_val": cfg.feat_mask_val, "frame_unit_time_secs": cfg.frame_unit_time_secs, } - logging.info(f"use_rttm = {cfg.use_rttm}") + logging.info(f"use_rttm = {cfg.use_rttm}, rttm_mode = {cfg.rttm_mode}, feat_mask_val = {cfg.feat_mask_val}") + if hasattr(asr_model, "tokenizer"): dataset = feature_to_text_dataset.get_bpe_dataset(config=data_config, tokenizer=asr_model.tokenizer) else: @@ -542,10 +580,13 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: processed_signal=test_batch[0].to(device), processed_signal_length=test_batch[1].to(device), ) + with record_fn("asr_infer_other"): logits, logits_len = outputs[0], outputs[1] current_hypotheses, all_hyp = decode_function(logits, logits_len, return_hypotheses=False,) + if isinstance(current_hypotheses, tuple) and len(current_hypotheses) == 2: + current_hypotheses = 
current_hypotheses[0] # handle RNNT output hypotheses += current_hypotheses if all_hyp is not None: @@ -562,9 +603,16 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: # Save output to manifest input_manifest_data = read_manifest(manifest_filepath) manifest_data = read_manifest(cfg.manifest_filepath) + + if "text" not in manifest_data[0]: + has_groundtruth = False + else: + has_groundtruth = True + groundtruth = [] for i in range(len(manifest_data)): - groundtruth.append(manifest_data[i]["text"]) + if has_groundtruth: + groundtruth.append(manifest_data[i]["text"]) manifest_data[i]["pred_text"] = hypotheses[i] manifest_data[i]["feature_file"] = input_manifest_data[i]["feature_file"] if "rttm_file" in input_manifest_data[i]: @@ -572,19 +620,19 @@ def run_asr_inference(manifest_filepath, cfg, record_fn) -> str: write_manifest(cfg.output_filename, manifest_data) - if cfg.use_pure_noise: + if not has_groundtruth: hypotheses = " ".join(hypotheses) words = hypotheses.split() chars = "".join(words) logging.info("-----------------------------------------") - logging.info(f"Number of hallucinated characters={len(chars)}") - logging.info(f"Number of hallucinated words={len(words)}") - logging.info(f"Concatenated predictions: {hypotheses}") + logging.info(f"Number of generated characters={len(chars)}") + logging.info(f"Number of generated words={len(words)}") logging.info("-----------------------------------------") else: wer_score = word_error_rate(hypotheses=hypotheses, references=groundtruth) + cer_score = word_error_rate(hypotheses=hypotheses, references=groundtruth, use_cer=True) logging.info("-----------------------------------------") - logging.info(f"WER={wer_score*100:.2f}") + logging.info(f"WER={wer_score:.4f}, CER={cer_score:.4f}") logging.info("-----------------------------------------") logging.info(f"ASR output saved at {cfg.output_filename}") diff --git a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml index 8c9ef7fffaf5..842c04777c72 100644 --- a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml +++ b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml @@ -15,7 +15,7 @@ vad: parameters: # Parameters not tuned on large datasets, please use default parameters with caution normalize_audio_db: null # set to non null value to normalize RMS DB of audio before preprocessing window_length_in_sec: 0.0 # window length in sec for VAD context input, must be 0 for frame-VAD - shift_length_in_sec: 0.02 # frame-length in seconds for frame-VAD + shift_length_in_sec: 0.02 # frame-length in seconds for frame-VAD, must be 0.02 for the pretrained NeMo VAD model smoothing: False # Deprecated for Frame-VAD. false or type of smoothing method (eg: median, mean) overlap: 0.875 # Deprecated for Frame-VAD. overlap ratio for overlapped mean/median smoothing filter. If smoothing=False, ignore this value. 
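  # Editor's aside (not part of this YAML file): with shift_length_in_sec=0.02 each frame-VAD
  # probability covers 20 ms of audio. A rough Python sketch of turning per-frame speech
  # probabilities into (start, end) segments by simple thresholding (the real postprocessing
  # below additionally applies onset/offset and padding parameters):
  #   probs = [0.1, 0.2, 0.9, 0.95, 0.8, 0.1]   # hypothetical per-frame probabilities
  #   frame_sec, thr = 0.02, 0.5
  #   segments, start = [], None
  #   for i, p in enumerate(probs + [0.0]):     # sentinel closes a trailing segment
  #       if p >= thr and start is None:
  #           start = i * frame_sec
  #       elif p < thr and start is not None:
  #           segments.append((start, i * frame_sec))
  #           start = None
  #   # segments == [(0.04, 0.10)]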
postprocessing: diff --git a/examples/asr/speech_classification/frame_vad_infer.py b/examples/asr/speech_classification/frame_vad_infer.py index 9c8e57b0773d..56eb7584e3db 100644 --- a/examples/asr/speech_classification/frame_vad_infer.py +++ b/examples/asr/speech_classification/frame_vad_infer.py @@ -21,7 +21,11 @@ ## Usage: python frame_vad_infer.py \ --config-path="../conf/vad" --config-name="frame_vad_infer_postprocess" \ - dataset= + dataset= + +The manifest json file should have the following format (each line is a Python dictionary): +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000} """ import os diff --git a/examples/asr/speech_classification/speech_to_frame_label.py b/examples/asr/speech_classification/speech_to_frame_label.py index 04cc77afda44..3289845ec3d3 100644 --- a/examples/asr/speech_classification/speech_to_frame_label.py +++ b/examples/asr/speech_classification/speech_to_frame_label.py @@ -28,6 +28,15 @@ strategy="ddp" \ trainer.max_epochs=200 ``` + +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: +``` +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 1"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "text": "0 0 0 1 1 1 1 0 0"} +``` +For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like "0 0 0 0 1 1 0 1 .... 0 1". +However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. + """ import pytorch_lightning as pl diff --git a/nemo/collections/asr/data/feature_to_label.py b/nemo/collections/asr/data/feature_to_label.py index 673f50374581..058d0157fcbd 100644 --- a/nemo/collections/asr/data/feature_to_label.py +++ b/nemo/collections/asr/data/feature_to_label.py @@ -262,14 +262,20 @@ class FeatureToLabelDataset(Dataset): Dataset that loads tensors via a json file containing paths to feature files and their labels. Each new line is a different sample. Example below: and their target labels. JSON files should be of the following format: - {"feature_filepath": "/path/to/audio_feature.pt", "label": "1"} \ + {"feature_filepath": "/path/to/audio_feature.pt", "label": "1"} ... {"feature_filepath": "/path/to/audio_feature.pt", "label": "0"} Args: - manifest_filepath (str): Dataset parameter. Path to JSON containing data. - labels (Optional[list]): Dataset parameter. List of unique labels collected from all samples. + manifest_filepath (str): Path to JSON containing data. + labels (Optional[list]): List of unique labels collected from all samples. augmentor (Optional): feature augmentation - + window_length_in_sec (float): Window length in seconds. + shift_length_in_sec (float): Shift length in seconds. + is_regression_task (bool): if True, the labels are treated as for a regression task. + cal_labels_occurrence (bool): if True, the labels occurrence will be calculated. + zero_spec_db_val (float): Value to replace non-speech signals in log-melspectrogram. + min_duration (float): Minimum duration of the audio file in seconds. + max_duration (float): Maximum duration of the audio file in seconds. 
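    Example (illustrative sketch only; the manifest path and label set below are hypothetical):
        from nemo.collections.asr.data.feature_to_label import FeatureToLabelDataset

        dataset = FeatureToLabelDataset(
            manifest_filepath='/path/to/feature_manifest.json',
            labels=['0', '1'],
            window_length_in_sec=0.63,
            shift_length_in_sec=0.08,
        )
        feat, feat_len, label, label_len = dataset[0]   # [D, T] features and their label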
""" ZERO_LEVEL_SPEC_DB_VAL = -16.635 # Log-Melspectrogram value for zero signal @@ -296,22 +302,53 @@ def __init__( augmentor: 'nemo.collections.asr.parts.perturb.AudioAugmentor' = None, window_length_in_sec: float = 0.63, shift_length_in_sec: float = 0.01, + is_regression_task: bool = False, + cal_labels_occurrence: Optional[bool] = False, + zero_spec_db_val: float = -16.635, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, ): super().__init__() self.window_length_in_sec = window_length_in_sec self.shift_length_in_sec = shift_length_in_sec - self.collection = collections.ASRFeatureLabel(manifests_files=manifest_filepath.split(','),) + self.zero_spec_db_val = zero_spec_db_val + + if isinstance(manifest_filepath, str): + manifest_filepath = manifest_filepath.split(',') + + self.collection = collections.ASRFeatureLabel( + manifests_files=manifest_filepath, + is_regression_task=is_regression_task, + cal_labels_occurrence=cal_labels_occurrence, + min_duration=min_duration, + max_duration=max_duration, + ) self.feature_loader = ExternalFeatureLoader(augmentor=augmentor) self.labels = labels if labels else self.collection.uniq_labels - self.label2id, self.id2label = {}, {} - for label_id, label in enumerate(self.labels): - self.label2id[label] = label_id - self.id2label[label_id] = label + self.is_regression_task = is_regression_task - for idx in range(len(self.labels[:5])): - logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + if not is_regression_task: + self.labels = labels if labels else self.collection.uniq_labels + self.num_classes = len(self.labels) if self.labels is not None else 1 + self.label2id, self.id2label = {}, {} + self.id2occurrence, self.labels_occurrence = {}, [] + + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + if cal_labels_occurrence: + self.id2occurrence[label_id] = self.collection.labels_occurrence[label] + + if cal_labels_occurrence: + self.labels_occurrence = [self.id2occurrence[k] for k in sorted(self.id2occurrence)] + + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + else: + self.labels = [] + self.num_classes = 1 def __len__(self): return len(self.collection) @@ -328,9 +365,133 @@ def __getitem__(self, index): return f, fl, t, tl def _collate_fn(self, batch): - return _audio_feature_collate_fn(batch, self.ZERO_LEVEL_SPEC_DB_VAL, 0) + return _audio_feature_collate_fn(batch, self.zero_spec_db_val, 0) def _vad_segment_collate_fn(self, batch): return _vad_feature_segment_collate_fn( batch, self.window_length_in_sec, self.shift_length_in_sec, self.FRAME_UNIT_TIME_SECS ) + + +class FeatureToMultiLabelDataset(Dataset): + """ + Dataset that loads tensors via a json file containing paths to feature files and their labels. + Each new line is a different sample. Example below: + and their target labels. JSON files should be of the following format: + {"feature_filepath": "/path/to/audio_feature.pt", "label": "1 1 0 0 1"} + ... + {"feature_filepath": "/path/to/audio_feature.pt", "label": "0 1 0 0"} + Args: + manifest_filepath (str): Path to JSON containing data. + labels (Optional[list]): List of unique labels collected from all samples. + augmentor (Optional): feature augmentation + delimiter (str): delimiter to split the labels. + is_regression_task (bool): if True, the labels are treated as for a regression task. 
+ cal_labels_occurrence (bool): if True, the labels occurrence will be calculated. + zero_spec_db_val (float): Value to replace non-speech signals in log-melspectrogram. + min_duration (float): Minimum duration of the audio file in seconds. + max_duration (float): Maximum duration of the audio file in seconds. + """ + + ZERO_LEVEL_SPEC_DB_VAL = -16.635 # Log-Melspectrogram value for zero signal + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + """Returns definitions of module output ports. + """ + output_types = { + 'audio_feat': NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()), + 'feat_length': NeuralType(tuple('B'), LengthsType()), + 'labels': NeuralType(('B', 'T'), LabelsType()), + 'labels_length': NeuralType(tuple('B'), LengthsType()), + } + + return output_types + + def __init__( + self, + *, + manifest_filepath: str, + labels: List[str] = None, + augmentor: 'nemo.collections.asr.parts.perturb.AudioAugmentor' = None, + delimiter: Optional[str] = None, + is_regression_task: bool = False, + cal_labels_occurrence: Optional[bool] = False, + zero_spec_db_val: float = -16.635, + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + ): + super().__init__() + self.delimiter = delimiter + self.zero_spec_db_val = zero_spec_db_val + + if isinstance(manifest_filepath, str): + manifest_filepath = manifest_filepath.split(',') + + self.collection = collections.ASRFeatureLabel( + manifests_files=manifest_filepath, + is_regression_task=is_regression_task, + cal_labels_occurrence=cal_labels_occurrence, + delimiter=delimiter, + min_duration=min_duration, + max_duration=max_duration, + ) + + self.is_regression_task = is_regression_task + self.feature_loader = ExternalFeatureLoader(augmentor=augmentor) + self.labels = labels if labels else self.collection.uniq_labels + + self.label2id, self.id2label = {}, {} + if not is_regression_task: + self.labels = labels if labels else self._get_label_set() + self.num_classes = len(self.labels) if self.labels is not None else 1 + self.label2id, self.id2label = {}, {} + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + if cal_labels_occurrence: + self.id2occurrence[label_id] = self.collection.labels_occurrence[label] + self.labels_occurrence.append(self.id2occurrence[label_id]) + + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + else: + self.labels = [] + self.num_classes = 1 + + def _get_label_set(self): + labels = [] + for sample in self.collection: + label_str = sample.label + if label_str: + label_str_list = label_str.split(self.delimiter) if self.delimiter else label_str.split() + labels.extend(label_str_list) + return sorted(set(labels)) + + def _label_str_to_tensor(self, label_str: str): + labels = label_str.split(self.delimiter) if self.delimiter else label_str.split() + + if self.is_regression_task: + labels = [float(s) for s in labels] + labels = torch.tensor(labels).float() + else: + labels = [self.label2id[s] for s in labels] + labels = torch.tensor(labels).long() + return labels + + def __len__(self): + return len(self.collection) + + def __getitem__(self, index): + sample = self.collection[index] + + features = self.feature_loader.process(sample.feature_file) + f, fl = features, torch.tensor(features.shape[1]).long() + + t = self._label_str_to_tensor(sample.label) + tl = torch.tensor(t.size(0)).long() + + return f, fl, t, tl + + def 
_collate_fn(self, batch): + return _audio_feature_collate_fn(batch, self.zero_spec_db_val, 0) diff --git a/nemo/collections/asr/data/feature_to_label_dataset.py b/nemo/collections/asr/data/feature_to_label_dataset.py index dabe06aa62bb..08803f43ce8d 100644 --- a/nemo/collections/asr/data/feature_to_label_dataset.py +++ b/nemo/collections/asr/data/feature_to_label_dataset.py @@ -34,13 +34,35 @@ def get_feature_seq_speakerlabel_dataset( def get_feature_label_dataset( - config: dict, augmentor: Optional['AudioAugmentor'] = None + config: dict, augmentor: Optional['FeatureAugmentor'] = None ) -> feature_to_label.FeatureToLabelDataset: dataset = feature_to_label.FeatureToLabelDataset( manifest_filepath=config['manifest_filepath'], labels=config['labels'], augmentor=augmentor, window_length_in_sec=config.get("window_length_in_sec", 0.63), - shift_length_in_sec=config.get("shift_length_in_sec", 0.01), + shift_length_in_sec=config.get("shift_length_in_sec", 0.08), + is_regression_task=config.get("is_regression_task", False), + cal_labels_occurrence=config.get("cal_labels_occurrence", False), + zero_spec_db_val=config.get("zero_spec_db_val", -16.635), + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), + ) + return dataset + + +def get_feature_multi_label_dataset( + config: dict, augmentor: Optional['FeatureAugmentor'] = None +) -> feature_to_label.FeatureToMultiLabelDataset: + dataset = feature_to_label.FeatureToMultiLabelDataset( + manifest_filepath=config['manifest_filepath'], + labels=config['labels'], + augmentor=augmentor, + delimiter=config.get('delimiter', None), + is_regression_task=config.get("is_regression_task", False), + cal_labels_occurrence=config.get("cal_labels_occurrence", False), + zero_spec_db_val=config.get("zero_spec_db_val", -16.635), + max_duration=config.get('max_duration', None), + min_duration=config.get('min_duration', None), ) return dataset diff --git a/nemo/collections/asr/data/feature_to_text.py b/nemo/collections/asr/data/feature_to_text.py index eaec7b3afba5..a7e295051ae8 100644 --- a/nemo/collections/asr/data/feature_to_text.py +++ b/nemo/collections/asr/data/feature_to_text.py @@ -86,30 +86,32 @@ class _FeatureTextDataset(Dataset): {"feature_filepath": "/path/to/audio_feature.pt", "text": "the transcription", "offset": 301.75, "duration": 0.82, "utt": "utterance_id", "ctm_utt": "en_4156", "side": "A"} Args: - manifest_filepath: Path to manifest json as described above. Can be comma-separated paths. + manifest_filepath (str): Path to manifest json as described above. Can be comma-separated paths. parser: Str for a language specific preprocessor or a callable. - normalize: whether and where to normalize feature, must be one of [None, "post_norm", "pre_norm"] + normalize (bool): whether and where to normalize feature, must be one of [None, "post_norm", "pre_norm"] normalize_type (Union[str, dict]): how to normalize feature, see `nemo.collections.asr.parts.preprocessing.features.normalize_batch` - use_rttm: whether to use RTTM files if there is any, default to False + use_rttm (bool): whether to use RTTM files if there is any, default to False + rttm_mode (str): how to use RTTM files, must be one of ['mask', 'drop'], default to 'mask' + feat_min_len (int): minimum length of feature when rttm_mode=deop, default to 4. 
feat_mask_val (Optional[float]): value used to mask features with RTTM files, default to None to use zero mel-spectralgram frame_unit_time_secs (float): time in seconds for each frame sample_rate (int): Sample rate to resample loaded audio to int_values (bool): If true, load samples as 32-bit integers. Defauts to False. - augmentor (nemo.collections.asr.parts.perturb.AudioAugmentor): An AudioAugmentor object used to augment loaded - audio - max_duration: If audio exceeds this length, do not include in dataset - min_duration: If audio is less than this length, do not include in dataset - max_utts: Limit number of utterances - trim: whether or not to trim silence. Defaults to False - bos_id: Id of beginning of sequence symbol to append if not None - eos_id: Id of end of sequence symbol to append if not None - pad_id: Id of pad symbol. Defaults to 0 + augmentor (nemo.collections.asr.parts.perturb.AudioAugmentor): An AudioAugmentor object used to augment loaded audio + max_duration (float): If audio exceeds this length, do not include in dataset + min_duration (float): If audio is less than this length, do not include in dataset + max_utts (int): Limit number of utterances + trim (bool): whether or not to trim silence. Defaults to False + bos_id (int): Id of beginning of sequence symbol to append if not None + eos_id (int): Id of end of sequence symbol to append if not None + pad_id (int): Id of pad symbol. Defaults to 0 return_sample_id (bool): whether to return the sample_id as a part of each sample channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`. Uses zero-based indexing. """ ZERO_LEVEL_SPEC_DB_VAL = -16.635 # Log-Melspectrogram value for zero signal NORM_MODES = ["pre_norm", "post_norm"] + RTTM_MODES = ["mask", "drop"] @property def output_types(self) -> Optional[Dict[str, NeuralType]]: @@ -130,6 +132,8 @@ def __init__( normalize: Optional[str] = "post_norm", normalize_type: Union[str, dict] = "per_feature", use_rttm: bool = False, + rttm_mode: str = "mask", + feat_min_len: int = 4, feat_mask_val: Optional[float] = None, frame_unit_time_secs: float = 0.01, sample_rate: Optional[int] = 16000, @@ -151,6 +155,11 @@ def __init__( self.normalize = normalize self.normalize_type = normalize_type self.use_rttm = use_rttm + self.rttm_mode = rttm_mode + if self.use_rttm and self.rttm_mode not in self.RTTM_MODES: + raise ValueError(f"`rttm_mode` must be one of {self.RTTM_MODES}, got `{rttm_mode}` instead") + + self.feat_min_len = feat_min_len if feat_mask_val is not None: self.feat_mask_val = feat_mask_val elif normalize == "pre_norm": @@ -197,17 +206,18 @@ def __getitem__(self, index): # Feature normalization if self.normalize is None: if self.use_rttm and sample.rttm_file: - f = self.mask_features_from_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.process_features_with_rttm(f, offset, sample.rttm_file, self.feat_mask_val) elif self.normalize == "post_norm": # (Optional) Masking based on RTTM file if self.use_rttm and sample.rttm_file: - f = self.mask_features_from_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.process_features_with_rttm(f, offset, sample.rttm_file, self.feat_mask_val) + f = self.normalize_feature(f) else: # pre-norm f = self.normalize_feature(f) # (Optional) Masking based on RTTM file if self.use_rttm and sample.rttm_file: - f = self.mask_features_from_rttm(f, offset, 
sample.rttm_file, self.feat_mask_val) + f = self.process_features_with_rttm(f, offset, sample.rttm_file, self.feat_mask_val) if self.return_sample_id: output = f, fl, torch.tensor(t).long(), torch.tensor(tl).long(), index @@ -216,17 +226,32 @@ def __getitem__(self, index): return output - def mask_features_from_rttm(self, features, offset, rttm_file, mask_val): + def process_features_with_rttm(self, features, offset, rttm_file, mask_val): segments = load_speech_segments_from_rttm(rttm_file) - sid = 0 + new_features = features.clone() + sid, fid = 0, 0 for i in range(features.size(1)): t = offset + i * self.frame_unit_time_secs while sid < len(segments) - 1 and segments[sid][1] < t: sid += 1 if segments[sid][1] == 0 or t < segments[sid][0] or t > segments[sid][1]: - features[:, i] = mask_val - - return features + # not in speech segment + if self.rttm_mode == "drop": + # drop the frame + continue + else: + # mask the frame with specified value + new_features[:, i] = mask_val + fid += 1 + else: + # in speech segment + new_features[:, fid] = features[:, i] + fid += 1 + + if fid < self.feat_min_len and self.rttm_mode == "drop": + new_features[:, : self.feat_min_len] = mask_val + return new_features[:, : self.feat_min_len] + return new_features[:, :fid] def __len__(self): return len(self.manifest_processor.collection) @@ -259,12 +284,14 @@ class FeatureToCharDataset(_FeatureTextDataset): "utterance_id", "ctm_utt": "en_4156", "side": "A"} Args: - manifest_filepath: Path to manifest json as described above. Can + manifest_filepath (str): Path to manifest json as described above. Can be comma-separated paths. - labels: String containing all the possible characters to map to - normalize: how to normalize feature, must be one of [None, "post_norm", "pre_norm"] + labels (str): String containing all the possible characters to map to + normalize (str): how to normalize feature, must be one of [None, "post_norm", "pre_norm"] normalize_type (Union[str, dict]): how to normalize feature, see `nemo.collections.asr.parts.preprocessing.features.normalize_batch` - use_rttm: whether to use RTTM files if there is any, default to False + use_rttm (bool): whether to use RTTM files if there is any, default to False + rttm_mode (str): how to use RTTM files, must be one of ['mask', 'drop'], default to 'mask' + feat_min_len (int): minimum length of feature, default to 4 feat_mask_val (Optional[float]): value used to mask features with RTTM files, default to None to use zero mel-spectralgram frame_unit_time_secs: time in seconds for each frame sample_rate (int): Sample rate to resample loaded audio to @@ -290,6 +317,8 @@ def __init__( normalize: Optional[str] = "post_norm", normalize_type: Union[str, dict] = "per_feature", use_rttm: bool = False, + rttm_mode: str = "mask", + feat_min_len: int = 4, feat_mask_val: Optional[float] = None, frame_unit_time_secs: float = 0.01, sample_rate: Optional[int] = 16000, @@ -319,6 +348,8 @@ def __init__( normalize=normalize, normalize_type=normalize_type, use_rttm=use_rttm, + rttm_mode=rttm_mode, + feat_min_len=feat_min_len, feat_mask_val=feat_mask_val, frame_unit_time_secs=frame_unit_time_secs, sample_rate=sample_rate, @@ -352,14 +383,16 @@ class FeatureToBPEDataset(_FeatureTextDataset): the manifest. Args: - manifest_filepath: Path to manifest json as described above. Can + manifest_filepath (str): Path to manifest json as described above. Can be comma-separated paths. 
tokenizer: A subclass of the Tokenizer wrapper found in the common collection, nemo.collections.common.tokenizers.TokenizerSpec. ASR Models support a subset of all available tokenizers. - normalize: how to normalize feature, must be one of [None, "post_norm", "pre_norm"] + normalize (str): how to normalize feature, must be one of [None, "post_norm", "pre_norm"] normalize_type (Union[str, dict]): how to normalize feature, see `nemo.collections.asr.parts.preprocessing.features.normalize_batch` - use_rttm: whether to use RTTM files if there is any, default to False + use_rttm (bool): whether to use RTTM files if there is any, default to False + rttm_mode (str): how to use RTTM files, must be one of ['mask', 'drop'], default to 'mask' + feat_min_len (int): minimum length of feature, default to 4 feat_mask_val (Optional[float]): value used to mask features with RTTM files, default to None to use zero mel-spectralgram frame_unit_time_secs: time in seconds for each frame sample_rate (int): Sample rate to resample loaded audio to @@ -384,6 +417,8 @@ def __init__( normalize: Optional[str] = "post_norm", normalize_type: Union[str, dict] = "per_feature", use_rttm: bool = False, + rttm_mode: str = "mask", + feat_min_len: int = 4, feat_mask_val: Optional[float] = None, frame_unit_time_secs: float = 0.01, sample_rate: Optional[int] = 16000, @@ -435,6 +470,8 @@ def __call__(self, *args): normalize=normalize, normalize_type=normalize_type, use_rttm=use_rttm, + rttm_mode=rttm_mode, + feat_min_len=feat_min_len, feat_mask_val=feat_mask_val, frame_unit_time_secs=frame_unit_time_secs, sample_rate=sample_rate, diff --git a/nemo/collections/asr/data/feature_to_text_dataset.py b/nemo/collections/asr/data/feature_to_text_dataset.py index 7efd3be3cd24..6bc03bc0b33d 100644 --- a/nemo/collections/asr/data/feature_to_text_dataset.py +++ b/nemo/collections/asr/data/feature_to_text_dataset.py @@ -38,6 +38,8 @@ def get_char_dataset(config: dict, augmentor: Optional['FeatureAugmentor'] = Non normalize=config.get('normalize', 'post_norm'), normalize_type=config.get('normalize_type', 'per_feature'), use_rttm=config.get('use_rttm', False), + rttm_mode=config.get('rttm_mode', 'mask'), + feat_min_len=config.get('feat_min_len', 4), feat_mask_val=config.get('feat_mask_val', None), frame_unit_time_secs=config.get('frame_unit_time_secs', 0.01), sample_rate=config.get('sample_rate', 16000), @@ -75,6 +77,8 @@ def get_bpe_dataset( normalize=config.get('normalize', 'post_norm'), normalize_type=config.get('normalize_type', 'per_feature'), use_rttm=config.get('use_rttm', False), + rttm_mode=config.get('rttm_mode', 'mask'), + feat_min_len=config.get('feat_min_len', 4), feat_mask_val=config.get('feat_mask_val', None), frame_unit_time_secs=config.get('frame_unit_time_secs', 0.01), sample_rate=config.get('sample_rate', 16000), diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index b38fab2c98bf..1445afe9e381 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -15,7 +15,9 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "5. 
Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", @@ -364,6 +366,16 @@ "metric_value = word_error_rate(hypotheses=predicted_text, references=ground_truth_text, use_cer=False)\n", "print(f\"WER is {metric_value}\")" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Further Reading\n", + "\n", + "There are two ways to incorporate VAD into ASR pipeline. The first strategy is to drop the frames that are predicted as `non-speech` by VAD, as already discussed in this tutorial. The second strategy is to keep all the frames and mask the `non-speech` frames with zero-signal values. Also, instead of using segment-VAD as shown in this tutorial, we can use frame-VAD model for faster inference and better accuracy. For more information, please refer to the two scripts [speech_to_text_with_vad.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr_vad/speech_to_text_with_vad.py)." + ] } ], "metadata": { diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index b8013822c486..8b95698c71e8 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -18,7 +18,9 @@ "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\"\"\"\n", "# If you're using Google Colab and not running locally, run this cell.\n", "\n", @@ -1124,6 +1126,25 @@ "# Inference and more\n", "If you are interested in **pretrained** model and **streaming inference**, please have a look at our [VAD inference tutorial](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb) and script [vad_infer.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/vad_infer.py)\n" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Frame-VAD: More Effective and Efficient VAD for More Fine-grained Timestamps\n", + "\n", + "In this notebook, we are using the segment-VAD model, which predicts a single label for each short segment (0.63s), which is not optimal for some applications that require very precise timestamps. \n", + "\n", + "To get more precise timestamps, we can use a frame-VAD model, which predicts a label for each input frame (20ms). To prepare manifest for frame-VAD, you'll need to have `label` field in each manifest entry, which is a string of labels for each frame. For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like \"0 0 0 0 1 1 0 1 .... 
0 1\".\n", + "However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. \n", + "\n", + "The Frame-VAD model shares the same MarbleNet architecture as the segment-VAD model, but with a different input/output resolution and loss function. The frame-VAD model is trained with more data than segment-VAD and achieves better performance, as shown in the [NGC model card](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/vad_multilingual_frame_marblenet). \n", + "\n", + "During inference, since frame-VAD model doesn't require splicing input into overlapping segments, it is more efficient than segment-VAD model, with 8x less GPU memory consumption.\n", + "\n", + "For more information on the frame-VAD model, please refer to the [model class](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/models/classification_models.py#L840). For training and running inference on frame-VAD, please refer to [speech_to_frame_label.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/speech_to_frame_label.py) and [frame_vad_infer.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/frame_vad_infer.py)." + ] } ], "metadata": { From 1e4845c057c5176d19bbece698eeee65268ef8d8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 14 Jun 2023 16:18:01 -0600 Subject: [PATCH 040/123] Add API docs for NeMo Megatron (#6850) (#6864) * add model pretraining and customization classes * fix * test width * increase middle pane width * add modules and datasets * remove global in t5 dataset s and fix formatting in megatron base model --------- Signed-off-by: ericharper Co-authored-by: Eric Harper --- docs/source/_static/css/custom.css | 2 +- docs/source/conf.py | 5 +- docs/source/nlp/api.rst | 193 +++++++++++------- .../language_modeling/megatron_base_model.py | 25 +-- 4 files changed, 135 insertions(+), 90 deletions(-) diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index da134a02d86a..cf0ad0ff2d7f 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -255,7 +255,7 @@ article ul { } } -@media (min-width: 1400px) { +@media (min-width: none) { body { font-size: 18px; } diff --git a/docs/source/conf.py b/docs/source/conf.py index a78ba3528048..0765f8940ab0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,6 @@ sys.path.insert(0, os.path.abspath("../..")) sys.path.insert(0, os.path.abspath("../../nemo")) -sys.path.insert(0, os.path.abspath("../../nemo_text_processing")) from package_info import __version__ @@ -47,7 +46,6 @@ 'hydra', # hydra-core in requirements, hydra during import 'dateutil', # part of core python 'transformers.tokenization_bert', # has ., troublesome for this regex - 'megatron', # megatron-lm in requirements, megatron in import 'sklearn', # scikit_learn in requirements, sklearn in import 'nemo_text_processing.inverse_text_normalization', # Not installed automatically 'nemo_text_processing.text_normalization', # Not installed automatically @@ -55,10 +53,13 @@ 'torchmetrics', # inherited from PTL 'lightning_utilities', # inherited from PTL 'apex', + 'megatron.core', + 'transformer_engine', 'joblib', # inherited from optional code 'IPython', 'ipadic', 'psutil', + 'regex', ] _skipped_autodoc_mock_imports = ['wrapt', 'numpy'] diff --git 
a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index 46efb0851d4e..7c6971a68d05 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -1,99 +1,142 @@ -NeMo NLP collection API +NeMo Megatron API ======================= -Model Classes -------------- +Pretraining Model Classes +------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_base_model.MegatronBaseModel + :show-inheritance: + :no-members: + :members: __init__, configure_optimizers + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_bert_model.MegatronBertModel + :show-inheritance: + :no-members: + :members: training_step, validation_step, build_train_valid_test_datasets, build_LDDL_data, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_bart_model.MegatronBARTModel + :show-inheritance: + :no-members: + :members: training_step, validation_step, build_train_valid_test_datasets, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_retrieval_model.MegatronRetrievalModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_model.MegatronT5Model + :show-inheritance: + :no-members: + :members: complete, encode, decode, add_special_tokens_to_tokenizer, training_step, validation_step, build_train_valid_test_datasets, setup + +Customization Model Classes +--------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model.MegatronGPTSFTModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model.MegatronGPTAdapterLearningModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model.MegatronGPTInfusedAdapterModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_prompt_learning_model.MegatronGPTPromptLearningModel + :show-inheritance: + :no-members: + :members: built_virtual_prompt_dataset, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5AdapterLearningModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5AdapterLearningModel + :show-inheritance: + :no-members: + :members: _add_adapters_to_component, __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup + +.. 
autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5InfusedAdapterModel + :show-inheritance: + :no-members: + :members: _add_adapters_to_component, __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup -.. autoclass:: nemo.collections.nlp.models.TextClassificationModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact, classifytext +Modules +------- -.. autoclass:: nemo.collections.nlp.models.GLUEModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact +.. autoclass:: nemo.collections.nlp.modules.common.megatron.module.MegatronModule + :show-inheritance: -.. autoclass:: nemo.collections.nlp.models.PunctuationCapitalizationModel - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.modules.common.megatron.module.Float16Module + :show-inheritance: -.. autoclass:: nemo.collections.nlp.models.TokenClassificationModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact - -.. autoclass:: nemo.collections.nlp.models.QAModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end -.. autoclass:: nemo.collections.nlp.models.DuplexTaggerModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.gpt_model.GPTModel + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.models.DuplexDecoderModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.bert_model.BertModel + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.models.BERTLMModel - :show-inheritance: - :members: setup_training_data, setup_optimization +.. autoclass:: nemo.collections.nlp.modules.common.megatron.token_level_encoder_decoder.MegatronTokenLevelEncoderDecoderModule + :show-inheritance: + :no-members: + :members: forward -Modules -------- +.. autoclass:: nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder.MegatronRetrievalTokenLevelEncoderDecoderModule + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.modules.BertModule - :show-inheritance: - :members: - -.. autoclass:: nemo.collections.nlp.modules.AlbertEncoder - :show-inheritance: - :members: -.. autoclass:: nemo.collections.nlp.modules.BertEncoder - :show-inheritance: - :members: - -.. autoclass:: nemo.collections.nlp.modules.DistilBertEncoder - :show-inheritance: - :members: +Datasets +-------- -.. autoclass:: nemo.collections.nlp.modules.RobertaEncoder - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset.BlendableDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceClassifier - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset.GPTDataset + :show-inheritance: -.. 
autoclass:: nemo.collections.nlp.modules.SequenceRegression - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset.MockGPTDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceTokenClassifier - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.bert_dataset.BertDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.get_lm_model +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.base_prompt_learning_dataset.BasePromptLearningDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.get_pretrained_lm_models_list +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.common.megatron.get_megatron_lm_models_list +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTChatDataset + :show-inheritance: -Datasets --------- +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.retro_dataset.RETRODataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_dataset.BertPunctuationCapitalizationDataset - :show-inheritance: - :members: - :special-members: __getitem__ +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.t5_dataset.T5Dataset + :show-inheritance: + :exclude-members: MAX_SEQ_LENGTH_DELTA -.. autofunction:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_tarred_dataset.create_tarred_dataset +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.t5_prompt_learning_dataset.T5PromptLearningDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_tarred_dataset.BertPunctuationCapitalizationTarredDataset - :show-inheritance: - :members: - :special-members: __iter__ - :exclude-members: reinforce_type +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.ul2_dataset.UL2Dataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_infer_dataset.BertPunctuationCapitalizationInferDataset - :show-inheritance: - :members: - :special-members: __getitem__ diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 7be679376175..ceddc1dca4d4 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -61,18 +61,19 @@ class MegatronBaseModel(NLPModel): """ - Megatron base class - It does the following things: - 1. Initialize the model parallel for nemo given the model parallel parameters. - 2. Turn on all the nvidia optimizations. - 3. If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the correct size for tensor model parallelism. - 4. If using distributed optimizer, configure to be compatible with - O2-level optimizations and/or model parallelism. - 5. Perform gradient clipping: `grad_clip_pl_default` triggers the - PyTorch Lightning default implementation, `with_distributed_adam` - triggers the distributed optimizer's implementation, - `megatron_amp_o2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. + Megatron base class. 
All NeMo Megatron models inherit from this class. + + - Initialize the model parallel world for nemo. + - Turn on all of the nvidia optimizations. + - If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the + correct size for tensor model parallelism. + - If using distributed optimizer, configure to be compatible + with O2 level optimizations and/or model parallelism. + - Perform gradient clipping: `grad_clip_pl_default` triggers + the PyTorch Lightning default implementation, `with_distributed_adam` triggers + the distributed optimizer's implementation, `megatron_amp_o2` triggers gradient clipping on the main grads, + and otherwise gradient clipping is performed on the model grads. + """ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): From 72132a200d915e88f461bbcb5db1e2bd54d8ed93 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Thu, 15 Jun 2023 01:57:56 -0400 Subject: [PATCH 041/123] Update transcribe_utils.py (#6865) fix ctc decoding for hybrid model in partial transcribe Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> --- nemo/collections/asr/parts/utils/transcribe_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 60f936306d05..7cf957a7cec0 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -388,7 +388,7 @@ def transcribe_partial_audio( decode_function = ( asr_model.decoding.rnnt_decoder_predictions_tensor if decoder_type == 'rnnt' - else asr_model.decoding.ctc_decoder_predictions_tensor + else asr_model.ctc_decoding.ctc_decoder_predictions_tensor ) elif hasattr(asr_model, 'joint'): # RNNT model decode_function = asr_model.decoding.rnnt_decoder_predictions_tensor From a6c8cce5573ece94f96c444900e53edf5e7b59d7 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Sat, 17 Jun 2023 09:05:51 -0600 Subject: [PATCH 042/123] Import Enum for chatbot component (#6877) * import Enum Signed-off-by: ericharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * make web server import conditional Signed-off-by: ericharper --------- Signed-off-by: ericharper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/nlp/language_modeling/megatron_gpt_eval.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index b33cdefc6df2..af1657b44d7b 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -23,7 +23,6 @@ from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel -from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo from nemo.collections.nlp.modules.common.text_generation_server import MegatronServer from nemo.collections.nlp.modules.common.text_generation_utils import generate from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam @@ -297,6 +296,8 @@ def main(cfg) -> None: # Third method of running text generation, use inference server if 
cfg.server: + from nemo.collections.nlp.modules.common.megatron_web_server import get_chatbot_demo, get_demo + if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: if cfg.web_server: if cfg.chat: From 6f2035bf985053bdda269f4df3b9338f5b58bfd5 Mon Sep 17 00:00:00 2001 From: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Date: Sat, 17 Jun 2023 16:39:50 -0700 Subject: [PATCH 043/123] [bugfix] avoid the random shuffle of phoneme and tone tokens. (#6855) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- nemo/collections/tts/g2p/models/zh_cn_pinyin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/tts/g2p/models/zh_cn_pinyin.py b/nemo/collections/tts/g2p/models/zh_cn_pinyin.py index 35a22f6ba118..aab57c925c82 100644 --- a/nemo/collections/tts/g2p/models/zh_cn_pinyin.py +++ b/nemo/collections/tts/g2p/models/zh_cn_pinyin.py @@ -82,11 +82,11 @@ def __init__( if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path) else phoneme_dict ) - self.phoneme_list = list({pron for prons in phoneme_dict.values() for pron in prons}) + self.phoneme_list = sorted({pron for prons in phoneme_dict.values() for pron in prons}) # tones self.tone_dict = {str(x): tone_prefix + str(x) for x in range(1, 6)} - self.tone_list = list(self.tone_dict.values()) + self.tone_list = sorted(self.tone_dict.values()) # ascii letters self.ascii_letter_dict = { From 990c764d133adc99a34dac1469c4e4ed35d03813 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 18 Jun 2023 15:49:03 -0600 Subject: [PATCH 044/123] update mcore version (#6875) (#6876) Signed-off-by: ericharper Co-authored-by: Eric Harper --- README.rst | 2 +- requirements/requirements_nlp.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 863b279b2be8..6742eb1f07d4 100644 --- a/README.rst +++ b/README.rst @@ -263,7 +263,7 @@ packaging is also needed: .. 
code-block:: bash - pip install -y packaging + pip install packaging Transformer Engine diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 1ff4c444c2bf..582862361a22 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -12,7 +12,7 @@ inflect jieba markdown2 matplotlib>=3.3.2 -megatron_core==0.1.0 +megatron_core==0.2.0 nltk>=3.6.5 opencc pangu From 3aac7958ebc0936296d6fa8328d2d7f51793187c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 18 Jun 2023 15:50:41 -0600 Subject: [PATCH 045/123] Add trainer.validate example for GPT (#6794) (#6822) * add trainer.validate example * clean up white space * add mbs and gbs to the config --------- Signed-off-by: ericharper Co-authored-by: Eric Harper --- .../conf/megatron_gpt_validate_config.yaml | 22 +++ .../megatron_gpt_validate.py | 155 ++++++++++++++++++ .../language_modeling/megatron_gpt_model.py | 21 +-- 3 files changed, 188 insertions(+), 10 deletions(-) create mode 100644 examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml create mode 100644 examples/nlp/language_modeling/megatron_gpt_validate.py diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml new file mode 100644 index 000000000000..39b0c7ed2176 --- /dev/null +++ b/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml @@ -0,0 +1,22 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + log_every_n_steps: 1 + limit_val_batches: 10 + limit_test_batches: 50 + max_steps: 100 # needed to setup dataloaders + max_epochs: null + replace_sampler_ddp: False + +tensor_model_parallel_size: ??? # should be set the same as the pretrained model that is being restored from +pipeline_model_parallel_size: ??? # should be set the same as the pretrained model that is being restored from +micro_batch_size: null # limited by GPU memory, defaults to pretrained model config +global_batch_size: null # will use more micro batches to reach global batch size, defaults to pretrained model config +virtual_pipeline_model_parallel_size: null +gpt_model_file: null # GPT nemo file path +checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training +checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null # model configuration file, only used for PTL checkpoint loading diff --git a/examples/nlp/language_modeling/megatron_gpt_validate.py b/examples/nlp/language_modeling/megatron_gpt_validate.py new file mode 100644 index 000000000000..b5a61e627a14 --- /dev/null +++ b/examples/nlp/language_modeling/megatron_gpt_validate.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import tempfile + +from omegaconf import OmegaConf, open_dict +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import ( + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + +""" Example script showing how to run validation on a MegatronGPT model. + + Sample usage: + + From nemo model: + + python megatron_gpt_validate.py \ + trainer.devices=4 \ + trainer.num_nodes=1 \ + trainer.limit_val_batches=10 \ + trainer.max_steps=100 \ + tensor_model_parallel_size=1 \ + pipeline_model_parallel_size=4 \ + trainer.precision=bf16 \ + gpt_model_file=/path/to/megatron_gpt_tp_1_pp4.nemo + + from PTL checkpoint: + python megatron_gpt_validate.py \ + trainer.devices=4 \ + trainer.num_nodes=1 \ + trainer.limit_val_batches=10 \ + trainer.max_steps=100 \ + tensor_model_parallel_size=1 \ + pipeline_model_parallel_size=4 \ + virtual_pipeline_model_parallel_size=4 \ + trainer.precision=bf16 \ + checkpoint_dir='/path/to/experiment/checkpoints' \ + checkpoint_name='megatron_gpt--val_loss=7.78-step=100-consumed_samples=6336.0-last.ckpt' \ + hparams_file='/path/to/experiment/hparams.yaml + +""" + + +def modify_pretrained_cfg(pretrained_cfg, trainer, cfg): + with open_dict(pretrained_cfg): + OmegaConf.set_struct(pretrained_cfg, True) + pretrained_cfg.sequence_parallel = False + pretrained_cfg.activations_checkpoint_granularity = None + pretrained_cfg.activations_checkpoint_method = None + pretrained_cfg.precision = trainer.precision + if cfg.micro_batch_size is not None: + pretrained_cfg.micro_batch_size = cfg.micro_batch_size + if cfg.global_batch_size is not None: + pretrained_cfg.global_batch_size = cfg.global_batch_size + if trainer.precision == "16": + pretrained_cfg.megatron_amp_O2 = False + return pretrained_cfg + + +@hydra_runner(config_path="conf", config_name="megatron_gpt_validate_config") +def main(cfg) -> None: + + trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) + + assert ( + cfg.trainer.devices * cfg.trainer.num_nodes + == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size" + + if cfg.gpt_model_file: + logging.info(f"Restoring model from {cfg.gpt_model_file}") + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file + + pretrained_cfg = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + pretrained_cfg = modify_pretrained_cfg(pretrained_cfg, trainer, cfg) + model = MegatronGPTModel.restore_from( + restore_path=cfg.gpt_model_file, + trainer=trainer, + override_config_path=pretrained_cfg, + save_restore_connector=save_restore_connector, + map_location=f'cuda:{trainer.local_rank}', # map_location is needed for converted models + ) + elif cfg.checkpoint_dir: + logging.info( + f"Restoring model from checkpoint_dir: {cfg.checkpoint_dir} with checkpoint name: {cfg.checkpoint_name}" + 
) + app_state = AppState() + if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size + app_state.virtual_pipeline_model_parallel_size = cfg.virtual_pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size, + virtual_pipeline_model_parallel_size_=cfg.virtual_pipeline_model_parallel_size, + ) + checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name)) + pretrained_cfg = OmegaConf.load(cfg.hparams_file) + pretrained_cfg = modify_pretrained_cfg(pretrained_cfg.cfg, trainer, cfg) + with tempfile.NamedTemporaryFile(suffix='.yaml') as f: + OmegaConf.save(config=pretrained_cfg, f=f.name) + model = MegatronGPTModel.load_from_checkpoint( + checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name, + ) + else: + raise ValueError("need at least a nemo file or checkpoint dir") + + logging.info("\n\n************** Model configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(model.cfg)}') + + trainer.validate(model=model) + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 853c637eb3b3..1ce153bcf0fb 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1016,17 +1016,18 @@ def setup(self, stage=None): self.setup_validation_data(self.cfg.data) self.setup_test_data(self.cfg.data) - # when using pipeline model parallel the final stage need to initialize word embeddings - if parallel_state.get_pipeline_model_parallel_world_size() > 1: - if isinstance(self.model, list): - for i, module in enumerate(self.model): - parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if stage == 'fit': + # when using pipeline model parallel the final stage need to initialize word embeddings + if parallel_state.get_pipeline_model_parallel_world_size() > 1: + if isinstance(self.model, list): + for i, module in enumerate(self.model): + parallel_state.set_virtual_pipeline_model_parallel_rank(i) + if self.cfg.get('share_embeddings_and_output_weights', True): + module.sync_initial_word_embeddings() + parallel_state.set_virtual_pipeline_model_parallel_rank(0) + else: if self.cfg.get('share_embeddings_and_output_weights', True): - module.sync_initial_word_embeddings() - parallel_state.set_virtual_pipeline_model_parallel_rank(0) - else: - if self.cfg.get('share_embeddings_and_output_weights', True): - self.model.sync_initial_word_embeddings() + self.model.sync_initial_word_embeddings() if self.cfg.get('transformer_engine', False): self.setup_transformer_engine_tp_groups() From fc8407112dc7fac30ddd2038f56c9c229a3fb3fe Mon Sep 17 00:00:00 2001 From: Adi 
Renduchintala Date: Sun, 18 Jun 2023 22:46:02 -0700 Subject: [PATCH 046/123] typo fix from #6666 (#6882) * typo fix Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/nlp/modules/common/text_generation_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 6417f887c0cd..d84d16efb5ba 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -105,7 +105,8 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], min_tokens_to_generate=length_params['min_length'], - compute_attention_mask=sampling_params.get("compute_attention_mask", True) ** strategy_args, + compute_attention_mask=sampling_params.get("compute_attention_mask", True), + **strategy_args, ) compute_prob_response = get_computeprob_response(tokenizer, response, inputs) return compute_prob_response From e418f71ed562362e8ba2a9aa8249f65390b6e019 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Tue, 20 Jun 2023 19:52:18 +0300 Subject: [PATCH 047/123] Fix k2 build topo helper (#6887) Fix k2 build topo helper: reassign modified labels attribute Signed-off-by: Vladimir Bataev --- nemo/collections/asr/parts/k2/topologies.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nemo/collections/asr/parts/k2/topologies.py b/nemo/collections/asr/parts/k2/topologies.py index c892b2643332..a3b6fcf0fef7 100644 --- a/nemo/collections/asr/parts/k2/topologies.py +++ b/nemo/collections/asr/parts/k2/topologies.py @@ -46,9 +46,11 @@ def build_topo(name: str, tokens: List[int], blank_num: int, with_self_loops: bo else: raise ValueError(f"Unknown topo name: {name}") if blank_num != 0: - blank_mask = ans.labels == 0 - ans.labels[(ans.labels != -1) & (ans.labels <= blank_num)] -= 1 - ans.labels[blank_mask] = blank_num + labels = ans.labels + blank_mask = labels == 0 + labels[(labels != -1) & (labels <= blank_num)] -= 1 + labels[blank_mask] = blank_num + ans.labels = labels # force update ans.labels property to notify FSA about modifications, required by k2 ans = k2.arc_sort(ans) return ans From 63d9b2c906d080ef412c7fcd20ccbcaa88404154 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 20 Jun 2023 16:24:23 -0600 Subject: [PATCH 048/123] Update container for import action (#6883) * update container Signed-off-by: ericharper * run import tests in parallel, isntall Cython Signed-off-by: ericharper * fix typo Signed-off-by: ericharper * remove redundant comment Signed-off-by: ericharper * fix more typos Signed-off-by: ericharper * upload and download wheel Signed-off-by: ericharper * fix typos Signed-off-by: ericharper * fix typos Signed-off-by: ericharper * test order Signed-off-by: ericharper * remove name Signed-off-by: ericharper * fix indent Signed-off-by: ericharper * add names back Signed-off-by: ericharper * don't upload or download just build in parallel Signed-off-by: ericharper --------- Signed-off-by: ericharper --- .github/workflows/import-test.yml | 62 ++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git 
a/.github/workflows/import-test.yml b/.github/workflows/import-test.yml index 5fc34347710d..e9b10e1e34af 100644 --- a/.github/workflows/import-test.yml +++ b/.github/workflows/import-test.yml @@ -6,25 +6,24 @@ on: paths: - "**" +# Check https://hub.docker.com/r/pytorch/pytorch/tags for latest tags jobs: - ci-import-check: - runs-on: ubuntu-latest - # Check https://hub.docker.com/r/pytorch/pytorch/tags for latest tags + test-asr-imports: + runs-on: ubuntu-latest container: - image: pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime - + image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime steps: - - uses: actions/checkout@v2 - + - name: Checkout repo + uses: actions/checkout@v2 - name: Update base dependencies run: | apt-get update && apt-get install -y build-essential apt-get install -y libsndfile1 make - - name: Install nemo dependencies id: nemo-wheel run: | + pip install Cython # install test requirements pip install -r requirements/requirements_test.txt # Build nemo as a wheel @@ -33,7 +32,6 @@ jobs: # Preserve wheel location DIST_FILE=$(find ./dist -name "*.whl" | head -n 1) echo "::set-output name=DIST_FILE::${DIST_FILE}" - - name: Test ASR Domain Imports run: | # Install NeMo Domain @@ -43,6 +41,29 @@ jobs: # Uninstall NeMo pip uninstall -y nemo_toolkit + test-tts-imports: + runs-on: ubuntu-latest + container: + image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Update base dependencies + run: | + apt-get update && apt-get install -y build-essential + apt-get install -y libsndfile1 make + - name: Install nemo dependencies + id: nemo-wheel + run: | + pip install Cython + # install test requirements + pip install -r requirements/requirements_test.txt + # Build nemo as a wheel + pip install build + python -m build --no-isolation --wheel + # Preserve wheel location + DIST_FILE=$(find ./dist -name "*.whl" | head -n 1) + echo "::set-output name=DIST_FILE::${DIST_FILE}" - name: Test TTS Domain Imports run: | # Install NeMo Domain @@ -52,6 +73,29 @@ jobs: # Uninstall NeMo pip uninstall -y nemo_toolkit + test-nlp-imports: + runs-on: ubuntu-latest + container: + image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Update base dependencies + run: | + apt-get update && apt-get install -y build-essential + apt-get install -y libsndfile1 make + - name: Install nemo dependencies + id: nemo-wheel + run: | + pip install Cython + # install test requirements + pip install -r requirements/requirements_test.txt + # Build nemo as a wheel + pip install build + python -m build --no-isolation --wheel + # Preserve wheel location + DIST_FILE=$(find ./dist -name "*.whl" | head -n 1) + echo "::set-output name=DIST_FILE::${DIST_FILE}" - name: Test NLP Domain Imports run: | # Install NeMo Domain From 24837af3cd0b7cd5a79df1bb3dc4cc87f0c5a438 Mon Sep 17 00:00:00 2001 From: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Date: Wed, 21 Jun 2023 18:30:54 +0300 Subject: [PATCH 049/123] removed unnecessary print (#6884) Signed-off-by: Dmytro Pykhtar --- .../nlp/models/language_modeling/megatron_gpt_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 1ce153bcf0fb..c4bfdbbad143 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ 
b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -518,7 +518,6 @@ def training_step(self, dataloader_iter, batch_idx): if self.rampup_batch_size: num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR current_global_batch_size = num_microbatch_calculator.current_global_batch_size - logging.info(current_global_batch_size) # do validation and save the checkpoint when gbs is changed if self.prev_global_batch_size != current_global_batch_size and self.prev_global_batch_size: self.trainer.should_stop = True From 328bbbbe378507c0756c5399168770d3adb9309c Mon Sep 17 00:00:00 2001 From: mikolajblaz Date: Wed, 21 Jun 2023 20:12:11 +0200 Subject: [PATCH 050/123] Fix destructor for delayed mmap dataset case (#6703) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mikołaj Błaż Co-authored-by: Eric Harper --- .../nlp/data/language_modeling/megatron/indexed_dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py index 0fffb5b64a23..fe71e7f78019 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/indexed_dataset.py @@ -513,6 +513,8 @@ def _do_init(self, path, skip_warmup=True, delay_data_mmap=False): self._create_data_mmap(skip_warmup) else: logging.info(" skip creating data numpy buffer of mmap...") + self._bin_buffer_mmap = None + self._bin_buffer = None def _create_data_mmap(self, skip_warmup): if not skip_warmup: @@ -524,7 +526,8 @@ def _create_data_mmap(self, skip_warmup): self._bin_buffer = memoryview(self._bin_buffer_mmap) def __del__(self): - self._bin_buffer_mmap._mmap.close() + if self._bin_buffer_mmap is not None: + self._bin_buffer_mmap._mmap.close() del self._bin_buffer_mmap del self._index From 07ea9715db22f97b1b4758e88f00dd3f87ad4296 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Wed, 21 Jun 2023 13:55:47 -0700 Subject: [PATCH 051/123] removed some tests (#6900) * removed some tests Signed-off-by: arendu * updated Signed-off-by: arendu --------- Signed-off-by: arendu --- Jenkinsfile | 718 ++++++++++++++++++++++++++-------------------------- 1 file changed, 363 insertions(+), 355 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d335378173f0..8a151d34c336 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -624,96 +624,97 @@ pipeline { } } - stage('L2: Megatron T5 Adapter PP=2') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('T5 Adapter tuning & inference TP=1 PP=2') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ - --config-name=megatron_t5_adapter_tuning_config \ - name='test_tp1_pp2' \ - exp_manager.exp_dir='examples/adapter_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.pipeline_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - 
model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ - --config-name=megatron_t5_adapter_inference \ - adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ - trainer.devices=2 \ - data.num_workers=1 \ - tensor_model_parallel_size=1 \ - pipeline_model_parallel_size=2 \ - data.global_batch_size=2 \ - data.micro_batch_size=2 \ - data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - pred_file_path='examples/adapter_tuning/test_tp1_pp2/preds.txt'" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2" - } - } - } - } - stage('L2: Megatron T5 Adapter TP=2') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('T5 Adapter tuning & inference TP=2 PP=1') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ - --config-name=megatron_t5_adapter_tuning_config \ - name='test_tp2_pp1' \ - exp_manager.exp_dir='examples/adapter_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ - --config-name=megatron_t5_adapter_inference \ - adapter_model_file='examples/adapter_tuning/test_tp2_pp1.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ - trainer.devices=2 \ - tensor_model_parallel_size=2 \ - data.global_batch_size=2 \ - data.micro_batch_size=2 \ - data.num_workers=1 \ - data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - pred_file_path='examples/adapter_tuning/test_tp2_pp1/preds.txt'" - sh "rm -rf examples/adapter_tuning/test_tp2_pp1.nemo" - sh "rm -rf examples/adapter_tuning/test_tp2_pp1" - } - } - } - } + // commented out temporarily to save time on github ci + //stage('L2: Megatron T5 Adapter PP=2') { + // when { + // anyOf { + // branch 'main' + // changeRequest target: 'main' + // } + // } + // failFast true + // parallel{ + // stage('T5 Adapter tuning & inference TP=1 PP=2') { + // steps { + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ + // --config-name=megatron_t5_adapter_tuning_config \ + // name='test_tp1_pp2' \ + // exp_manager.exp_dir='examples/adapter_tuning' \ + // trainer.devices=2 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=1 \ + // model.pipeline_model_parallel_size=2 \ + // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "python 
examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ + // --config-name=megatron_t5_adapter_inference \ + // adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp1_pp2.nemo' \ + // trainer.devices=2 \ + // data.num_workers=1 \ + // tensor_model_parallel_size=1 \ + // pipeline_model_parallel_size=2 \ + // data.global_batch_size=2 \ + // data.micro_batch_size=2 \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // pred_file_path='examples/adapter_tuning/test_tp1_pp2/preds.txt'" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2" + // } + // } + // } + //} + //stage('L2: Megatron T5 Adapter TP=2') { + // when { + // anyOf { + // branch 'main' + // changeRequest target: 'main' + // } + // } + // failFast true + // parallel{ + // stage('T5 Adapter tuning & inference TP=2 PP=1') { + // steps { + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_tuning.py \ + // --config-name=megatron_t5_adapter_tuning_config \ + // name='test_tp2_pp1' \ + // exp_manager.exp_dir='examples/adapter_tuning' \ + // trainer.devices=2 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=2 \ + // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "python examples/nlp/language_modeling/tuning/megatron_t5_adapter_eval.py \ + // --config-name=megatron_t5_adapter_inference \ + // adapter_model_file='examples/adapter_tuning/test_tp2_pp1.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m_tp2.nemo' \ + // trainer.devices=2 \ + // tensor_model_parallel_size=2 \ + // data.global_batch_size=2 \ + // data.micro_batch_size=2 \ + // data.num_workers=1 \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // pred_file_path='examples/adapter_tuning/test_tp2_pp1/preds.txt'" + // sh "rm -rf examples/adapter_tuning/test_tp2_pp1.nemo" + // sh "rm -rf examples/adapter_tuning/test_tp2_pp1" + // } + // } + // } + //} stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { @@ -847,50 +848,51 @@ pipeline { } } } - stage('L2: Megatron GPT Adapter PP=2') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('GPT Adapter tuning & inference TP=1 PP=2') { - steps { - sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py \ - --config-name=megatron_gpt_adapter_tuning_config \ - name='test_tp1_pp2' \ - exp_manager.exp_dir='examples/adapter_tuning' \ - trainer.devices=2 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.pipeline_model_parallel_size=2 \ - model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - 
model.global_batch_size=4" - sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py \ - --config-name=megatron_gpt_adapter_inference \ - adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ - gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ - inference.greedy=True \ - inference.add_BOS=False \ - trainer.devices=2 \ - num_workers=1 \ - tensor_model_parallel_size=2 \ - data_paths=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl']" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" - sh "rm -rf examples/adapter_tuning/test_tp1_pp2" - } - } - } - } + // commented out to save time on github ci @adithyare + //stage('L2: Megatron GPT Adapter PP=2') { + // when { + // anyOf { + // branch 'main' + // changeRequest target: 'main' + // } + // } + // failFast true + // parallel{ + // stage('GPT Adapter tuning & inference TP=1 PP=2') { + // steps { + // sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_tuning.py \ + // --config-name=megatron_gpt_adapter_tuning_config \ + // name='test_tp1_pp2' \ + // exp_manager.exp_dir='examples/adapter_tuning' \ + // trainer.devices=2 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=1 \ + // model.pipeline_model_parallel_size=2 \ + // model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "python examples/nlp/language_modeling/tuning/megatron_gpt_adapter_eval.py \ + // --config-name=megatron_gpt_adapter_inference \ + // adapter_model_file='examples/adapter_tuning/test_tp1_pp2.nemo' \ + // gpt_model_file='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp2.nemo' \ + // inference.greedy=True \ + // inference.add_BOS=False \ + // trainer.devices=2 \ + // num_workers=1 \ + // tensor_model_parallel_size=2 \ + // data_paths=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl']" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2.nemo" + // sh "rm -rf examples/adapter_tuning/test_tp1_pp2" + // } + // } + // } + //} stage('L2: Speech Transcription') { when { anyOf { @@ -3278,43 +3280,44 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" - sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ - trainer.devices=2 \ - trainer.accelerator=gpu \ - trainer.log_every_n_steps=1 \ - trainer.val_check_interval=2 \ - trainer.limit_val_batches=1 \ - trainer.accumulate_grad_batches=1 \ - trainer.max_steps=6 \ - trainer.precision=16 \ - trainer.gradient_clip_val=1.0 \ - exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ - exp_manager.resume_if_exists=True \ - model.tensor_model_parallel_size=2 \ - model.optim.name=fused_adam \ - model.optim.lr=2e-4 \ - model.optim.sched.warmup_steps=2 \ - model.optim.sched.constant_steps=2 \ - model.optim.sched.min_lr=8e-5 \ - 
model.max_position_embeddings=128 \ - model.encoder_seq_length=128 \ - model.data.seq_length=128 \ - model.position_embedding_type=rope \ - model.rotary_percentage=0.5 \ - model.normalization=rmsnorm \ - model.bias=False \ - model.bias_activation_fusion=False \ - model.bias_dropout_add_fusion=False \ - model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ - model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ - model.num_layers=8 \ - model.hidden_size=256 \ - model.num_attention_heads=8 \ - model.activations_checkpoint_method='block' \ - model.activations_checkpoint_granularity='full' \ - model.activations_checkpoint_num_layers=1 \ - model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ - model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + // commented out to save time on github ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=rope \ + //model.rotary_percentage=0.5 \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } @@ -3365,44 +3368,45 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ model.use_flash_attention=True" - sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ - trainer.devices=2 \ - trainer.accelerator=gpu \ - trainer.log_every_n_steps=1 \ - trainer.val_check_interval=2 \ - trainer.limit_val_batches=1 \ - trainer.accumulate_grad_batches=1 \ - trainer.max_steps=6 \ - 
trainer.precision=16 \ - trainer.gradient_clip_val=1.0 \ - exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ - exp_manager.resume_if_exists=True \ - model.tensor_model_parallel_size=2 \ - model.optim.name=fused_adam \ - model.optim.lr=2e-4 \ - model.optim.sched.warmup_steps=2 \ - model.optim.sched.constant_steps=2 \ - model.optim.sched.min_lr=8e-5 \ - model.max_position_embeddings=128 \ - model.encoder_seq_length=128 \ - model.data.seq_length=128 \ - model.position_embedding_type=rope \ - model.rotary_percentage=0.5 \ - model.normalization=rmsnorm \ - model.bias=False \ - model.bias_activation_fusion=False \ - model.bias_dropout_add_fusion=False \ - model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ - model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ - model.num_layers=8 \ - model.hidden_size=256 \ - model.num_attention_heads=8 \ - model.activations_checkpoint_method='block' \ - model.activations_checkpoint_granularity='full' \ - model.activations_checkpoint_num_layers=1 \ - model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ - model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ - model.use_flash_attention=True" + // commented out to save time on github ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=rope \ + //model.rotary_percentage=0.5 \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \ + //model.use_flash_attention=True" sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } @@ -3451,42 +3455,43 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.activations_checkpoint_num_layers=1 \ 
model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" - sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ - trainer.devices=2 \ - trainer.accelerator=gpu \ - trainer.log_every_n_steps=1 \ - trainer.val_check_interval=2 \ - trainer.limit_val_batches=1 \ - trainer.accumulate_grad_batches=1 \ - trainer.max_steps=6 \ - trainer.precision=16 \ - trainer.gradient_clip_val=1.0 \ - exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ - exp_manager.resume_if_exists=True \ - model.tensor_model_parallel_size=2 \ - model.optim.name=fused_adam \ - model.optim.lr=2e-4 \ - model.optim.sched.warmup_steps=2 \ - model.optim.sched.constant_steps=2 \ - model.optim.sched.min_lr=8e-5 \ - model.max_position_embeddings=128 \ - model.encoder_seq_length=128 \ - model.data.seq_length=128 \ - model.position_embedding_type=alibi \ - model.normalization=rmsnorm \ - model.bias=False \ - model.bias_activation_fusion=False \ - model.bias_dropout_add_fusion=False \ - model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ - model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ - model.num_layers=8 \ - model.hidden_size=256 \ - model.num_attention_heads=8 \ - model.activations_checkpoint_method='block' \ - model.activations_checkpoint_granularity='full' \ - model.activations_checkpoint_num_layers=1 \ - model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ - model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + // not testing resume functionality to save time on ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=alibi \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + //model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + 
//model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } @@ -3535,42 +3540,43 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" - sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ - trainer.devices=2 \ - trainer.accelerator=gpu \ - trainer.log_every_n_steps=1 \ - trainer.val_check_interval=2 \ - trainer.limit_val_batches=1 \ - trainer.accumulate_grad_batches=1 \ - trainer.max_steps=6 \ - trainer.precision=16 \ - trainer.gradient_clip_val=1.0 \ - exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ - exp_manager.resume_if_exists=True \ - model.tensor_model_parallel_size=2 \ - model.optim.name=fused_adam \ - model.optim.lr=2e-4 \ - model.optim.sched.warmup_steps=2 \ - model.optim.sched.constant_steps=2 \ - model.optim.sched.min_lr=8e-5 \ - model.max_position_embeddings=128 \ - model.encoder_seq_length=128 \ - model.data.seq_length=128 \ - model.position_embedding_type=kerple \ - model.normalization=rmsnorm \ - model.bias=False \ - model.bias_activation_fusion=False \ - model.bias_dropout_add_fusion=False \ - model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ - model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ - model.num_layers=8 \ - model.hidden_size=256 \ - model.num_attention_heads=8 \ - model.activations_checkpoint_method='block' \ - model.activations_checkpoint_granularity='full' \ - model.activations_checkpoint_num_layers=1 \ - model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ - model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" + // commented out to save time on github ci @adithyare + //sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \ + //trainer.devices=2 \ + //trainer.accelerator=gpu \ + //trainer.log_every_n_steps=1 \ + //trainer.val_check_interval=2 \ + //trainer.limit_val_batches=1 \ + //trainer.accumulate_grad_batches=1 \ + //trainer.max_steps=6 \ + //trainer.precision=16 \ + //trainer.gradient_clip_val=1.0 \ + //exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \ + //exp_manager.resume_if_exists=True \ + //model.tensor_model_parallel_size=2 \ + //model.optim.name=fused_adam \ + //model.optim.lr=2e-4 \ + //model.optim.sched.warmup_steps=2 \ + //model.optim.sched.constant_steps=2 \ + //model.optim.sched.min_lr=8e-5 \ + //model.max_position_embeddings=128 \ + //model.encoder_seq_length=128 \ + //model.data.seq_length=128 \ + //model.position_embedding_type=kerple \ + //model.normalization=rmsnorm \ + //model.bias=False \ + //model.bias_activation_fusion=False \ + //model.bias_dropout_add_fusion=False \ + //model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \ + //model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \ + //model.num_layers=8 \ + //model.hidden_size=256 \ + //model.num_attention_heads=8 \ + 
//model.activations_checkpoint_method='block' \ + //model.activations_checkpoint_granularity='full' \ + //model.activations_checkpoint_num_layers=1 \ + //model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ + //model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results" sh "rm -rf examples/nlp/language_modeling/gpt_index_mappings" } @@ -3856,40 +3862,41 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' rm -rf examples/nlp/language_modeling/out.jsonl" } } - stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('GPT Prompt Learning TP=1 PP=1') { - steps { - sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ - --config-name=megatron_gpt_prompt_learning_config \ - name='/home/TestData/nlp/prompt_learning/prompt_tuning_test' \ - trainer.devices=1 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.tensor_model_parallel_size=1 \ - model.virtual_prompt_style='p-tuning' \ - model.p_tuning.encoder_type='embedding' \ - model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp1.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['rte'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ - model.global_batch_size=4" - sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test" - sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test.nemo" - } - } - } - } + // commented out to save time we are testing tp>1 and pp>1 anyway. 
@adithyare + //stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { + // when { + // anyOf { + // branch 'main' + // changeRequest target: 'main' + // } + // } + // failFast true + // parallel{ + // stage('GPT Prompt Learning TP=1 PP=1') { + // steps { + // sh "python examples/nlp/language_modeling/megatron_gpt_prompt_learning.py \ + // --config-name=megatron_gpt_prompt_learning_config \ + // name='/home/TestData/nlp/prompt_learning/prompt_tuning_test' \ + // trainer.devices=1 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.tensor_model_parallel_size=1 \ + // model.virtual_prompt_style='p-tuning' \ + // model.p_tuning.encoder_type='embedding' \ + // model.language_model_path='/home/TestData/nlp/megatron_gpt/tiny/megatron_14m_gpt_tp1_pp1.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['rte'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/rte_CI_test.jsonl'] \ + // model.global_batch_size=4" + // sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test" + // sh "rm -rf /home/TestData/nlp/prompt_learning/prompt_tuning_test.nemo" + // } + // } + // } + //} stage('L2: Megatron GPT Prompt Tuning TP2 PP1') { when { @@ -4456,46 +4463,47 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } } - stage('L2: Megatron T5 Prompt Learning TP1 PP1') { - when { - anyOf { - branch 'main' - changeRequest target: 'main' - } - } - failFast true - parallel{ - stage('T5 Prompt Learning TP=1 PP=1') { - steps { - sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ - --config-name=megatron_t5_prompt_learning \ - name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test' \ - trainer.devices=1 \ - trainer.max_steps=1 \ - trainer.val_check_interval=1 \ - trainer.max_epochs=null \ - model.data.num_workers=1 \ - model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ - model.existing_tasks=[] \ - model.new_tasks=['squad'] \ - model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - model.global_batch_size=4 \ - model.micro_batch_size=4" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test" - sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ - virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo' \ - language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ - data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ - pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt' \ - data.global_batch_size=4 \ - data.micro_batch_size=4" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo" - sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt" - } - } - } - } + // commented out to save time in github ci, we have tp>1 and pp>1 tests anyway @adithyare + //stage('L2: Megatron T5 Prompt Learning TP1 PP1') { + // when { + // anyOf { + // branch 'main' + // changeRequest target: 'main' + // } + // } + // failFast true + // parallel{ + // stage('T5 Prompt Learning TP=1 PP=1') { + // steps { + // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning.py \ + // --config-name=megatron_t5_prompt_learning \ + // 
name='/home/TestData/nlp/prompt_learning/t5_p_tuning_test' \ + // trainer.devices=1 \ + // trainer.max_steps=1 \ + // trainer.val_check_interval=1 \ + // trainer.max_epochs=null \ + // model.data.num_workers=1 \ + // model.language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ + // model.existing_tasks=[] \ + // model.new_tasks=['squad'] \ + // model.data.train_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ + // model.data.validation_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ + // model.global_batch_size=4 \ + // model.micro_batch_size=4" + // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test" + // sh "python examples/nlp/language_modeling/megatron_t5_prompt_learning_eval.py \ + // virtual_prompt_model_file='/home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo' \ + // language_model_path='/home/TestData/nlp/megatron_t5/8m/megatron_t5_8m-refactor.nemo' \ + // data.test_ds=['/home/TestData/nlp/prompt_learning/squad_CI_test.jsonl'] \ + // pred_file_path='/home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt' \ + // data.global_batch_size=4 \ + // data.micro_batch_size=4" + // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test.nemo" + // sh "rm -rf /home/TestData/nlp/prompt_learning/t5_p_tuning_test_preds.txt" + // } + // } + // } + //} stage('L2: Megatron T5 Prompt Learning TP2 PP1') { when { From 0b94ef808cd934128a696c9a75aa7f3a37727130 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Wed, 21 Jun 2023 17:41:31 -0400 Subject: [PATCH 052/123] Fix transcribe_utils.py for hybrid models in partial transcribe mode (#6899) * Fix transcribe_utils.py for hybrid models in partial transcribe mode Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * Update transcribe_utils.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- nemo/collections/asr/parts/utils/transcribe_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py index 7cf957a7cec0..11e7792cfb21 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -23,8 +23,7 @@ from tqdm.auto import tqdm import nemo.collections.asr as nemo_asr -from nemo.collections.asr.models import ASRModel -from nemo.collections.asr.models.ctc_models import EncDecCTCModel +from nemo.collections.asr.models import ASRModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils import rnnt_utils from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchASR from nemo.collections.common.parts.preprocessing.manifest import get_full_path @@ -421,6 +420,8 @@ def transcribe_partial_audio( input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) ) logits, logits_len = outputs[0], outputs[1] + if isinstance(asr_model, EncDecHybridRNNTCTCModel) and decoder_type == "ctc": + logits = asr_model.ctc_decoder(encoder_output=logits) if logprobs: # dump log probs per file for idx in range(logits.shape[0]): From 29015df1ee58d141cc00c0c6afe431002cff2201 Mon Sep 17 00:00:00 2001 From: George <37293288+Jorjeous@users.noreply.github.com> Date: Thu, 22 Jun 2023 
18:16:48 +0400 Subject: [PATCH 053/123] hot fix SDE (#6897) * hot fix SDE Signed-off-by: George * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: George Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vitaly Lavrukhin --- tools/speech_data_explorer/data_explorer.py | 301 ++++++++++---------- 1 file changed, 151 insertions(+), 150 deletions(-) diff --git a/tools/speech_data_explorer/data_explorer.py b/tools/speech_data_explorer/data_explorer.py index de2b342a1028..65eafc5c9d49 100755 --- a/tools/speech_data_explorer/data_explorer.py +++ b/tools/speech_data_explorer/data_explorer.py @@ -1138,140 +1138,161 @@ def display_query(query): ] ) - -comparison_layout = [ - html.Div( - [ - dcc.Markdown("model 1:" + ' ' + model_name_1[10:]), - dcc.Markdown("model 2:" + ' ' + model_name_2[10:]), - dcc.Dropdown( - ['word level', 'utterance level'], 'word level', placeholder="choose comparison lvl", id='lvl_choose' - ), - ] - ), - html.Hr(), - html.Div( - [ - html.Div( - [ - dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_1, id='xaxis-column'), - dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_2, id='yaxis-column'), - dcc.Dropdown( - for_col_names.select_dtypes(include='number').columns[::], - placeholder='Select what will encode color of points', - id='color-column', - ), - dcc.Dropdown( - for_col_names.select_dtypes(include='number').columns[::], - placeholder='Select what will encode size of points', - id='size-column', - ), - dcc.Dropdown( - ['yes', 'no'], - placeholder='if you want to enable dot spacing', - id='dot_spacing', - style={'width': '200%'}, - ), - dcc.Input(id='radius', placeholder='Enter radius of spacing (std is 0.01)'), - html.Hr(), - dcc.Input(id='filter-query-input', placeholder='Enter filter query',), - ], - style={'width': '200%', 'display': 'inline-block', 'float': 'middle'}, - ), - html.Hr(), - html.Div(id='filter-query-output'), - dash_table.DataTable( - id='datatable-advanced-filtering', - columns=wordstable_columns_tool, - data=vocabulary_1, - editable=False, - page_action='native', - page_size=5, - filter_action="native", - ), - html.Hr(), - html.Div(id='datatable-query-structure', style={'whitespace': 'pre'}), - html.Hr(), - dbc.Row(dbc.Col(dcc.Graph(id='voc_graph'),),), - html.Hr(), - ], - id='wrd_lvl', - style={'display': 'block'}, - ), - html.Div( - [ - html.Div( - [ - dcc.Dropdown(['WER', 'CER'], 'WER', placeholder="Choose metric", id="choose_metric"), - dbc.Row(dbc.Col(html.H5('Data'), class_name='text-secondary'), class_name='mt-3'), - html.Hr(), - html.Hr(), - dcc.Input(id='filter-query-input-2', placeholder='Enter filter query', style={'width': '100%'}), - html.Div(id='filter-query-output-2'), - dbc.Row( - dbc.Col( - [ - dash_table.DataTable( - id='datatable-advanced-filtering-2', - columns=[ - {'name': k.replace('_', ' '), 'id': k, 'hideable': True} - for k in data_with_metrics[0] - ], - data=data_with_metrics, - editable=False, - page_action='native', - page_size=5, - row_selectable='single', - selected_rows=[0], - page_current=0, - filter_action="native", - style_cell={ - 'overflow': 'hidden', - 'textOverflow': 'ellipsis', - 'maxWidth': 0, - 'textAlign': 'center', - }, - style_header={ - 'color': 'text-primary', - 'text_align': 'center', - 'height': 'auto', - 'whiteSpace': 'normal', - }, - css=[ - { - 'selector': '.dash-spreadsheet-menu', - 'rule': 'position:absolute; bottom: 8px', + 
comparison_layout = [ + html.Div( + [ + dcc.Markdown("model 1:" + ' ' + model_name_1[10:]), + dcc.Markdown("model 2:" + ' ' + model_name_2[10:]), + dcc.Dropdown( + ['word level', 'utterance level'], + 'word level', + placeholder="choose comparison lvl", + id='lvl_choose', + ), + ] + ), + html.Hr(), + html.Div( + [ + html.Div( + [ + dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_1, id='xaxis-column'), + dcc.Dropdown(for_col_names.columns[::], 'accuracy_model_' + model_name_2, id='yaxis-column'), + dcc.Dropdown( + for_col_names.select_dtypes(include='number').columns[::], + placeholder='Select what will encode color of points', + id='color-column', + ), + dcc.Dropdown( + for_col_names.select_dtypes(include='number').columns[::], + placeholder='Select what will encode size of points', + id='size-column', + ), + dcc.Dropdown( + ['yes', 'no'], + placeholder='if you want to enable dot spacing', + id='dot_spacing', + style={'width': '200%'}, + ), + dcc.Input(id='radius', placeholder='Enter radius of spacing (std is 0.01)'), + html.Hr(), + dcc.Input(id='filter-query-input', placeholder='Enter filter query',), + ], + style={'width': '200%', 'display': 'inline-block', 'float': 'middle'}, + ), + html.Hr(), + html.Div(id='filter-query-output'), + dash_table.DataTable( + id='datatable-advanced-filtering', + columns=wordstable_columns_tool, + data=vocabulary_1, + editable=False, + page_action='native', + page_size=5, + filter_action="native", + ), + html.Hr(), + html.Div(id='datatable-query-structure', style={'whitespace': 'pre'}), + html.Hr(), + dbc.Row(dbc.Col(dcc.Graph(id='voc_graph'),),), + html.Hr(), + ], + id='wrd_lvl', + style={'display': 'block'}, + ), + html.Div( + [ + html.Div( + [ + dcc.Dropdown(['WER', 'CER'], 'WER', placeholder="Choose metric", id="choose_metric"), + dbc.Row(dbc.Col(html.H5('Data'), class_name='text-secondary'), class_name='mt-3'), + html.Hr(), + html.Hr(), + dcc.Input( + id='filter-query-input-2', placeholder='Enter filter query', style={'width': '100%'} + ), + html.Div(id='filter-query-output-2'), + dbc.Row( + dbc.Col( + [ + dash_table.DataTable( + id='datatable-advanced-filtering-2', + columns=[ + {'name': k.replace('_', ' '), 'id': k, 'hideable': True} + for k in data_with_metrics[0] + ], + data=data_with_metrics, + editable=False, + page_action='native', + page_size=5, + row_selectable='single', + selected_rows=[0], + page_current=0, + filter_action="native", + style_cell={ + 'overflow': 'hidden', + 'textOverflow': 'ellipsis', + 'maxWidth': 0, + 'textAlign': 'center', }, - {'selector': '.dash-filter--case', 'rule': 'display: none'}, - {'selector': '.column-header--hide', 'rule': 'display: none'}, - ], + style_header={ + 'color': 'text-primary', + 'text_align': 'center', + 'height': 'auto', + 'whiteSpace': 'normal', + }, + css=[ + { + 'selector': '.dash-spreadsheet-menu', + 'rule': 'position:absolute; bottom: 8px', + }, + {'selector': '.dash-filter--case', 'rule': 'display: none'}, + {'selector': '.column-header--hide', 'rule': 'display: none'}, + ], + ), + dbc.Row(dbc.Col(html.Audio(id='player-1', controls=True),), class_name='mt-3'), + ] + ) + ), + ] + + [ + dbc.Row( + [ + dbc.Col( + html.Div(children=k.replace('_', '-')), + width=2, + class_name='mt-1 bg-light font-monospace text-break small rounded border', + ), + dbc.Col( + html.Div(id='__' + k), + class_name='mt-1 bg-light font-monospace text-break small rounded border', ), - dbc.Row(dbc.Col(html.Audio(id='player-1', controls=True),), class_name='mt-3'), ] ) - ), - ] - + [ - dbc.Row( - [ - 
dbc.Col( - html.Div(children=k.replace('_', '-')), - width=2, - class_name='mt-1 bg-light font-monospace text-break small rounded border', - ), - dbc.Col( - html.Div(id='__' + k), - class_name='mt-1 bg-light font-monospace text-break small rounded border', - ), - ] - ) - for k in data_with_metrics[0] - ] - ), - ], - id='unt_lvl', - ), -] + for k in data_with_metrics[0] + ] + ), + ], + id='unt_lvl', + ), + ] + [ + html.Div( + [ + html.Div( + [ + dbc.Row(dbc.Col(dcc.Graph(id='utt_graph'),),), + html.Hr(), + dcc.Input(id='clicked_aidopath', style={'width': '100%'}), + html.Hr(), + dcc.Input(id='my-output-1', style={'display': 'none'}), # we do need this + ] + ), + html.Div([dbc.Row(dbc.Col(dcc.Graph(id='signal-graph-1')), class_name='mt-3'),]), + ], + id='down_thing', + style={'display': 'block'}, + ) + ] if args.show_statistics is not None: comparison_layout += [ @@ -1329,26 +1350,6 @@ def show_hide_element(visibility_state): ) -comparison_layout += [ - html.Div( - [ - html.Div( - [ - dbc.Row(dbc.Col(dcc.Graph(id='utt_graph'),),), - html.Hr(), - dcc.Input(id='clicked_aidopath', style={'width': '100%'}), - html.Hr(), - dcc.Input(id='my-output-1', style={'display': 'none'}), # we do need this - ] - ), - html.Div([dbc.Row(dbc.Col(dcc.Graph(id='signal-graph-1')), class_name='mt-3'),]), - ], - id='down_thing', - style={'display': 'block'}, - ) -] - - if args.show_statistics is None: @app.callback( From a8609ab6a83377f30d42f6b225412f28b4b8f05b Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Thu, 22 Jun 2023 09:22:27 -0700 Subject: [PATCH 054/123] fix ptuning residuals bug (#6866) * fix for lora bug and makes ptuning w peft framework compatible with FT inference Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update Signed-off-by: arendu * simple forward call for adapters with residual Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updates Signed-off-by: arendu --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../megatron_gpt_peft_models.py | 16 ++++- .../megatron/adapters/parallel_adapters.py | 58 +++++++++++++------ .../modules/common/megatron/language_model.py | 5 +- .../modules/common/megatron/transformer.py | 21 +++---- .../nlp/modules/common/prompt_encoder.py | 5 +- 5 files changed, 66 insertions(+), 39 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index 930bfbc8cf25..f1f44e31e175 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -225,6 +225,12 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self.name_key_to_cfg = {AdapterName.PTUNING_ADAPTER: adapter_cfg} super().__init__(cfg, trainer) self.virtual_tokens = cfg.peft.p_tuning.virtual_tokens + self.trainable_keys = self.adapter_keys - set( + [ + "model.language_model.adapter_layer.ptuning_adapter.inference_table.prompt_table.taskname.prompt_embeddings.weight" + ] + ) + # we exclude the above parameter from training because it is present for backward compatibility for inference using FasterTransformer (@adithyare) def init_peft_modules(self,): """ @@ -268,7 +274,15 @@ def load_state_dict(self, state_dict, strict: bool = True): def 
setup_optimizer_param_groups(self): if self.first_stage_of_pipeline(): - super().setup_optimizer_param_groups() + # super().setup_optimizer_param_groups() + self.freeze() # Freeze the entire model + opt_params = [] + for n, p in self.named_parameters(): + if n in self.trainable_keys: + p.requires_grad = True + opt_params.append(p) + + self._optimizer_param_groups = ({"params": opt_params},) else: self.freeze() # Freeze the entire model self._optimizer_param_groups = ({"params": []},) diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index 679020019ab1..fe339c6f9a8b 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -27,6 +27,7 @@ from nemo.collections.common.parts.utils import activation_registry from nemo.collections.nlp.modules.common.megatron.fused_bias_gelu import fused_bias_gelu from nemo.collections.nlp.modules.common.megatron.utils import init_method_const, init_method_normal +from nemo.collections.nlp.modules.common.prompt_encoder import InferenceTable from nemo.core.classes.mixins import adapter_mixin_strategies try: @@ -65,13 +66,11 @@ class AdapterName(str, enum.Enum): class InfusedAdapter(nn.Module, AdapterModuleUtil): - def __init__( - self, in_features: int, adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, - ) -> None: + def __init__(self, in_features: int,) -> None: super().__init__() self.scalers = nn.Parameter(torch.ones(in_features)) # Setup adapter strategy - self.setup_adapter_strategy(adapter_strategy) + self.setup_adapter_strategy(adapter_mixin_strategies.ReturnResultAdapterStrategy()) def forward(self, x): x = x * self.scalers[None, None, :] @@ -90,7 +89,6 @@ class MLPInfusedAdapter(InfusedAdapter): @dataclass class InfusedAdapterConfig: in_features: int - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(InfusedAdapter.__module__, InfusedAdapter.__name__) @@ -112,7 +110,6 @@ def __init__( row_init_method: str = 'zero', # TODO: (@adithyare) should rename this to output_init_method to be more precise. 
gather_output: bool = True, dropout: float = 0.0, - adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, ): super().__init__() if not HAVE_APEX: @@ -153,7 +150,7 @@ def __init__( self.dropout = None # Setup adapter strategy - self.setup_adapter_strategy(adapter_strategy) + self.setup_adapter_strategy(adapter_mixin_strategies.ReturnResultAdapterStrategy()) def _get_init_fn(self, init_method: str): if init_method == 'xavier': @@ -196,7 +193,6 @@ class ParallelLinearAdapterConfig: row_init_method: str = 'zero' gather_output: bool = True dropout: float = 0.0 - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(ParallelLinearAdapter.__module__, ParallelLinearAdapter.__name__) @@ -250,13 +246,7 @@ class PromptEncoderAdapter(nn.Module, AdapterModuleUtil): """ def __init__( - self, - virtual_tokens: int, - bottleneck_dim: int, - embedding_dim: int, - init_std: float, - output_dim: int, - adapter_strategy: adapter_mixin_strategies.ResidualAddAdapterStrategyConfig = None, + self, virtual_tokens: int, bottleneck_dim: int, embedding_dim: int, init_std: float, output_dim: int, ): """ Initializes the Tensor Model parallel MLP PromptEncoderMLP module. @@ -278,6 +268,7 @@ def __init__( # (@adithyare) the persistent=False will not pollute the indices into the state_dict of this module. self.register_buffer("indices", torch.LongTensor(list(range(self.virtual_tokens))), persistent=False) self.embedding = torch.nn.Embedding(self.virtual_tokens, self.embedding_dim) + self.inference_table = InferenceTable("taskname", self.embedding_dim, self.virtual_tokens) self.first = ColumnParallelLinear( self.embedding_dim, self.bottleneck_dim, @@ -301,15 +292,47 @@ def __init__( gradient_accumulation_fusion=gradient_accumulation_fusion, ) # Setup adapter strategy - self.setup_adapter_strategy(adapter_strategy) + self.setup_adapter_strategy(adapter_mixin_strategies.ReturnResultAdapterStrategy()) + + def set_inference_table(self, prompt_representation: torch.Tensor): + """ + This method caches the output representation from the Encoder and saves it inside `self.inference_table`. 
+ """ + prompt_representation = prompt_representation.detach().clone() + self.inference_table.set_prompt_table(prompt_representation) + + def clear_inference_table(self,): + self.inference_table.clear_prompt_table() + + def get_inference_table(self,): + return self.inference_table.get_prompt_table() - def forward(self, batch_size): + def inner_forward(self,): input_embeds = self.embedding(self.indices).unsqueeze(0) intermediate_parallel, bias_parallel = self.first(input_embeds) intermediate_parallel = fused_bias_gelu(intermediate_parallel, bias_parallel) output_embeds, bias_parallel = self.second(intermediate_parallel) output_embeds = output_embeds + bias_parallel output_embeds = output_embeds.transpose(0, 1) + return output_embeds + + def forward(self, batch_size: int, use_cached_reps: bool = False) -> torch.Tensor: + """ + Forward pass through the encoder with caching of prompt representations + """ + if use_cached_reps: + output_embeds = self.get_inference_table().unsqueeze(1) + else: + if self.training: + if self.inference_table.is_inference_ready: + self.clear_inference_table() + output_embeds = self.inner_forward() + else: + if not self.inference_table.is_inference_ready: + output_embeds = self.inner_forward() + self.set_inference_table(output_embeds.squeeze(1)) + output_embeds = self.get_inference_table().unsqueeze(1) + output_embeds = output_embeds.expand(self.virtual_tokens, batch_size, self.output_dim) return output_embeds @@ -321,5 +344,4 @@ class PromptEncoderAdapterConfig: embedding_dim: int init_std: float output_dim: int - adapter_strategy: Optional[Any] = adapter_mixin_strategies.ResidualAddAdapterStrategyConfig() _target_: str = "{0}.{1}".format(PromptEncoderAdapter.__module__, PromptEncoderAdapter.__name__) diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index 2d10576dc7d0..a3fa3fd6d2be 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -746,10 +746,7 @@ def forward( ptuning_adapter = self.get_adapter_module(AdapterName.PTUNING_ADAPTER) v = ptuning_adapter.virtual_tokens if ptuning_adapter and _sq >= v: # The sequence should be longer the v to insert virtual embeddings. - strategy = ptuning_adapter.adapter_strategy - virtual_embeddings = self.forward_single_enabled_adapter_( - _bs, ptuning_adapter, adapter_name=AdapterName.PTUNING_ADAPTER, adapter_strategy=strategy, - ) + virtual_embeddings = ptuning_adapter(_bs) encoder_input = encoder_input[ v:, :, : ] # the first v tokens are pads so that they can be swapped out with virtual embeddings. 
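Reviewer note (not part of the patch): the new `PromptEncoderAdapter.forward(batch_size, use_cached_reps)` above recomputes the virtual-token representations during training, but at inference time computes them once, stores them in the inference table, and reuses them on later calls. The following is a minimal, self-contained sketch of that compute-once-and-cache behaviour under simplified assumptions; the class name, layer sizes, and the `cache` attribute are illustrative only and are not NeMo code.

```python
# Sketch of the caching pattern added in this patch (assumptions: tiny module,
# plain attribute instead of the real InferenceTable).
import torch


class TinyPromptEncoder(torch.nn.Module):
    def __init__(self, virtual_tokens=4, dim=8):
        super().__init__()
        self.emb = torch.nn.Embedding(virtual_tokens, dim)
        self.proj = torch.nn.Linear(dim, dim)
        self.cache = None  # stands in for the inference table

    def inner_forward(self):
        idx = torch.arange(self.emb.num_embeddings)
        return self.proj(self.emb(idx))  # [virtual_tokens, dim]

    def forward(self, batch_size):
        if self.training:
            # training: drop any cached representation and recompute
            self.cache = None
            out = self.inner_forward()
        else:
            # inference: compute once, then reuse the cached representation
            if self.cache is None:
                self.cache = self.inner_forward().detach()
            out = self.cache
        # expand to [virtual_tokens, batch_size, dim], as the real adapter does
        return out.unsqueeze(1).expand(-1, batch_size, -1)


enc = TinyPromptEncoder()
enc.eval()
a = enc(batch_size=2)
b = enc(batch_size=2)  # second call reuses the cached representations
assert torch.equal(a, b)
```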
diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 8a0b22b4d289..ea01acd14a23 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -549,13 +549,9 @@ def forward( if self.is_adapter_available(): adapter_1 = self.get_adapter_module(AdapterName.PRE_ATTN_ADAPTER) if adapter_1: - strategy = adapter_1.adapter_strategy - attention_output = self.forward_single_enabled_adapter_( - attention_output, - adapter_1, - adapter_name=AdapterName.PRE_ATTN_ADAPTER, - adapter_strategy=strategy, - ) + attention_output = ( + adapter_1(attention_output) + attention_output + ) # simple adapter call with residual connection layernorm_input = bias_dropout_add_func(attention_output, attention_bias, residual, self.hidden_dropout) # print(f"Layer: {self.layer_number} Attention checksum {layernorm_input.sum()}") @@ -626,15 +622,12 @@ def forward( layernorm_input = normalization_output # MLP. mlp_output, mlp_bias = self.mlp(normalization_output) - if ( - self.is_adapter_available() - ): # TODO: (@adithyre) was able to move adapter_2 back to the end of the transformer after ptl 1.7 update. + if self.is_adapter_available(): + # TODO: (@adithyre) was able to move adapter_2 back to the end of the transformer after ptl 1.7 update. adapter_2 = self.get_adapter_module(AdapterName.POST_ATTN_ADAPTER) if adapter_2: - strategy = adapter_2.adapter_strategy - mlp_output = self.forward_single_enabled_adapter_( - mlp_output, adapter_2, adapter_name=AdapterName.POST_ATTN_ADAPTER, adapter_strategy=strategy - ) + mlp_output = adapter_2(mlp_output) + mlp_output # simple adapter call with residual connection + residual = layernorm_input bias_dropout_add_func = self._get_bias_droput_add_func( diff --git a/nemo/collections/nlp/modules/common/prompt_encoder.py b/nemo/collections/nlp/modules/common/prompt_encoder.py index 282ad053bc86..283608367b62 100644 --- a/nemo/collections/nlp/modules/common/prompt_encoder.py +++ b/nemo/collections/nlp/modules/common/prompt_encoder.py @@ -70,7 +70,7 @@ def __init__( self.prompt_embeddings.weight.requires_grad = False # Set fixed indicies for forward pass - self.register_buffer('indices', torch.LongTensor(list(range(self.total_virtual_tokens)))) + self.register_buffer("indices", torch.LongTensor(list(range(self.total_virtual_tokens))), persistent=False) def clear_prompt_embedding_weights(self,): """ @@ -104,9 +104,10 @@ def __init__(self, taskname, hidden_size, total_virtual_tokens, is_inference_rea self.total_virtual_tokens = total_virtual_tokens self.prompt_table = torch.nn.ModuleDict() self.prompt_table[self.taskname] = PromptEmbedding(self.hidden_size, self.total_virtual_tokens) - self.prompt_table[self.taskname].prompt_embeddings.weight.requires_grad = False self.prompt_table[self.taskname].clear_prompt_embedding_weights() self.is_inference_ready = is_inference_ready + for p in self.prompt_table.parameters(): + p.requires_grad = False def set_prompt_table(self, prompt_representation: torch.Tensor): """ From 698a5f77297cdbd9bb8f2a926f13b7c4186c8863 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Thu, 22 Jun 2023 14:37:08 -0400 Subject: [PATCH 055/123] Add hybrid model support to transcribe_speech_parallel.py (#6906) * Add hybrid model support to transcribe_speech_parallel.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> * 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update audio_to_text_dataset.py Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> --------- Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/asr/transcribe_speech_parallel.py | 18 +++++++++++++++++- .../asr/data/audio_to_text_dataset.py | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/examples/asr/transcribe_speech_parallel.py b/examples/asr/transcribe_speech_parallel.py index 74019d7668f0..f14df284c6b1 100644 --- a/examples/asr/transcribe_speech_parallel.py +++ b/examples/asr/transcribe_speech_parallel.py @@ -32,6 +32,15 @@ predict_ds.batch_size=16 \ output_path=/tmp/ +Example for Hybrid-CTC/RNNT models with non-tarred datasets: + +python transcribe_speech_parallel.py \ + model=stt_en_fastconformer_hybrid_large \ + decoder_type=ctc \ + predict_ds.manifest_filepath=/dataset/manifest_file.json \ + predict_ds.batch_size=16 \ + output_path=/tmp/ + Example for tarred datasets: python transcribe_speech_parallel.py \ @@ -73,7 +82,7 @@ from nemo.collections.asr.data.audio_to_text_dataset import ASRPredictionWriter from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig from nemo.collections.asr.metrics.wer import word_error_rate -from nemo.collections.asr.models import ASRModel +from nemo.collections.asr.models import ASRModel, EncDecHybridRNNTCTCModel from nemo.collections.asr.models.configs.asr_models_config import ASRDatasetConfig from nemo.core.config import TrainerConfig, hydra_runner from nemo.utils import logging @@ -92,6 +101,10 @@ class ParallelTranscriptionConfig: # decoding strategy for RNNT models rnnt_decoding: RNNTDecodingConfig = RNNTDecodingConfig() + + # decoder for hybrid models, must be one of 'ctc', 'rnnt' if not None + decoder_type: Optional[str] = None + trainer: TrainerConfig = TrainerConfig(devices=-1, accelerator="gpu", strategy="ddp") @@ -137,6 +150,9 @@ def main(cfg: ParallelTranscriptionConfig): ) model = ASRModel.from_pretrained(model_name=cfg.model, map_location="cpu") + if isinstance(model, EncDecHybridRNNTCTCModel) and cfg.decoder_type is not None: + model.change_decoding_strategy(decoder_type=cfg.decoder_type) + trainer = ptl.Trainer(**cfg.trainer) cfg.predict_ds.return_sample_id = True diff --git a/nemo/collections/asr/data/audio_to_text_dataset.py b/nemo/collections/asr/data/audio_to_text_dataset.py index 14e8dea19651..d5dcc8be4847 100644 --- a/nemo/collections/asr/data/audio_to_text_dataset.py +++ b/nemo/collections/asr/data/audio_to_text_dataset.py @@ -713,6 +713,7 @@ def write_on_batch_end( item = {} sample = self.dataset.get_manifest_sample(sample_id) item["audio_filepath"] = sample.audio_file + item["offset"] = sample.offset item["duration"] = sample.duration item["text"] = sample.text_raw item["pred_text"] = transcribed_text From d870644fdabc820c5307703f264fc63e470a0ce9 Mon Sep 17 00:00:00 2001 From: Yi Dong <43824965+yidong72@users.noreply.github.com> Date: Thu, 22 Jun 2023 15:27:44 -0400 Subject: [PATCH 056/123] Make Gradio library optional (#6904) * make gradio optinoal Signed-off-by: Yi Dong * update readme Signed-off-by: Yi Dong --------- Signed-off-by: Yi Dong --- README.rst | 8 ++++++ .../nlp/modules/common/chatbot_component.py | 22 +++++++++++++++- .../nlp/modules/common/megatron_web_server.py | 26 ++++++++++++++++--- requirements/requirements_nlp.txt 
| 1 - 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 6742eb1f07d4..869782ab372f 100644 --- a/README.rst +++ b/README.rst @@ -290,6 +290,14 @@ Transformer Engine already supports Flash Attention for GPT models. If you want pip install flash-attn pip install triton==2.0.0.dev20221202 +NLP inference UI +~~~~~~~~~~~~~~~~~~~~ +To launch the inference web UI server, please install the gradio `gradio `_. + +.. code-block:: bash + + pip install gradio==3.34.0 + NeMo Text Processing ~~~~~~~~~~~~~~~~~~~~ NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separate repository `https://github.com/NVIDIA/NeMo-text-processing `_. diff --git a/nemo/collections/nlp/modules/common/chatbot_component.py b/nemo/collections/nlp/modules/common/chatbot_component.py index 548458df7e29..afc86d9defec 100644 --- a/nemo/collections/nlp/modules/common/chatbot_component.py +++ b/nemo/collections/nlp/modules/common/chatbot_component.py @@ -19,9 +19,29 @@ """ from __future__ import annotations -from gradio.components import * +import warnings + from markdown2 import Markdown +try: + from typing import Any, Callable, Dict, List, Literal, Tuple + + from gradio.components import ( + Changeable, + Component, + Enum, + EventListenerMethod, + IOComponent, + JSONSerializable, + Selectable, + document, + processing_utils, + ) + + GRADIO_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + GRADIO_AVAILABLE = False + class _Keywords(Enum): NO_VALUE = "NO_VALUE" # Used as a sentinel to determine if nothing is provided as a argument for `value` in `Component.update()` diff --git a/nemo/collections/nlp/modules/common/megatron_web_server.py b/nemo/collections/nlp/modules/common/megatron_web_server.py index 884f7abe5f01..d3ccde49a5c5 100644 --- a/nemo/collections/nlp/modules/common/megatron_web_server.py +++ b/nemo/collections/nlp/modules/common/megatron_web_server.py @@ -14,10 +14,14 @@ import asyncio -import gradio as gr +try: + import gradio as gr + + GRADIO_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + GRADIO_AVAILABLE = False from nemo.collections.nlp.modules.common.chat_css import CSS -from nemo.collections.nlp.modules.common.chatbot_component import Chatbot from nemo.collections.nlp.modules.common.megatron.retrieval_services.util import ( convert_retrieved_to_md, request_data, @@ -30,8 +34,17 @@ DEFAULT_SYSTEM = "A chat between a curious human and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" SYSTEM_TOKEN = 'System\n' -# HUMAN_TOKEN = 'Human:' -# ASSITANT_TOKEN = 'Assistant:' + + +def check_gradio_import(): + if not GRADIO_AVAILABLE: + msg = ( + f"could not find the gradio library.\n" + f"****************************************************************\n" + f"To install it, please follow the steps below:\n" + f"pip install gradio==3.34.0\n" + ) + raise ImportError(msg) def create_gen_function(port=5555, chat=False): @@ -89,6 +102,7 @@ def get_generation( def get_demo(share, username, password, server_port=5555, web_port=9889, loop=None): + check_gradio_import() asyncio.set_event_loop(loop) with gr.Blocks() as demo: with gr.Row(): @@ -132,6 +146,9 @@ def get_demo(share, username, password, server_port=5555, web_port=9889, loop=No def get_chatbot_demo(share, username, password, server_port=5555, web_port=9889, loop=None): + check_gradio_import() + from nemo.collections.nlp.modules.common.chatbot_component import Chatbot + asyncio.set_event_loop(loop) with gr.Blocks(css=CSS) as demo: # store the mutliple turn conversation @@ -294,6 +311,7 @@ def reset_index(self): return request_data(data, self.combo_service_ip, self.combo_service_port) def run_demo(self, share, username, password, port): + check_gradio_import() with gr.Blocks(css="table, th, td { border: 1px solid blue; table-layout: fixed; width: 100%; }") as demo: with gr.Row(): with gr.Column(scale=2, width=200): diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 582862361a22..2018de6fbc31 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -5,7 +5,6 @@ fasttext flask_restful ftfy gdown -gradio>=3.28.3 h5py ijson inflect From 4726650aee4d97900f5735262bf0c64d10dd50cc Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Thu, 22 Jun 2023 19:06:52 -0400 Subject: [PATCH 057/123] Update Frame-VAD doc (#6902) * update fvad doc Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- examples/asr/asr_vad/README.md | 12 ++- examples/asr/speech_classification/README.md | 97 +++++++++++++++---- .../speech_classification/frame_vad_infer.py | 7 ++ .../speech_to_frame_label.py | 2 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 17 +++- tutorials/asr/Voice_Activity_Detection.ipynb | 31 +++++- 6 files changed, 143 insertions(+), 23 deletions(-) diff --git a/examples/asr/asr_vad/README.md b/examples/asr/asr_vad/README.md index 9385b96a79ea..f39b9735b20f 100644 --- a/examples/asr/asr_vad/README.md +++ b/examples/asr/asr_vad/README.md @@ -8,10 +8,16 @@ There are two types of input - A manifest passed to `manifest_filepath`, - A directory containing audios passed to `audio_dir` and also specify `audio_type` (default to `wav`). -The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "text"] are required. An example of a manifest file is: +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration"] are required. 
An example of a manifest file is: ```json -{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "text": "a b c d e"} -{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "text": "f g h i j"} +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000} +``` + +If you want to calculate WER, provide `text` in manifest as groundtruth. An example of a manifest file is: +```json +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "text": "hello world"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "text": "hello world"} ``` ## Output diff --git a/examples/asr/speech_classification/README.md b/examples/asr/speech_classification/README.md index 86bba3dc65a4..4fa5d3c4f2b3 100644 --- a/examples/asr/speech_classification/README.md +++ b/examples/asr/speech_classification/README.md @@ -1,25 +1,88 @@ # Speech Classification -This directory contains example scripts to train speech classification and voice activity detection models. +This directory contains example scripts to train speech classification and voice activity detection models. There are two types of VAD models: Frame-VAD and Segment-VAD. -# Model execution overview +## Frame-VAD -The training scripts in this directory execute in the following order. When preparing your own training-from-scratch / fine-tuning scripts, please follow this order for correct training/inference. +The frame-level VAD model predicts for each frame of the audio whether it has speech or not. For example, with the default config file (`../conf/marblenet/marblenet_3x2x64_20ms.yaml`), the model provides a probability for each frame of 20ms length. -```mermaid +### Training +```sh +python speech_to_label.py \ + --config-path= + --config-name= \ + model.train_ds.manifest_filepath="[,]" \ + model.validation_ds.manifest_filepath=["",""] \ + trainer.devices=-1 \ + trainer.accelerator="gpu" \ + strategy="ddp" \ + trainer.max_epochs=100 +``` + +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: +``` +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 1"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "label": "0 0 0 1 1 1 1 0 0"} +``` +For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like "0 0 0 0 1 1 0 1 .... 0 1". +However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. 
+ + +### Inference +python frame_vad_infer.py \ + --config-path="../conf/vad" --config-name="frame_vad_infer_postprocess" \ + dataset= + +The manifest json file should have the following format (each line is a Python dictionary): +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2.wav", "offset": 0, "duration": 10000} +``` + +#### Evaluation +If you want to evaluate tne model's AUROC and DER performance, you need to set `evaluate: True` in config yaml (e.g., `../conf/vad/frame_vad_infer_postprocess.yaml`), and also provide groundtruth in label strings: +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000, "label": "0 1 0 0 0 1 1 1 0"} +``` +or RTTM files: +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000, "rttm_filepath": "/path/to/rttm_file1.rttm"} +``` + + +## Segment-VAD + +Segment-level VAD predicts a single label for each segment of audio (e.g., 0.63s by default). + +### Training +```sh +python speech_to_label.py \ + --config-path= \ + --config-name= \ + model.train_ds.manifest_filepath="[,]" \ + model.validation_ds.manifest_filepath=["",""] \ + trainer.devices=-1 \ + trainer.accelerator="gpu" \ + strategy="ddp" \ + trainer.max_epochs=100 +``` -graph TD - A[Hydra Overrides + Yaml Config] --> B{Config} - B --> |Init| C[Trainer] - C --> D[ExpManager] - B --> D[ExpManager] - C --> E[Model] - B --> |Init| E[Model] - E --> |Constructor| F(Change Labels) - F --> G(Setup Train + Validation + Test Data loaders) - G --> H(Setup Optimization) - H --> I[Maybe init from pretrained] - I --> J["trainer.fit(model)"] +The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: +``` +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 0.63, "label": "0"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 0.63, "label": "1"} ``` -During restoration of the model, you may pass the Trainer to the restore_from / from_pretrained call, or set it after the model has been initialized by using `model.set_trainer(Trainer)`. 
\ No newline at end of file + +### Inference +```sh +python vad_infer.py \ + --config-path="../conf/vad" \ + --config-name="vad_inference_postprocessing.yaml" + dataset= +``` +The manifest json file should have the following format (each line is a Python dictionary): +``` +{"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000} +{"audio_filepath": "/path/to/audio_file2.wav", "offset": 0, "duration": 10000} +``` diff --git a/examples/asr/speech_classification/frame_vad_infer.py b/examples/asr/speech_classification/frame_vad_infer.py index 56eb7584e3db..f716eb45bb64 100644 --- a/examples/asr/speech_classification/frame_vad_infer.py +++ b/examples/asr/speech_classification/frame_vad_infer.py @@ -26,6 +26,13 @@ The manifest json file should have the following format (each line is a Python dictionary): {"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000} {"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000} + +If you want to evaluate tne model's AUROC and DER performance, you need to set `evaluate=True` in config yaml, +and also provide groundtruth in either RTTM files or label strings: +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 0 1 1 1 0"} +or +{"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "rttm_filepath": "/path/to/rttm_file1.rttm"} + """ import os diff --git a/examples/asr/speech_classification/speech_to_frame_label.py b/examples/asr/speech_classification/speech_to_frame_label.py index 3289845ec3d3..04fcbdd1b61c 100644 --- a/examples/asr/speech_classification/speech_to_frame_label.py +++ b/examples/asr/speech_classification/speech_to_frame_label.py @@ -32,7 +32,7 @@ The input manifest must be a manifest json file, where each line is a Python dictionary. The fields ["audio_filepath", "offset", "duration", "label"] are required. An example of a manifest file is: ``` {"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000, "label": "0 1 0 0 1"} -{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "text": "0 0 0 1 1 1 1 0 0"} +{"audio_filepath": "/path/to/audio_file2", "offset": 0, "duration": 10000, "label": "0 0 0 1 1 1 1 0 0"} ``` For example, if you have a 1s audio file, you'll need to have 50 frame labels in the manifest entry like "0 0 0 0 1 1 0 1 .... 0 1". However, shorter label strings are also supported for smaller file sizes. For example, you can prepare the `label` in 40ms frame, and the model will properly repeat the label for each 20ms frame. 
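Reviewer note (not part of the patch): the Frame-VAD docs above describe a `label` field holding one "0"/"1" token per 20 ms frame, optionally written at a coarser resolution such as 40 ms. The snippet below is a rough sketch of how such a label string could be produced from speech segments when preparing a manifest; the helper name, the `(start_sec, end_sec)` segment format, and the frame length are assumptions for illustration, not code from this patch.

```python
# Illustrative only: build the frame-level "label" string described above from
# (start_sec, end_sec) speech segments. Names and frame length are assumptions.
def segments_to_frame_labels(segments, duration, frame_len=0.02):
    num_frames = int(round(duration / frame_len))
    labels = ["0"] * num_frames
    for start, end in segments:
        first = int(round(start / frame_len))
        last = min(num_frames, int(round(end / frame_len)))
        for i in range(first, last):
            labels[i] = "1"
    return " ".join(labels)


# A 1 s file labelled at 20 ms resolution yields 50 labels; speech from
# 0.1 s to 0.3 s marks frames 5-14 as "1".
print(segments_to_frame_labels([(0.1, 0.3)], duration=1.0))
```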
diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index 1445afe9e381..7e9d0378bc1f 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -50,6 +50,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -57,6 +58,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -72,6 +74,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -132,6 +135,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -139,6 +143,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -154,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -182,6 +188,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -198,6 +205,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -215,6 +223,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -239,6 +248,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -255,6 +265,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -262,6 +273,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -289,6 +301,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -313,6 +326,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -320,6 +334,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -374,7 +389,7 @@ "source": [ "# Further Reading\n", "\n", - "There are two ways to incorporate VAD into ASR pipeline. The first strategy is to drop the frames that are predicted as `non-speech` by VAD, as already discussed in this tutorial. The second strategy is to keep all the frames and mask the `non-speech` frames with zero-signal values. Also, instead of using segment-VAD as shown in this tutorial, we can use frame-VAD model for faster inference and better accuracy. For more information, please refer to the two scripts [speech_to_text_with_vad.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr_vad/speech_to_text_with_vad.py)." + "There are two ways to incorporate VAD into ASR pipeline. The first strategy is to drop the frames that are predicted as `non-speech` by VAD, as already discussed in this tutorial. The second strategy is to keep all the frames and mask the `non-speech` frames with zero-signal values. Also, instead of using segment-VAD as shown in this tutorial, we can use frame-VAD model for faster inference and better accuracy. For more information, please refer to the script [speech_to_text_with_vad.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr_vad/speech_to_text_with_vad.py)." 
] } ], diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index 8b95698c71e8..b1bdd434511b 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -41,6 +41,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -81,6 +82,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab": {}, @@ -98,6 +100,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -172,6 +175,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -205,6 +209,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -244,6 +249,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -272,6 +278,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -336,6 +343,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -362,6 +370,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -391,6 +400,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -469,6 +479,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -522,6 +533,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -548,6 +560,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -585,6 +598,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -592,6 +606,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -628,6 +643,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -652,6 +668,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -671,6 +688,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -699,6 +717,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -711,6 +730,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -723,6 +743,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -751,6 +772,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -835,6 +857,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -918,6 +941,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -943,6 +967,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -969,6 +994,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -1068,6 +1094,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1104,6 +1131,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1117,6 +1145,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -1143,7 +1172,7 @@ "\n", "During inference, since frame-VAD model doesn't require splicing input into overlapping segments, it is more efficient than segment-VAD model, with 8x less GPU memory 
consumption.\n", "\n", - "For more information on the frame-VAD model, please refer to the [model class](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/models/classification_models.py#L840). For training and running inference on frame-VAD, please refer to [speech_to_frame_label.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/speech_to_frame_label.py) and [frame_vad_infer.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/frame_vad_infer.py)." + "For more information on the frame-VAD model, please refer to the [README.md](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/README.md). For training and running inference on frame-VAD, please refer to [speech_to_frame_label.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/speech_to_frame_label.py) and [frame_vad_infer.py](https://github.com/NVIDIA/NeMo/blob/stable/examples/asr/speech_classification/frame_vad_infer.py)." ] } ], From 722e77cd89c8a2ce5bf9a4052efb1e5ef7f327b2 Mon Sep 17 00:00:00 2001 From: fayejf <36722593+fayejf@users.noreply.github.com> Date: Fri, 23 Jun 2023 10:49:26 -0700 Subject: [PATCH 058/123] Update container info in README.rst (#6913) Signed-off-by: fayejf <36722593+fayejf@users.noreply.github.com> --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 869782ab372f..8a788da71550 100644 --- a/README.rst +++ b/README.rst @@ -304,13 +304,13 @@ NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separa Docker containers: ~~~~~~~~~~~~~~~~~~ -We release NeMo containers alongside NeMo releases. For example, NeMo ``r1.18.1`` comes with container ``nemo:23.03``, you may find more details about released containers in `releases page `_. +We release NeMo containers alongside NeMo releases. For example, NeMo ``r1.19.0`` comes with container ``nemo:23.04``, you may find more details about released containers in `releases page `_. To use built container, please run .. 
code-block:: bash - docker pull nvcr.io/nvidia/nemo:23.03 + docker pull nvcr.io/nvidia/nemo:23.04 To build a nemo container with Dockerfile from a branch, please run From 74cbbb2859c0093dbde0e8aeedf0fc6d65849790 Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Sat, 24 Jun 2023 03:19:19 +0800 Subject: [PATCH 059/123] Fix fast-glu activation in change partitions (#6909) * Fix fast-swiglu Signed-off-by: hsiehjackson * change to all fast glu activation Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson --- .../megatron_change_num_partitions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index 2938a16098a1..72655089e0ee 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -199,7 +199,7 @@ def compute_tp_splits( # alias the global index to idx idx = global_idx - swiglu_activation = 'swiglu' in str(model_cfg.get('activation', '')).lower() + fast_glu_activation = str(model_cfg.get('activation', '')).lower() in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] if param.shape == partitions[0][idx].shape: split = [partitions[0][idx].data] * tp_size @@ -230,8 +230,8 @@ def compute_tp_splits( for i in range(tp_size): tp_qkv = torch.cat([tp_qkv_splits[item] for item in range(i, tp_size * 2, tp_size)]) split.append(tp_qkv) - elif 'dense_h_to_4h.weight' in param_name and swiglu_activation: - # For Megatron GPT model with Swiglu activation + elif 'dense_h_to_4h.weight' in param_name and fast_glu_activation: + # For Megatron GPT model with Fast Glu activation # Handle gated linear units # concat all the first halves ('W's) and all the second halves ('V's) w_split, k_split = torch.chunk(partitions[0][idx].data, 2, dim=0) @@ -261,7 +261,7 @@ def compute_tp_merge(idx, name, param, partitions_pp, model_cfg): Returns: The concatenated parameter for TP 1 PP 1. 
""" - swiglu_activation = 'swiglu' in str(model_cfg.get('activation', '')).lower() + fast_glu_activation = str(model_cfg.get('activation', '')).lower() in ['fast-geglu', 'fast-swiglu', 'fast-reglu'] # Logic from original TP rank change if param.shape == partitions_pp[0][idx].shape: @@ -271,8 +271,8 @@ def compute_tp_merge(idx, name, param, partitions_pp, model_cfg): else: concated = torch.cat([partitions_pp[i][idx].data for i in range(len(partitions_pp))], dim=0) - # Logic for Swiglu activation - if 'dense_h_to_4h.weight' in name and swiglu_activation: + # Logic for Fast Glu activation + if 'dense_h_to_4h.weight' in name and fast_glu_activation: # concat all the first halves ('W's) and all the second halves ('V's) wk_splits = [] for tpr in range(len(partitions_pp)): From c4e677a2d7aad47dbade8c3a0e47311a51d03bba Mon Sep 17 00:00:00 2001 From: asfiyab-nvidia <117682710+asfiyab-nvidia@users.noreply.github.com> Date: Sun, 25 Jun 2023 14:19:59 -0700 Subject: [PATCH 060/123] Documentation for ONNX export of Megatron Models (#6914) * add Megatron ONNX export guide Signed-off-by: Asfiya Baig * fix formatting Signed-off-by: Asfiya Baig * include megatron_onnx_export in api.rst Signed-off-by: Asfiya Baig * include megatron_onnx_export in index.rst Signed-off-by: Asfiya Baig * update installation section Signed-off-by: Asfiya Baig * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert changes to megatron_ckpt_to_nemo.py Signed-off-by: Asfiya Baig * address comments Signed-off-by: Asfiya Baig --------- Signed-off-by: Asfiya Baig Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper --- docs/source/index.rst | 1 + docs/source/nlp/api.rst | 10 +++++ docs/source/nlp/megatron_onnx_export.rst | 47 ++++++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 docs/source/nlp/megatron_onnx_export.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index ee1d3fba805a..dcf2ff30e9c5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -45,6 +45,7 @@ NVIDIA NeMo User Guide nlp/machine_translation/machine_translation nlp/text_normalization/intro nlp/api + nlp/megatron_onnx_export nlp/models diff --git a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index 7c6971a68d05..0822ade0224c 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -140,3 +140,13 @@ Datasets .. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.ul2_dataset.UL2Dataset :show-inheritance: +Exportable Model Classes +------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTExportableModel + :show-inheritance: + +.. toctree:: + :maxdepth: 1 + + megatron_onnx_export \ No newline at end of file diff --git a/docs/source/nlp/megatron_onnx_export.rst b/docs/source/nlp/megatron_onnx_export.rst new file mode 100644 index 000000000000..ee6138d1f912 --- /dev/null +++ b/docs/source/nlp/megatron_onnx_export.rst @@ -0,0 +1,47 @@ +.. _megatron_onnx_export: + +ONNX Export of Megatron Models +==================================== + +This guide demonstrates the usage of the ONNX export functionality for Megatron models. + +Requirements +----------------- +Set up the development environment by launching the latest `NeMo container `_ + +The minimum version requirements for NeMo and TransformerEngine are below + +.. 
code-block:: bash + + nemo > 1.19 + transformer_engine > 0.10 + +Export to ONNX +----------------- +The export script supports the ONNX export of models with .nemo and .ckpt file extensions. The script also supports the export of the following types of models: GPT, T5, BERT, BART, NMT, RETRO. +Commands for both file formats are discussed in the following sections. The model type used for the examples is GPT. + + +Export using .nemo file +^^^^^^^^^^^^^^^^^^^^^^^^ +A model with .nemo file extension can be exported using the command below + +.. code-block:: bash + + python3 examples/nlp/language_modeling/megatron_export.py \ + model_type=gpt \ + onnx_model_file=gpt_126m.onnx \ + gpt_model_file=gpt_126m.nemo + +Export using .ckpt file +^^^^^^^^^^^^^^^^^^^^^^^^ +A model with .ckpt file extension can be exported using the command below + +.. code-block:: bash + + python3 examples/nlp/language_modeling/megatron_export.py \ + model_type=gpt \ + onnx_model_file=gpt_126m.onnx \ + checkpoint_dir=./gpt_126m/ \ + checkpoint_name=model_weights.ckpt \ + hparams_file=./gpt_126m/hparams.yaml \ No newline at end of file From f344fdbe03d43dc7fecafc5cb3ae6ca937b901b9 Mon Sep 17 00:00:00 2001 From: Greg Heinrich Date: Mon, 26 Jun 2023 16:15:13 +0200 Subject: [PATCH 061/123] FixTextMemMapDataset index file creation in multi-node setup (#6768) * Fix for isolated filesystems in multi-node setting Signed-off-by: Greg Heinrich * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Greg Heinrich Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Micha Livne --- .../language_modeling/text_memmap_dataset.py | 66 +++++++++++++------ tests/collections/nlp/test_mem_map_dataset.py | 12 ++-- 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py b/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py index e1a30a3aafb7..05d10b42e115 100644 --- a/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/text_memmap_dataset.py @@ -25,11 +25,11 @@ import torch from nemo.core import Dataset -from nemo.utils import logging +from nemo.utils import AppState, logging -__all__ = ['TextMemMapDataset', 'CSVMemMapDataset', 'build_index_files'] -__idx_version__ = '0.2' # index file version -__idx_suffix__ = 'idx' # index file suffix +__all__ = ["TextMemMapDataset", "CSVMemMapDataset", "build_index_files"] +__idx_version__ = "0.2" # index file version +__idx_suffix__ = "idx" # index file suffix def _build_index_from_memdata(fn, newline_int): @@ -40,7 +40,7 @@ def _build_index_from_memdata(fn, newline_int): Returns a 1D array of ints. """ # use memmap to read file - mdata = np.memmap(fn, dtype=np.uint8, mode='r') + mdata = np.memmap(fn, dtype=np.uint8, mode="r") # find newline positions midx = np.where(mdata == newline_int)[0] midx_dtype = midx.dtype @@ -115,9 +115,10 @@ def __init__( logging.info(f"Building data files") # load all files into memmap - is_ditributed = torch.distributed.is_available() and torch.distributed.is_initialized() + is_distributed = torch.distributed.is_available() and torch.distributed.is_initialized() - if not is_ditributed or (is_ditributed and torch.distributed.get_rank() == 0): + if not is_distributed or (is_distributed and torch.distributed.get_rank() == 0): + # Create index files on global rank 0. 
build_index_files( dataset_paths, newline_int, @@ -126,14 +127,39 @@ def __init__( index_mapping_dir=index_mapping_dir, ) - if is_ditributed: + if is_distributed: + torch.distributed.barrier() + + if is_distributed and AppState().local_rank == 0: + # If we are in a distributed multi-node set-up and index files are not stored on + # a shared filesystem, then the index files created on global rank 0 are only + # accessible to the workers on that node. + # + # Two cases may occur here: + # + # 1. case of a shared filesystem, or global_rank==0: the index files are present in + # the locally available filesystem, calling build_index_files() again is a no-op. + # 2. case of a non-shared filesystem, and global_rank>0: the index files are not + # present in the locally available filesystem, calling build_index_files() again + # will create them. + # + # Outcome in all cases: all nodes have access to the index files in their filesystem. + build_index_files( + dataset_paths, + newline_int, + workers=self._worker, + build_index_fn=build_index_fn, + index_mapping_dir=index_mapping_dir, + ) + + if is_distributed: torch.distributed.barrier() logging.info(f"Loading data files") start_time = time.time() mdata_midx_list = [self.load_file(fn, index_mapping_dir) for fn in self._files_list] logging.info( - f'Time loading {len(mdata_midx_list)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}' + f"Time loading {len(mdata_midx_list)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}" ) logging.info("Computing global indices") @@ -224,34 +250,34 @@ def load_file(self, fn, index_mapping_dir: Optional[str] = None): idx_fn = _index_fn(fn, index_mapping_dir) # create data map - mdata = np.memmap(fn, dtype=np.uint8, mode='r') + mdata = np.memmap(fn, dtype=np.uint8, mode="r") if _index_file_exists(idx_fn): # load index file into memory map - midx = np.load(idx_fn + ".npy", allow_pickle=True, mmap_mode='r') + midx = np.load(idx_fn + ".npy", allow_pickle=True, mmap_mode="r") # test for header if len(midx) < self._header_lines: raise RuntimeError(f"Missing header, expected {self._header_lines} header lines") # load meta info - idx_info_dict = pickle.load(open(idx_fn + ".info", 'rb')) + idx_info_dict = pickle.load(open(idx_fn + ".info", "rb")) # test for mismatch in expected newline_int - if 'newline_int' in idx_info_dict: - newline_int = idx_info_dict['newline_int'] + if "newline_int" in idx_info_dict: + newline_int = idx_info_dict["newline_int"] if self._newline_int != newline_int: logging.warning( f"Mismatch in newline_int, expected = {self._newline_int} but loaded {newline_int}" ) # test for version mismatch (useful to force recreation of index files) - idx_version = idx_info_dict.get('version', '0.0') + idx_version = idx_info_dict.get("version", "0.0") if __idx_version__ != idx_version: raise RuntimeError( f"Version mismatch: Please delete existing '.{__idx_suffix__}' files. Expected version = {__idx_version__}, but file version = {idx_version}. 
File path = {idx_fn}" ) else: raise ValueError( - f'Memory Map for {fn} is not found, missing one or more of files: {idx_fn}.{{.npy,.info}}' + f"Memory Map for {fn} is not found, missing one or more of files: {idx_fn}.{{.npy,.info}}" ) return (mdata, midx) @@ -271,7 +297,7 @@ def __init__( tokenizer: Optional[Type["TokenizerSpec"]] = None, sort_dataset_paths: Optional[bool] = True, data_col=1, - data_sep=',', + data_sep=",", index_mapping_dir: Optional[str] = None, ): """ @@ -424,7 +450,7 @@ def _build_memmap_index_files(newline_int, build_index_fn, fn, index_mapping_dir def build_index_files( - dataset_paths, newline_int, workers=None, build_index_fn=_build_index_from_memdata, index_mapping_dir: str = None + dataset_paths, newline_int, workers=None, build_index_fn=_build_index_from_memdata, index_mapping_dir: str = None, ): """Auxiliary method to build multiple index files""" if len(dataset_paths) < 1: @@ -438,10 +464,10 @@ def build_index_files( start_time = time.time() with mp.Pool(workers) as p: build_status = p.map( - partial(_build_memmap_index_files, newline_int, build_index_fn, index_mapping_dir=index_mapping_dir), + partial(_build_memmap_index_files, newline_int, build_index_fn, index_mapping_dir=index_mapping_dir,), dataset_paths, ) logging.info( - f'Time building {sum(build_status)} / {len(build_status)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}' + f"Time building {sum(build_status)} / {len(build_status)} mem-mapped files: {datetime.timedelta(seconds=time.time() - start_time)}" ) diff --git a/tests/collections/nlp/test_mem_map_dataset.py b/tests/collections/nlp/test_mem_map_dataset.py index b60636022e05..1e21b6d270c9 100644 --- a/tests/collections/nlp/test_mem_map_dataset.py +++ b/tests/collections/nlp/test_mem_map_dataset.py @@ -27,13 +27,17 @@ def jsonl_file(tmp_path): file_path = tmp_path / "data.jsonl" # Generate data to write to the JSONL file - data = [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Bob", "age": 35}] + data = [ + {"name": "John", "age": 30}, + {"name": "Jane", "age": 25}, + {"name": "Bob", "age": 35}, + ] # Write data to the JSONL file with open(file_path, mode="w") as file: for item in data: json.dump(item, file) - file.write('\n') + file.write("\n") # Provide the file path to the test function yield str(file_path) @@ -81,12 +85,12 @@ def test_csv_mem_map_dataset(csv_file): @pytest.mark.parametrize( - "dataset_class", [text_memmap_dataset.JSONLMemMapDataset, text_memmap_dataset.CSVMemMapDataset] + "dataset_class", [text_memmap_dataset.JSONLMemMapDataset, text_memmap_dataset.CSVMemMapDataset], ) @pytest.mark.parametrize("use_alternative_index_mapping_dir", [True, False]) @pytest.mark.parametrize("relative_index_fn", [True, False]) def test_mem_map_dataset_index_mapping_dir( - tmp_path, dataset_class, jsonl_file, use_alternative_index_mapping_dir, relative_index_fn + tmp_path, dataset_class, jsonl_file, use_alternative_index_mapping_dir, relative_index_fn, ): """Test for index_mapping_dir.""" if relative_index_fn: From ef56c97e65921bff3e4544f46a69f3336f3ab99a Mon Sep 17 00:00:00 2001 From: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Date: Mon, 26 Jun 2023 09:23:58 -0700 Subject: [PATCH 062/123] Move model change out of if-branch (#6908) Signed-off-by: Elena Rastorgueva --- nemo/collections/asr/parts/utils/transcribe_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py 
b/nemo/collections/asr/parts/utils/transcribe_utils.py index 11e7792cfb21..f8a69fbe817d 100644 --- a/nemo/collections/asr/parts/utils/transcribe_utils.py +++ b/nemo/collections/asr/parts/utils/transcribe_utils.py @@ -189,11 +189,6 @@ def setup_model(cfg: DictConfig, map_location: torch.device) -> Tuple[ASRModel, asr_model = imported_class.restore_from( restore_path=cfg.model_path, map_location=map_location, ) # type: ASRModel - if hasattr(cfg, "model_change"): - asr_model.change_attention_model( - self_attention_model=cfg.model_change.conformer.get("self_attention_model", None), - att_context_size=cfg.model_change.conformer.get("att_context_size", None), - ) model_name = os.path.splitext(os.path.basename(cfg.model_path))[0] else: # restore model by name @@ -202,6 +197,12 @@ def setup_model(cfg: DictConfig, map_location: torch.device) -> Tuple[ASRModel, ) # type: ASRModel model_name = cfg.pretrained_name + if hasattr(cfg, "model_change"): + asr_model.change_attention_model( + self_attention_model=cfg.model_change.conformer.get("self_attention_model", None), + att_context_size=cfg.model_change.conformer.get("att_context_size", None), + ) + return asr_model, model_name From e736c863563b86f9be2bc3b9b9359f6ae268ff5a Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Mon, 26 Jun 2023 17:23:34 -0400 Subject: [PATCH 063/123] Update fvad doc (#6920) * update fvad doc Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * update fvad example Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- .../conf/vad/frame_vad_infer_postprocess.yaml | 8 ++++---- examples/asr/speech_classification/README.md | 17 +++++++++++++++++ nemo/collections/asr/parts/utils/vad_utils.py | 6 +++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml index 842c04777c72..d759a809ec37 100644 --- a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml +++ b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml @@ -21,10 +21,10 @@ vad: postprocessing: onset: 0.3 # onset threshold for detecting the beginning and end of a speech offset: 0.3 # offset threshold for detecting the end of a speech. 
- pad_onset: 0.5 # adding durations before each speech segment - pad_offset: 0.5 # adding durations after each speech segment - min_duration_on: 0.0 # threshold for short speech deletion - min_duration_off: 0.6 # threshold for short non-speech segment deletion + pad_onset: 0.2 # adding durations before each speech segment + pad_offset: 0.2 # adding durations after each speech segment + min_duration_on: 0.2 # threshold for short speech deletion + min_duration_off: 0.2 # threshold for short non-speech segment deletion filter_speech_first: True prepared_manifest_vad_input: null # if not specify, it will automatically generated be "manifest_vad_input.json" diff --git a/examples/asr/speech_classification/README.md b/examples/asr/speech_classification/README.md index 4fa5d3c4f2b3..bdd3aead8db1 100644 --- a/examples/asr/speech_classification/README.md +++ b/examples/asr/speech_classification/README.md @@ -86,3 +86,20 @@ The manifest json file should have the following format (each line is a Python d {"audio_filepath": "/path/to/audio_file1.wav", "offset": 0, "duration": 10000} {"audio_filepath": "/path/to/audio_file2.wav", "offset": 0, "duration": 10000} ``` + + +## Visualization + +To visualize the VAD outputs, you can use the `nemo.collections.asr.parts.utils.vad_utils.plot_sample_from_rttm` function, which takes an audio file and an RTTM file as input, and plots the audio waveform and the VAD labels. Since the VAD inference script will output a json manifest `manifest_vad_out.json` by default, you can create a Jupyter Notebook with the following script and fill in the paths using the output manifest: +```python +from nemo.collections.asr.parts.utils.vad_utils import plot_sample_from_rttm + +plot_sample_from_rttm( + audio_file="/path/to/audio_file.wav", + rttm_file="/path/to/rttm_file.rttm", + offset=0.0, + duration=1000, + save_path="vad_pred.png" +) +``` + diff --git a/nemo/collections/asr/parts/utils/vad_utils.py b/nemo/collections/asr/parts/utils/vad_utils.py index addf3cae29b7..e4f024d231ad 100644 --- a/nemo/collections/asr/parts/utils/vad_utils.py +++ b/nemo/collections/asr/parts/utils/vad_utils.py @@ -1648,7 +1648,7 @@ def frame_vad_infer_load_manifest(cfg: DictConfig): manifest_orig.append(entry) # always prefer RTTM labels if exist - if "label" not in entry or "rttm_filepath" in entry or "rttm_file" in entry: + if "label" not in entry and ("rttm_filepath" in entry or "rttm_file" in entry): rttm_key = "rttm_filepath" if "rttm_filepath" in entry else "rttm_file" segments = load_speech_segments_from_rttm(entry[rttm_key]) label_str = get_frame_labels( @@ -1661,8 +1661,8 @@ def frame_vad_infer_load_manifest(cfg: DictConfig): key_labels_map[uniq_audio_name] = [float(x) for x in label_str.split()] elif entry.get("label", None) is not None: key_labels_map[uniq_audio_name] = [float(x) for x in entry["label"].split()] - else: - raise ValueError("Must have either `label` or `rttm_filepath` in manifest") + elif cfg.evaluate: + raise ValueError("Must have either `label` or `rttm_filepath` in manifest when evaluate=True") return manifest_orig, key_labels_map, key_rttm_map From 82044837a7bcfa1cfd3a91f6b0014e052bdfcaf0 Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Tue, 27 Jun 2023 06:06:28 +0800 Subject: [PATCH 064/123] Fix flash-attention (#6901) * Set default apply_query_key_layer_scaling to false Signed-off-by: hsiehjackson * Add cross attention test Signed-off-by: hsiehjackson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci --------- Signed-off-by: hsiehjackson Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../conf/megatron_gpt_config.yaml | 2 +- .../language_modeling/megatron/gpt_model.py | 2 +- .../nlp/modules/common/megatron/attention.py | 34 ++-- .../modules/common/megatron/fused_softmax.py | 2 +- .../modules/common/megatron/language_model.py | 2 +- .../common/megatron/megatron_decoders.py | 2 +- .../common/megatron/megatron_encoders.py | 2 +- .../modules/common/megatron/transformer.py | 8 +- tests/collections/nlp/test_flash_attention.py | 148 +++++++++++++++--- 9 files changed, 148 insertions(+), 54 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index d1132a32349a..8d7fd09e4307 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -63,7 +63,7 @@ model: attention_dropout: 0.1 # Dropout probability for attention ffn_dropout: 0.0 # Dropout probability in the feed-forward layer. kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null - apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number. + apply_query_key_layer_scaling: False # scale Q * K^T by 1 / layer-number. normalization: 'layernorm' # Normalization layer to use. Options are 'layernorm', 'rmsnorm' layernorm_epsilon: 1e-5 do_layer_norm_weight_decay: False # True means weight decay on all params diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index b43dc98f2fe7..b32bfdb09f20 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -114,7 +114,7 @@ def __init__( num_layers, num_attention_heads, ffn_hidden_size, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, num_tokentypes=0, parallel_output=True, diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py index b0d98e0c2fb1..6025b31c0bd5 100644 --- a/nemo/collections/nlp/modules/common/megatron/attention.py +++ b/nemo/collections/nlp/modules/common/megatron/attention.py @@ -109,7 +109,7 @@ def __init__( attention_type=AttnType.self_attn, attn_mask_type=AttnMaskType.padding, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, use_cpu_initialization=False, megatron_amp_O2=False, @@ -564,7 +564,7 @@ def __init__( num_attention_heads, hidden_size, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, use_cpu_initialization=False, megatron_amp_O2=False, @@ -728,7 +728,7 @@ def __init__( attention_type=AttnType.self_attn, attn_mask_type=AttnMaskType.padding, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, masked_softmax_fusion=True, attention_dropout=0.1, @@ -928,7 +928,6 @@ def torch_attention(self, query_layer, key_layer, value_layer, attention_mask, a attention_scores += attention_bias attention_probs = self.scale_mask_softmax(attention_scores, attention_mask) - # This is actually dropping out entire tokens to attend to, which might # seem a bit unusual, but is 
taken from the original Transformer paper. @@ -966,15 +965,6 @@ def flash_attention(self, query_layer, key_layer, value_layer, attention_mask, a else: return self.flash_attention_cuda(query_layer, key_layer, value_layer, attention_mask,) - def reset_is_causal(self, query_length, key_length, causal): - if query_length != key_length: - if query_length == 1: - return False - raise NotImplementedError( - "Flash attention does not support query and key with different number of tokens, unless number of query tokens is 1." - ) - return causal - def flash_attention_cuda(self, query_layer, key_layer, value_layer, attention_mask): batch_size, seqlen, nheads, _ = query_layer.shape @@ -994,9 +984,7 @@ def flash_attention_cuda(self, query_layer, key_layer, value_layer, attention_ma q, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input(query_layer, attention_mask_q) k, _, cu_seqlens_k, max_seqlen_k = unpad_input(key_layer, attention_mask_kv) v, _, _, _ = unpad_input(value_layer, attention_mask_kv) - causal = self.reset_is_causal( - query_layer.shape[1], key_layer.shape[1], self.attn_mask_type == AttnMaskType.causal - ) + is_causal = self.attn_mask_type == AttnMaskType.causal and query_layer.shape[1] == key_layer.shape[1] context_layer = flash_attn_unpadded_func( q, k, @@ -1006,7 +994,7 @@ def flash_attention_cuda(self, query_layer, key_layer, value_layer, attention_ma max_seqlen_q, max_seqlen_k, dropout_p=self.attention_dropout_p if self.training else 0.0, - causal=causal, + causal=is_causal, ) # [b, sq, np, hn] @@ -1031,13 +1019,13 @@ def flash_attention_triton(self, query_layer, key_layer, value_layer, attention_ attention_mask_q = attention_mask.unsqueeze(1).unsqueeze(3) attention_mask_kv = attention_mask.unsqueeze(1).unsqueeze(2) - attention_bias = attention_bias.masked_fill(~attention_mask_q, torch.finfo(query_layer.dtype).min) - attention_bias = attention_bias.masked_fill(~attention_mask_kv, torch.finfo(query_layer.dtype).min) + if attention_bias.shape[2] == attention_mask_q.shape[2]: + attention_bias = attention_bias.masked_fill(~attention_mask_q, torch.finfo(query_layer.dtype).min) + if attention_bias.shape[3] == attention_mask_kv.shape[3]: + attention_bias = attention_bias.masked_fill(~attention_mask_kv, torch.finfo(query_layer.dtype).min) - causal = self.reset_is_causal( - query_layer.shape[1], key_layer.shape[1], self.attn_mask_type == AttnMaskType.causal - ) - context_layer = flash_attn_func(query_layer, key_layer, value_layer, attention_bias, causal) + is_causal = self.attn_mask_type == AttnMaskType.causal and query_layer.shape[1] == key_layer.shape[1] + context_layer = flash_attn_func(query_layer, key_layer, value_layer, attention_bias, is_causal,) # [b, sq, np, hn] -> [b, np, sq, hn] context_layer = context_layer.permute(0, 2, 1, 3) diff --git a/nemo/collections/nlp/modules/common/megatron/fused_softmax.py b/nemo/collections/nlp/modules/common/megatron/fused_softmax.py index 2c914a67dd12..3da56e597751 100644 --- a/nemo/collections/nlp/modules/common/megatron/fused_softmax.py +++ b/nemo/collections/nlp/modules/common/megatron/fused_softmax.py @@ -53,7 +53,7 @@ def forward_torch_softmax(self, input, mask): probs = torch.nn.Softmax(dim=-1)(mask_output) if mask is not None: all_k_masked = mask.all(axis=-1) - zero_attention_mask = (1.0 - all_k_masked.float())[:, :, :, None] + zero_attention_mask = (1.0 - all_k_masked.type(probs.type()))[:, :, :, None] probs = probs * zero_attention_mask if self.input_in_float16 and self.softmax_in_fp32: diff --git 
a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index a3fa3fd6d2be..e6305e563549 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -70,7 +70,7 @@ def get_language_model( vocab_size, num_attention_heads, encoder_attn_mask_type, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, init_method=None, scaled_init_method=None, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py index ca2000842fe4..20f25a25179a 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py @@ -44,7 +44,7 @@ def get_decoder_model( ffn_hidden_size, num_layers, num_attention_heads, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, init_method=None, scaled_init_method=None, diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py index 9f5d917e2077..b98aa26b1b23 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py @@ -45,7 +45,7 @@ def get_encoder_model( ffn_hidden_size, num_layers, num_attention_heads, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, init_method=None, scaled_init_method=None, diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index ea01acd14a23..258e42ce9694 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -141,7 +141,7 @@ def __init__( self_attn_mask_type=AttnMaskType.padding, fp32_residual_connection=False, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, layernorm_epsilon=1e-5, hidden_dropout=0.1, @@ -659,7 +659,7 @@ def __init__( self_attn_mask_type=AttnMaskType.padding, fp32_residual_connection=False, precision=16, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, layernorm_epsilon=1e-5, hidden_dropout=0.1, @@ -804,7 +804,7 @@ def __init__( params_dtype: torch.dtype = torch.float32, get_rng_state_tracker: Optional[Callable] = None, fuse_wgrad_accumulation: bool = False, - apply_query_key_layer_scaling: bool = True, + apply_query_key_layer_scaling: bool = False, attention_softmax_in_fp32: bool = False, seq_length: Optional[int] = None, micro_batch_size: Optional[int] = None, @@ -895,7 +895,7 @@ def __init__( hidden_size, ffn_hidden_size, num_attention_heads, - apply_query_key_layer_scaling=True, + apply_query_key_layer_scaling=False, kv_channels=None, layer_type=LayerType.encoder, # it can be a list of types or single type self_attn_mask_type=AttnMaskType.padding, diff --git a/tests/collections/nlp/test_flash_attention.py b/tests/collections/nlp/test_flash_attention.py index cead91ff312a..727742fdffb5 100644 --- a/tests/collections/nlp/test_flash_attention.py +++ b/tests/collections/nlp/test_flash_attention.py @@ -87,8 +87,10 @@ def setup_class(cls): def cfg(self): cfg = { 'bz': random.randint(1, 7), - 'sl': random.randint(1, 7), + 'sq': 
random.randint(2, 7), + 'sk': random.randint(2, 7), 'head': random.randint(1, 7), + 'layer_number': random.randint(1, 7), 'device': torch.cuda.current_device(), } # flash attention requires head dimensions are multiples of 8 @@ -99,9 +101,10 @@ def cfg(self): @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") @pytest.mark.unit - def test_flash_attention(self, cfg): + def test_flash_self_attention(self, cfg): device = cfg['device'] - bz, sl, np, h = cfg['bz'], cfg['sl'], cfg['head'], cfg['hidden'] + layer_number = cfg['layer_number'] + bz, sl, np, h = cfg['bz'], cfg['sq'], cfg['head'], cfg['hidden'] hn = h // np q = torch.rand(sl, bz, np, hn, device=device).half() @@ -122,7 +125,7 @@ def test_flash_attention(self, cfg): # Non-causal attention = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.padding, @@ -130,7 +133,7 @@ def test_flash_attention(self, cfg): ) attention_fa = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.padding, @@ -140,21 +143,22 @@ def test_flash_attention(self, cfg): out = attention(q, k, v, attention_mask_padding_3d) out_fa = attention_fa(q, k, v, attention_mask_padding_3d) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa) out_fa = attention_fa(q, k, v, attention_mask_2d) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa) # Causal attention = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.causal, attention_dropout=0.0, + apply_query_key_layer_scaling=False, ) attention_fa = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.causal, @@ -164,9 +168,55 @@ def test_flash_attention(self, cfg): out = attention(q, k, v, attention_mask_causal_3d) out_fa = attention_fa(q, k, v, attention_mask_causal_3d) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa) out_fa = attention_fa(q, k, v, attention_mask_2d) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa) + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.unit + def test_flash_cross_attention(self, cfg): + device = cfg['device'] + layer_number = cfg['layer_number'] + bz, sq, sk, np, h = cfg['bz'], cfg['sq'], cfg['sk'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sq, bz, np, hn, device=device).half() + k = torch.rand(sk, bz, np, hn, device=device).half() + v = torch.rand(sk, bz, np, hn, device=device).half() + + attention_mask_2d_q = torch.arange(sq, device=device).unsqueeze(0) < torch.randint( + 1, sq, (bz,), device=device + ).unsqueeze(1) + + attention_mask_2d_k = torch.arange(sk, device=device).unsqueeze(0) < torch.randint( + 1, sk, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d_q, target_mask=attention_mask_2d_k, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + 
num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d) + torch.testing.assert_close(out, out_fa) @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") @pytest.mark.skipif(not HAVE_TRITON, reason="triton is not installed") @@ -175,9 +225,10 @@ def test_flash_attention(self, cfg): reason="should only run on AMPERE GPU. Please see https://github.com/HazyResearch/flash-attention/issues/245", ) @pytest.mark.unit - def test_flash_attention_triton(self, cfg): + def test_flash_self_attention_triton(self, cfg): device = cfg['device'] - bz, sl, np, h = cfg['bz'], cfg['sl'], cfg['head'], cfg['hidden'] + layer_number = cfg['layer_number'] + bz, sl, np, h = cfg['bz'], cfg['sq'], cfg['head'], cfg['hidden'] hn = h // np q = torch.rand(sl, bz, np, hn, device=device).half() @@ -200,15 +251,16 @@ def test_flash_attention_triton(self, cfg): # Non-causal attention = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.padding, attention_dropout=0.0, + apply_query_key_layer_scaling=False, ) attention_fa = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.padding, @@ -218,21 +270,22 @@ def test_flash_attention_triton(self, cfg): out = attention(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) out_fa = attention_fa(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) out_fa = attention_fa(q, k, v, attention_mask_2d, relative_position_bias=attention_bias) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) # Causal attention = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.causal, attention_dropout=0.0, + apply_query_key_layer_scaling=False, ) attention_fa = CoreAttention( - layer_number=1, + layer_number=layer_number, num_attention_heads=np, hidden_size=h, attn_mask_type=AttnMaskType.causal, @@ -242,6 +295,59 @@ def test_flash_attention_triton(self, cfg): out = attention(q, k, v, attention_mask_causal_3d, relative_position_bias=attention_bias) out_fa = attention_fa(q, k, v, attention_mask_causal_3d, relative_position_bias=attention_bias) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) out_fa = attention_fa(q, k, v, attention_mask_2d, relative_position_bias=attention_bias) - assert torch.allclose(out, out_fa, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) + + @pytest.mark.skipif(not HAVE_FA, reason="flash-attention is not installed") + @pytest.mark.skipif(not HAVE_TRITON, reason="triton is not installed") + @pytest.mark.skipif( + not HAVE_AMPERE_GPU(), + reason="should only run on AMPERE GPU. 
Please see https://github.com/HazyResearch/flash-attention/issues/245", + ) + @pytest.mark.unit + def test_flash_cross_attention_triton(self, cfg): + device = cfg['device'] + layer_number = cfg['layer_number'] + bz, sq, sk, np, h = cfg['bz'], cfg['sq'], cfg['sk'], cfg['head'], cfg['hidden'] + hn = h // np + + q = torch.rand(sq, bz, np, hn, device=device).half() + k = torch.rand(sk, bz, np, hn, device=device).half() + v = torch.rand(sk, bz, np, hn, device=device).half() + + attention_mask_2d_q = torch.arange(sq, device=device).unsqueeze(0) < torch.randint( + 1, sq, (bz,), device=device + ).unsqueeze(1) + + attention_mask_2d_k = torch.arange(sk, device=device).unsqueeze(0) < torch.randint( + 1, sk, (bz,), device=device + ).unsqueeze(1) + + attention_mask_padding_3d = build_attention_mask_3d( + source_mask=attention_mask_2d_q, target_mask=attention_mask_2d_k, attn_mask_type=AttnMaskType.padding + ).unsqueeze(1) + + attention_bias = torch.rand(bz, np, sq, sk, device=device) + + attention = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + apply_query_key_layer_scaling=False, + ) + + attention_fa = CoreAttention( + layer_number=layer_number, + num_attention_heads=np, + hidden_size=h, + attn_mask_type=AttnMaskType.padding, + attention_dropout=0.0, + use_flash_attention=True, + ) + + out = attention(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + out_fa = attention_fa(q, k, v, attention_mask_padding_3d, relative_position_bias=attention_bias) + torch.testing.assert_close(out, out_fa, rtol=1e-3, atol=1e-3) From 7e3739bff68d98b7fbe280c36ad64d23bca98a34 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Tue, 27 Jun 2023 15:16:34 -0700 Subject: [PATCH 065/123] ptuning oom fix (#6916) * oom wip Signed-off-by: arendu * minor Signed-off-by: arendu * comments Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../megatron/gpt_prompt_learning_dataset.py | 10 +++++++++- .../language_modeling/megatron_gpt_peft_models.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py index 15edc673b7cc..4b1b4f61d439 100755 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_prompt_learning_dataset.py @@ -327,6 +327,9 @@ def __len__(self): def __getitem__(self, idx): return self.examples[idx] + def _ceil_to_nearest(self, n, m): + return (n + m - 1) // m * m + def collate_fn(self, batch, tp_workers=0): """ Prepares input_ids, labels, loss mask, attention_mask, and position ids for global batch """ taskname_ids, input_ids, answer_starts = zip(*batch) @@ -350,11 +353,16 @@ def collate_fn(self, batch, tp_workers=0): else: resi_padding = 0 batch_max += resi_padding + ceil_batch_max = self._ceil_to_nearest( + batch_max, 8 + ) # @adithyare this padding does not conflict with the tp_workers padding above + # since tp_workers is always a multiple of 2. the padding to multiple of 8 is to ensure an mem-optimized softmax is used. 
+ batch_max = ceil_batch_max + 1 input_ids, loss_mask = self.pad_batch_and_build_loss_mask(input_ids, batch_max, answer_starts) # Should be a label for every token in batch, label is the next token labels = input_ids[:, 1:].contiguous() input_ids = input_ids[:, :-1].contiguous() - batch_max -= 1 + batch_max -= 1 # @adithyare I *think* this negation is done to account for the above 2 lines which removes one item from the input_ids seq. # Loss mask should align with labels loss_mask = loss_mask[:, 1:].contiguous() diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py index f1f44e31e175..73579114234d 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_peft_models.py @@ -346,7 +346,7 @@ def __init__( AdapterName.LORA_KQV_ADAPTER, ] lora_cfg = cfg.peft.lora_tuning - if cfg.kv_channels is None: + if cfg.get("kv_channels", None) is None: assert ( cfg.hidden_size % cfg.num_attention_heads == 0 ), 'hidden_size must be divisible by num_attention_heads if kv_channels is None' From 350b2a2ddb221b63be3d406b5907b614d32a6e8d Mon Sep 17 00:00:00 2001 From: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:29:25 +0300 Subject: [PATCH 066/123] add rampup bs assertion (#6927) * added assertion Signed-off-by: Dmytro Pykhtar * added assertion Signed-off-by: Dmytro Pykhtar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Dmytro Pykhtar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../nlp/models/language_modeling/megatron_gpt_model.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index c4bfdbbad143..84caed6c111e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1001,6 +1001,11 @@ def setup(self, stage=None): self.init_global_step = self.trainer.global_step if self.rampup_batch_size: + optimizer = self.cfg.optim.get('name', None) + assert ( + optimizer == 'fused_adam' + ), f'{optimizer} optimizer is not supported yet with rampup batch size. Please, use fused_adam optimizer instead.'
+ num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR num_microbatch_calculator.update(self.init_consumed_samples, consistency_check=False) self.prev_consumed_samples = self.init_consumed_samples From 92c4a2a74d51f07c1dfbb6664c22c0dfa5f5a71e Mon Sep 17 00:00:00 2001 From: trias702 <25867060+trias702@users.noreply.github.com> Date: Tue, 27 Jun 2023 18:10:47 -0500 Subject: [PATCH 067/123] Online Code Switching Dataset for ASR (#6579) * Initial commit of online code switched dataset Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated tests for new argument added to BPE classes in audio_to_text.py Signed-off-by: Daniel Egert * Updated logic to catch bad audios with all zeros Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added method docstrings and convert lang_probs to be an optional dict Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated lang_probs docstring to correct type Signed-off-by: Daniel Egert * Added final batch of requested changes and docs Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Made changes for final release candidate test Signed-off-by: Daniel Egert * Fixed random tabs and changed some docstrings Signed-off-by: Daniel Egert * Changed input types slightly Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * import guard soundfile due to CI failure in test-nlp-imports Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed one last issue with dataloader null checks Signed-off-by: Daniel Egert --------- Signed-off-by: Daniel Egert Signed-off-by: trias702 <25867060+trias702@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/asr/configs.rst | 39 ++ .../asr/data/audio_to_text_dataset.py | 144 ++++++- .../asr/models/classification_models.py | 6 +- nemo/collections/asr/models/ctc_bpe_models.py | 2 +- nemo/collections/asr/models/ctc_models.py | 8 +- .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 2 +- nemo/collections/asr/models/label_models.py | 6 +- .../collections/asr/models/rnnt_bpe_models.py | 2 +- nemo/collections/asr/models/rnnt_models.py | 8 +- nemo/collections/asr/models/slu_models.py | 6 +- nemo/collections/asr/models/ssl_models.py | 12 +- nemo/collections/common/data/__init__.py | 2 +- nemo/collections/common/data/dataset.py | 375 +++++++++++++++++- .../common/parts/preprocessing/collections.py | 3 + .../ngram_lm/create_lexicon_from_arpa.py | 4 +- 15 files changed, 599 insertions(+), 20 deletions(-) diff --git a/docs/source/asr/configs.rst b/docs/source/asr/configs.rst index 120969ee9dfa..f9a4ea9970b1 100644 --- a/docs/source/asr/configs.rst +++ b/docs/source/asr/configs.rst @@ -237,6 +237,45 @@ For example, a decoder config corresponding to a sub-word tokenization model sho vocabulary: [] # filled with vocabulary from tokenizer at runtime +On-the-fly Code Switching +------------------------- + +Nemo supports creating code-switched synthetic utterances on-the-fly during training/validation/testing. 
This allows you to create ASR models which +support intra-utterance code switching. If you have Nemo formatted audio data on disk (either JSON manifests or tarred audio data), you +can easily mix as many of these audio sources together as desired by adding some extra parameters to your `train_ds`, `validation_ds`, and `test_ds`. + +Please note that this allows you to mix any kind of audio sources together to create synthetic utterances which sample from all sources. The most +common use case for this is blending different languages together to create a multilingual code-switched model, but you can also blend +together different audio sources from the same languages (or language families), to create noise robust data, or mix fast and slow speech from the +same language. + +For multilingual code-switched models, we recommend using AggTokenizer for your Tokenizer if mixing different languages. + +The following example shows how to mix 3 different languages: English (en), German (de), and Japanese (ja) added to the `train_ds` model block, however +you can add similar logic to your `validation_ds` and `test_ds` blocks for on-the-fly code-switched validation and test data too. This example mixes +together 3 languages, but you can use as many as you want. However, be advised that the more languages you add, the higher your `min_duration` and `max_duration` +need to be set to ensure all languages are sampled into each synthetic utterance, and setting these hyperparameters higher will use more VRAM per mini-batch during +training and evaluation. + +.. code-block:: yaml + + model: + train_ds: + manifest_filepath: [/path/to/EN/tarred_manifest.json, /path/to/DE/tarred_manifest.json, /path/to/JA/tarred_manifest.json] + tarred_audio_filepaths: ['/path/to/EN/tars/audio__OP_0..511_CL_.tar', '/path/to/DE/tars/audio__OP_0..1023_CL_.tar', '/path/to/JA/tars/audio__OP_0..2047_CL_.tar'] + is_code_switched: true + is_tarred: true + shuffle: true + code_switched: # add this block for code-switching + min_duration: 12 # the minimum number of seconds for each synthetic code-switched utterance + max_duration: 20 # the maximum number of seconds for each synthetic code-switched utterance + min_monolingual: 0.3 # the minimum percentage of utterances which will be pure monolingual (0.3 = 30%) + probs: [0.25, 0.5, 0.25] # the probability to sample each language (matches order of `language` above) if not provided, assumes uniform distribution + force_monochannel: true # if your source data is multi-channel, then setting this to True will force the synthetic utterances to be mono-channel + sampling_scales: 0.75 # allows you to down/up sample individual languages. 
Can set this as an array for individual languages, or a scalar for all languages + seed: 123 # add a seed for replicability in future runs (highly useful for `validation_ds` and `test_ds`) + + Model Architecture Configurations --------------------------------- diff --git a/nemo/collections/asr/data/audio_to_text_dataset.py b/nemo/collections/asr/data/audio_to_text_dataset.py index d5dcc8be4847..3234b617cc9c 100644 --- a/nemo/collections/asr/data/audio_to_text_dataset.py +++ b/nemo/collections/asr/data/audio_to_text_dataset.py @@ -19,14 +19,14 @@ from typing import Any, List, Optional, Union import torch -from omegaconf import DictConfig, open_dict +from omegaconf import DictConfig, OmegaConf, open_dict from omegaconf.listconfig import ListConfig from pytorch_lightning.callbacks import BasePredictionWriter from torch.utils.data import ChainDataset from nemo.collections.asr.data import audio_to_text, audio_to_text_dali from nemo.collections.asr.parts.preprocessing.perturb import process_augmentations -from nemo.collections.common.data.dataset import ConcatDataset +from nemo.collections.common.data.dataset import CodeSwitchedDataset, ConcatDataset from nemo.utils import logging @@ -398,6 +398,88 @@ def get_tarred_dataset( return get_chain_dataset(datasets=datasets, ds_config=config, rank=global_rank) +def get_code_switched_dataset( + config: dict, + shuffle_n: int, + global_rank: int, + world_size: int, + tokenizer: Optional['TokenizerSpec'] = None, + augmentor: Optional['AudioAugmentor'] = None, +) -> CodeSwitchedDataset: + + if 'manifest_filepath' not in config: + raise ValueError("`manifest_filepath` must be provided in the dataset config if `is_code_switched=True`") + if 'code_switched' not in config: + raise ValueError("`code_switched` param group must be in the dataset config if `is_code_switched=True`") + + manifest_filepaths = config['manifest_filepath'] + tarred_audio_filepaths = config.get('tarred_audio_filepaths', None) + + cs_config = OmegaConf.to_container(config['code_switched']) + + # needed to support validation Datasets that arrive here as + # [[dataset1,dataset2]] otherwise ModelPT would interfere + if len(manifest_filepaths) == 1 and not isinstance(manifest_filepaths[0], str): + manifest_filepaths = config['manifest_filepath'][0] + if tarred_audio_filepaths is None: + tarred_audio_filepaths = [None] * len(manifest_filepaths) + + if len(manifest_filepaths) != len(tarred_audio_filepaths): + raise ValueError( + f"manifest_filepaths (length={len(manifest_filepaths)}) and tarred_audio_filepaths (length={len(tarred_audio_filepaths)}) need to have the same number of items." 
+ ) + + datasets = [] + for dataset_idx, (tarred_audio_filepath, manifest_filepath) in enumerate( + zip(tarred_audio_filepaths, manifest_filepaths) + ): + conf = copy.deepcopy(config) + conf['manifest_filepath'] = manifest_filepath + with open_dict(conf): + conf['tarred_audio_filepaths'] = tarred_audio_filepath + if tarred_audio_filepath is None or len(tarred_audio_filepath) == 0: + if tokenizer is None: + dataset = get_char_dataset(config=conf, augmentor=None) + else: + dataset = get_bpe_dataset(config=conf, tokenizer=tokenizer, augmentor=None) + else: + dataset = get_tarred_dataset( + config=conf, + tokenizer=tokenizer, + shuffle_n=shuffle_n, + global_rank=global_rank, + world_size=world_size, + augmentor=None, + ) + datasets.append(dataset) + + config = OmegaConf.to_container(config) + + dataset = CodeSwitchedDataset( + datasets, + shuffle=cs_config.get('shuffle', True), + min_duration=cs_config.get('min_duration', 4), + max_duration=cs_config.get('max_duration', 20), + min_monolingual=cs_config.get('min_monolingual', 0.3), + lang_probs=cs_config.get('probs', None), + db_norm=cs_config.get('db_norm', -25.0), + pause_start=cs_config.get('pause_start', 0), + pause_join=cs_config.get('pause_join', 0), + pause_end=cs_config.get('pause_end', 0), + sampling_scales=cs_config.get('sampling_scales', None), + seed=cs_config.get('seed', None), + global_rank=global_rank, + world_size=world_size, + pure_random=cs_config.get('pure_random', False), + force_monochannel=cs_config.get('force_monochannel', True), + infinity_mode=cs_config.get('infinity_mode', False), + sample_rate=config['sample_rate'], + augmentor=augmentor, + ) + + return dataset + + def get_dali_char_dataset( config: dict, shuffle: bool, @@ -546,8 +628,35 @@ def get_audio_to_text_char_dataset_from_config( ) return dataset + # Instantiate a code-switched dataset if config is present + if config.get('is_code_switched', False): + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` was None. Provided config : {config}") + return None + if not ('code_switched' in config and config['code_switched'] is not None): + logging.warning( + f"Code switched dataset requires `*_ds.code_switched.*` dict but it was not provided. Config: {config}" + ) + return None + if ( + ('probs' in config['code_switched']) + and (config['code_switched']['probs'] is not None) + and (not isclose(sum(config['code_switched']['probs']), 1, abs_tol=1e-6)) + ): + logging.warning(f"`.code_switched.probs` need to sum to 1. Config: {config['code_switched']}") + return None + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + dataset = get_code_switched_dataset( + config=config, + shuffle_n=shuffle_n, + global_rank=global_rank, + world_size=world_size, + tokenizer=None, + augmentor=augmentor, + ) # Instantiate tarred dataset loader or normal dataset loader - if config.get('is_tarred', False): + elif config.get('is_tarred', False): if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( 'manifest_filepath' in config and config['manifest_filepath'] is None ): @@ -645,8 +754,35 @@ def get_audio_to_text_bpe_dataset_from_config( ) return dataset + # Instantiate a code-switched dataset if config is present + if config.get('is_code_switched', False): + if 'manifest_filepath' in config and config['manifest_filepath'] is None: + logging.warning(f"Could not load dataset as `manifest_filepath` was None. 
Provided config : {config}") + return None + if not ('code_switched' in config and config['code_switched'] is not None): + logging.warning( + f"Code switched dataset requires `*_ds.code_switched.*` dict but it was not provided. Config: {config}" + ) + return None + if ( + ('probs' in config['code_switched']) + and (config['code_switched']['probs'] is not None) + and (not isclose(sum(config['code_switched']['probs']), 1, abs_tol=1e-6)) + ): + logging.warning(f"`.code_switched.probs` need to sum to 1. Config: {config['code_switched']}") + return None + + shuffle_n = config.get('shuffle_n', 4 * config['batch_size']) if shuffle else 0 + dataset = get_code_switched_dataset( + config=config, + shuffle_n=shuffle_n, + global_rank=global_rank, + world_size=world_size, + tokenizer=tokenizer, + augmentor=augmentor, + ) # Instantiate tarred dataset loader or normal dataset loader - if config.get('is_tarred', False): + elif config.get('is_tarred', False): if ('tarred_audio_filepaths' in config and config['tarred_audio_filepaths'] is None) or ( 'manifest_filepath' in config and config['manifest_filepath'] is None ): diff --git a/nemo/collections/asr/models/classification_models.py b/nemo/collections/asr/models/classification_models.py index fb0ee82132a1..432674225f5a 100644 --- a/nemo/collections/asr/models/classification_models.py +++ b/nemo/collections/asr/models/classification_models.py @@ -174,7 +174,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). 
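For reference, the `code_switched` block from the documentation above can also be assembled programmatically. The sketch below is illustrative only — the file paths, batch size, and duration values are hypothetical placeholders — but it mirrors the probability check that `get_audio_to_text_char_dataset_from_config` / `get_audio_to_text_bpe_dataset_from_config` perform before building the code-switched dataset:

.. code-block:: python

    from math import isclose

    from omegaconf import OmegaConf

    # Hypothetical train_ds config mirroring the documented YAML example.
    train_ds = OmegaConf.create(
        {
            "manifest_filepath": ["/path/to/EN/manifest.json", "/path/to/DE/manifest.json", "/path/to/JA/manifest.json"],
            "sample_rate": 16000,
            "batch_size": 16,
            "shuffle": True,
            "is_code_switched": True,
            "code_switched": {
                "min_duration": 12,
                "max_duration": 20,
                "min_monolingual": 0.3,
                "probs": [0.25, 0.5, 0.25],
                "seed": 123,
            },
        }
    )

    # Same validation the dataset factory applies: probs must sum to 1 (within float tolerance).
    probs = train_ds.code_switched.probs
    assert isclose(sum(probs), 1, abs_tol=1e-6), "`code_switched.probs` must sum to 1"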
diff --git a/nemo/collections/asr/models/ctc_bpe_models.py b/nemo/collections/asr/models/ctc_bpe_models.py index 7d3b236b2bab..04547b816fe8 100644 --- a/nemo/collections/asr/models/ctc_bpe_models.py +++ b/nemo/collections/asr/models/ctc_bpe_models.py @@ -106,7 +106,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): diff --git a/nemo/collections/asr/models/ctc_models.py b/nemo/collections/asr/models/ctc_models.py index 1446e1ce871f..d995544513de 100644 --- a/nemo/collections/asr/models/ctc_models.py +++ b/nemo/collections/asr/models/ctc_models.py @@ -365,7 +365,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): @@ -413,7 +413,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py index b88669a1fbc0..6637486f18dc 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py @@ -144,7 +144,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index cc789dacff11..1a284aca609d 100644 --- a/nemo/collections/asr/models/label_models.py +++ b/nemo/collections/asr/models/label_models.py @@ -275,7 +275,11 @@ def setup_training_data(self, train_data_layer_config: Optional[Union[DictConfig # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_layer_config and train_data_layer_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. 
Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/models/rnnt_bpe_models.py b/nemo/collections/asr/models/rnnt_bpe_models.py index 9ed38a376103..aa5486f25811 100644 --- a/nemo/collections/asr/models/rnnt_bpe_models.py +++ b/nemo/collections/asr/models/rnnt_bpe_models.py @@ -494,7 +494,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index eec663813ca8..92bb04fd2a3e 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -475,7 +475,7 @@ def _setup_dataloader_from_config(self, config: Optional[Dict]): return dataset shuffle = config['shuffle'] - if config.get('is_tarred', False): + if isinstance(dataset, torch.utils.data.IterableDataset): shuffle = False if hasattr(dataset, 'collate_fn'): @@ -523,7 +523,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/asr/models/slu_models.py b/nemo/collections/asr/models/slu_models.py index 2062397c511c..6df907334662 100644 --- a/nemo/collections/asr/models/slu_models.py +++ b/nemo/collections/asr/models/slu_models.py @@ -436,7 +436,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). 
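The motivation for the repeated `isinstance` changes above is that tarred datasets and the new `CodeSwitchedDataset` introduced in this patch are both `IterableDataset` instances, so testing the dataset type covers every iterable loader without consulting config flags such as `is_tarred`. A toy illustration of the same rule (not NeMo code; the class and sizes are made up):

.. code-block:: python

    from torch.utils.data import DataLoader, IterableDataset


    class ToyIterable(IterableDataset):
        """Stand-in for a tarred or code-switched dataset: both are IterableDatasets."""

        def __iter__(self):
            yield from range(8)


    dataset = ToyIterable()

    # Iterable datasets handle their own ordering, so DataLoader-level shuffle must be off.
    shuffle = not isinstance(dataset, IterableDataset)
    loader = DataLoader(dataset, batch_size=4, shuffle=shuffle)
    assert shuffle is False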
diff --git a/nemo/collections/asr/models/ssl_models.py b/nemo/collections/asr/models/ssl_models.py index dee2559364d0..8de713ca948d 100644 --- a/nemo/collections/asr/models/ssl_models.py +++ b/nemo/collections/asr/models/ssl_models.py @@ -234,7 +234,11 @@ def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + if ( + self._train_dl is not None + and hasattr(self._train_dl, 'dataset') + and isinstance(self._train_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). @@ -270,7 +274,11 @@ def setup_validation_data(self, val_data_config: Optional[Union[DictConfig, Dict # Need to set this because if using an IterableDataset, the length of the dataloader is the total number # of samples rather than the number of batches, and this messes up the tqdm progress bar. # So we set the number of steps manually (to the correct number) to fix this. - if 'is_tarred' in val_data_config and val_data_config['is_tarred']: + if ( + self._validation_dl is not None + and hasattr(self._validation_dl, 'dataset') + and isinstance(self._validation_dl.dataset, torch.utils.data.IterableDataset) + ): # We also need to check if limit_train_batches is already set. # If it's an int, we assume that the user has set it to something sane, i.e. <= # training batches, # and don't change it. Otherwise, adjust batches accordingly if it's a float (including 1.0). diff --git a/nemo/collections/common/data/__init__.py b/nemo/collections/common/data/__init__.py index afb12338e548..ecc67ef05ea5 100644 --- a/nemo/collections/common/data/__init__.py +++ b/nemo/collections/common/data/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo.collections.common.data.dataset import ConcatDataset, ConcatMapDataset +from nemo.collections.common.data.dataset import CodeSwitchedDataset, ConcatDataset, ConcatMapDataset diff --git a/nemo/collections/common/data/dataset.py b/nemo/collections/common/data/dataset.py index 030e997802bc..5b4fba5ef24a 100644 --- a/nemo/collections/common/data/dataset.py +++ b/nemo/collections/common/data/dataset.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import io import logging -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Tuple, Union import numpy as np +import torch import torch.utils.data as pt_data from torch.utils.data import Dataset, IterableDataset -__all__ = ['ConcatDataset', 'ConcatMapDataset'] +__all__ = ['ConcatDataset', 'ConcatMapDataset', 'CodeSwitchedDataset'] class ConcatDataset(IterableDataset): @@ -286,3 +288,372 @@ def __len__(self): def __getitem__(self, idx): dataset_id, dataset_index = self.indices[idx] return self.datasets[dataset_id][dataset_index] + + +class CodeSwitchedDataset(IterableDataset): + """ + A dataset that accepts as argument multiple sub-datasets (usually from different languages, but that's not required) and then + samples from them in order to create synthetic code-switched samples of up to N different sub-datasets + Args: + datasets (list): A list of datasets + lang_probs (list): A list of probabilities (which must sum to 1) corresponding to the sampling probability for each dataset + shuffle (bool): Whether to shuffle individual datasets. Only works with non-iterable datasets. + Defaults to True. + min_duration (int): the minimum duration (secs) of each synthetic code-switched sample. Will draw randomly until this is hit. + Defaults to 4 + max_duration (int): the maximum duration (secs) of each synthetic code-switched sample. + Defaults to 20 + min_monolingual (float): this percentage of the dataset will be original monolingual samples + Defaults to 0.3 - means 30% + db_norm (float): will normalise the composite CS sample to this DB level + Defaults to -25.0 + pause_start (int): inserts silence equal to this value (msecs) at the start of each CS sample + Defaults to 0 + pause_join (int): inserts silence equal to this value (msecs) between all language changes in the CS sample + Defaults to 0 + pause_end (int): terminates all CS samples with silence equal to this value (msecs) + Defaults to 0 + sampling_scales (list or float): gives you the ability to upsample/downsample each individual dataset + seed: Optional value to seed the numpy RNG. + global_rank (int): Worker rank, used for partitioning map style datasets. Defaults to 0. + world_size (int): Total number of processes, used for partitioning map style datasets. Defaults to 1. + pure_random (bool): If true, then always draw random sample from lang_probs. 
If false, you only draw from those datasets + which you haven't sampled from yet for the composite sample + force_monochannel (bool): If true, then all output audio will be mono-channel + infinity_mode (bool): If true, then the dataset iterable will generate an infinite amount of samples + sample_rate (int): the sample rate of all audio being sent to this Dataset + augmentor (AudioAugmentor): The any perturbations you wish to have applied on the CS samples + """ + + def __init__( + self, + datasets: List[Any], + lang_probs: Optional[List[float]] = None, + shuffle: bool = True, + min_duration: int = 4, + max_duration: int = 20, + min_monolingual: float = 0.3, + db_norm: float = -25.0, + pause_start: int = 0, + pause_join: int = 0, + pause_end: int = 0, + sampling_scales: Optional[Union[float, List[float]]] = None, + seed: Optional[int] = None, + global_rank: int = 0, + world_size: int = 1, + pure_random: bool = False, + force_monochannel: bool = True, + infinity_mode: bool = False, + sample_rate: int = 16000, + augmentor: Optional['AudioAugmentor'] = None, + ): + super().__init__() + + if len(datasets) == 0: + raise ValueError("CodeSwitchedDataset must receive a non-zero length datasets dict object") + + self.datasets = datasets + self.langs = list(range(len(datasets))) + self.langs_set = set(self.langs) + self.lang_iterables = {k: None for k in self.langs} + self.lang_kind = {k: None for k in self.langs} + self.shuffle = shuffle + self.min_duration = min_duration + self.max_duration = max_duration + self.min_monolingual = min_monolingual + self.db_norm = db_norm + self.pause_start = pause_start + self.pause_join = pause_join + self.pause_end = pause_end + self.pure_random = pure_random + self.force_monochannel = force_monochannel + self.infinity_mode = infinity_mode + self.global_rank = global_rank + self.world_size = world_size + self.augmentor = augmentor + self.sample_rate = sample_rate + self.length = 0 + if lang_probs is None: + self.prob_dict = {l: 1.0 / len(self.langs) for l in self.langs} + else: + assert len(self.langs) == len( + lang_probs + ), "Size mismatch between languages and respective probs in CodeSwitchedDataset" + self.prob_dict = {l: lang_probs[l] for l in self.langs} + self.lang_probs = np.array(list(self.prob_dict.values())) + if sampling_scales is not None and not isinstance(sampling_scales, list): + self.sampling_scales = {k: sampling_scales for k in self.langs} + elif ( + sampling_scales is not None + and isinstance(sampling_scales, list) + and len(sampling_scales) == len(self.langs) + ): + self.sampling_scales = {k: v for k, v in zip(self.langs, sampling_scales)} + else: + self.sampling_scales = {k: 1 for k in self.langs} + + for lang, dataset in enumerate(self.datasets): + isiterable = isinstance(dataset, IterableDataset) + + if isiterable: + self.lang_kind[lang] = 'iterable' + self.length += int(len(dataset) * self.sampling_scales[lang]) + else: + self.lang_kind[lang] = 'map' + self.length += int((len(dataset) // world_size) * self.sampling_scales[lang]) + + if seed is not None: + np.random.seed(seed) + + # set this to ensure compatibility with models searching for the collate_fn + # since this class stores datasets as a dict, not list + # self.collate_fn = self.datasets[self.langs[0]].collate_fn + if hasattr(self.datasets[self.langs[0]], 'collate_fn'): + self.collate_fn = self.datasets[self.langs[0]].collate_fn + elif ( + hasattr(self.datasets[self.langs[0]], 'datasets') + and isinstance(self.datasets[self.langs[0]].datasets, list) + and 
len(self.datasets[self.langs[0]].datasets) > 0 + and hasattr(self.datasets[self.langs[0]].datasets[0], 'collate_fn') + ): + # support datasets that are lists of entries + self.collate_fn = self.datasets[self.langs[0]].datasets[0].collate_fn + elif ( + hasattr(self.datasets[self.langs[0]], 'datasets') + and isinstance(self.datasets[self.langs[0]].datasets, list) + and len(self.datasets[self.langs[0]].datasets) > 0 + and hasattr(self.datasets[self.langs[0]].datasets[0], 'datasets') + and isinstance(self.datasets[self.langs[0]].datasets[0].datasets, list) + and len(self.datasets[self.langs[0]].datasets[0].datasets) > 0 + and hasattr(self.datasets[self.langs[0]].datasets[0].datasets[0], 'collate_fn') + ): + # support datasets that are lists of lists + self.collate_fn = self.datasets[self.langs[0]].datasets[0].datasets[0].collate_fn + else: + raise RuntimeError("CodeSwitchedDataset could not locate a valid dataset collate_fn to bind to") + + # this method returns an iterator object for a given language ID + # it correctly handles whether the underlying dataset is IterableDataset or mappable + def get_iterable_by_lang(self, lang): + dataset = self.datasets[lang] + + if isinstance(dataset, IterableDataset): + return dataset.__iter__() + else: + indices = np.arange(len(dataset)) + if self.shuffle: + np.random.shuffle(indices) + return iter(indices) + + # this method is the main function which builds and returns a composite, synthetic code-switched + # utterance on the fly. It automatically works with all of the class-based variables stored to create + # the synthetic utterance + def build_single_CS_sample(self): + # get_sample_from_language returns a LongTensor for the transcripts so we create a LongTensor to hold + # all returned transcripts + comp_text = torch.LongTensor([]) + created_sample_duration_sec = 0 + created_sample_langs = [] + created_sample_audios = [] + + # if min_monolingual fires, it means we will just return a single, original monolingual utterance + # from one of our languages based on that language's probability + pure_mono = np.random.rand() <= self.min_monolingual + + # we continue to add to the composite utterance until we hit the min_duration + while created_sample_duration_sec < self.min_duration: + # we sample from only those languages which haven't already been sampled for this particular + # synthetic utterance, unless pure_random=True, in which case, you just sample with replacement + # every time + if (self.pure_random and not pure_mono) or ( + len(set(created_sample_langs)) == 0 or len(set(created_sample_langs)) == len(self.langs) + ): + lang_id = np.random.choice(self.langs, p=self.lang_probs) + # elif pure_mono: + # use this approach if you want synthetic utterances which are all monolingual + # lang_id = created_sample_langs[0] + else: + # this code is for when we need to sample from only those languages which haven't been sampled + # yet for this utterance + p = np.array(list(map(self.prob_dict.get, list(self.langs_set - set(created_sample_langs))))) + p = p / p.sum() + lang_id = np.random.choice(list(self.langs_set - set(created_sample_langs)), p=p) + + audio, audio_len, labels, labels_len, *_ = self.get_sample_from_language(lang_id) + + # in case you get an audio which is all silence we keep sampling + if audio.count_nonzero().item() == 0: + continue + + sample_duration = len(audio) / self.sample_rate + if (created_sample_duration_sec + sample_duration) > self.max_duration: + continue + + if comp_text.device != labels.device: + comp_text = 
comp_text.to(labels.device) + + if audio.ndim > 1 and self.force_monochannel: + audio = audio.mean(dim=-1) + + created_sample_duration_sec += sample_duration + created_sample_langs.append(lang_id) + # need to use numpy instead of torch here because we need numpy's trim_zeros function + created_sample_audios.append(audio.cpu().numpy()) + comp_text = torch.cat([comp_text, labels], dim=0) + + # we want a real, non-synth pure_mono sample so we break soon as we have one + if pure_mono: + break + + # check that all samples have the same number of channels + sample_channels = list(set([s.ndim for s in created_sample_audios])) + if len(sample_channels) > 1: + raise RuntimeError( + "Mixture of audios with different number of channels in CodeSwitchedDataset. All sources must be same number of channels." + ) + + multichannel = sample_channels[0] > 1 + + # we start with pause_start amount of silence (zero array) which needs the correct shape for multi/mono channel + if multichannel: + comp_audio = np.zeros( + shape=(int(self.pause_start * self.sample_rate / 1000.0), created_sample_audios[0].shape[-1]), + dtype=created_sample_audios[0].dtype, + ) + else: + comp_audio = np.zeros( + shape=(int(self.pause_start * self.sample_rate / 1000.0),), dtype=created_sample_audios[0].dtype + ) + + # iterate over all mono-lingual samples to build the final composite + for idx, wav in enumerate(created_sample_audios): + if not multichannel: + # this function only works if mono-channel + wav = np.trim_zeros(wav) + + # normalise to provided DB level + wav_norm = wav * (10.0 ** (self.db_norm / 20.0) / np.maximum(0.01, (wav ** 2).mean(axis=0) ** 0.5)) + + # this part appends the normed waveform to the existing waveform, and inserts pause_join amount of silence + # if necessary, otherwise just a straight append + if idx < len(created_sample_audios) - 1: + if multichannel: + wav_norm = np.append( + wav_norm, + np.zeros( + shape=( + int(self.pause_join * self.sample_rate / 1000.0), + created_sample_audios[0].shape[-1], + ), + dtype=comp_audio.dtype, + ), + axis=0, + ) + else: + wav_norm = np.append( + wav_norm, + np.zeros(shape=(int(self.pause_join * self.sample_rate / 1000.0),), dtype=comp_audio.dtype), + axis=0, + ) + + # this is the penultimate composite wavform, just need to add pause_end silence + comp_audio = np.append(comp_audio, wav_norm, axis=0) + + # here we add the pause_end amount of silence, in correct channel shape + if multichannel: + comp_audio = np.append( + comp_audio, + np.zeros( + shape=(int(self.pause_end * self.sample_rate / 1000.0), created_sample_audios[0].shape[-1]), + dtype=comp_audio.dtype, + ), + axis=0, + ) + else: + comp_audio = np.append( + comp_audio, + np.zeros(shape=(int(self.pause_end * self.sample_rate / 1000.0),), dtype=comp_audio.dtype), + axis=0, + ) + + # we only want augmentation to happen on the final, synthetic utterance, and not on any of the individual + # languages, which is why we set augmentor=None when building the individual language datasets in audio_to_text_dataset.get_code_switched_dataset + # here we now apply augmentation to the final, synthetic utterance only + # all of this logic here happens in-memory, nothing is written to disk + if self.augmentor is not None: + # import here to avoid circular import error + # import here because otherwise CI test-nlp-imports fails since soundfile is only in requirements_asr and not in requirements_common + import soundfile as sf + + from nemo.collections.asr.parts.preprocessing import AudioSegment + + mb = io.BytesIO() + 
sf.write(mb, comp_audio, self.sample_rate, format='WAV') + mb.seek(0) + comp_audio_as = AudioSegment.from_file(mb, target_sr=self.sample_rate) + self.augmentor.perturb(comp_audio_as) + comp_audio = comp_audio_as.samples + + return ( + torch.tensor(comp_audio, dtype=audio.dtype, device=audio.device), + torch.tensor(len(comp_audio), device=audio_len.device).long(), + comp_text, + torch.tensor(len(comp_text), device=labels_len.device).long(), + ) + + # this is a helper method which prepares all of the iterator objects for all languages + # based on whether that language's underlying dataset is a map or an IterableDataset + def prep_underlying_datasets(self): + worker_info = pt_data.get_worker_info() + if worker_info is None: + max_elements = self.length + wid = 0 + wnum = 1 + else: + wid = worker_info.id + wnum = worker_info.num_workers + max_elements = len(range(wid, self.length, wnum)) + + for lang in self.langs: + if self.lang_kind[lang] == 'map': + start_idx = (len(self.datasets[lang]) // self.world_size) * self.global_rank + end_idx = start_idx + (len(self.datasets[lang]) // self.world_size) + if self.global_rank == self.world_size - 1: + end_idx = len(self.datasets[lang]) + indices = range(start_idx + wid, end_idx, wnum) + self.datasets[lang] = pt_data.Subset(self.datasets[lang], indices) + + self.lang_iterables[lang] = self.get_iterable_by_lang(lang) + + return max_elements + + # returns a sample (audio and transcript) from any underlying language stored by the class on instantiation + # the sample returned is a tensor for the audio and a tensor of ints for the transcript + # this method automatically handles StopIteration errors for the underyling language and rebuilds + # the iterator if necessary + def get_sample_from_language(self, lang): + while True: + try: + val = next(self.lang_iterables[lang]) + if self.lang_kind[lang] == 'map': + val = self.datasets[lang][val] + return val + except StopIteration: + self.lang_iterables[lang] = self.get_iterable_by_lang(lang) + + def __iter__(self): + # we create primed iterators for all languages and return the grand total of samples for each + # underlying language as a sum + max_elements = self.prep_underlying_datasets() + + if self.infinity_mode: + while True: + yield self.build_single_CS_sample() + else: + n = 0 + while n < max_elements: + yield self.build_single_CS_sample() + n += 1 + + def __len__(self): + return self.length diff --git a/nemo/collections/common/parts/preprocessing/collections.py b/nemo/collections/common/parts/preprocessing/collections.py index 4616f95e1a4f..ed9e53ae6ffe 100644 --- a/nemo/collections/common/parts/preprocessing/collections.py +++ b/nemo/collections/common/parts/preprocessing/collections.py @@ -159,6 +159,9 @@ def __init__( if hasattr(parser, "is_aggregate") and parser.is_aggregate and isinstance(text, str): if lang is not None: text_tokens = parser(text, lang) + # for future use if want to add language bypass to audio_to_text classes + # elif hasattr(parser, "lang") and parser.lang is not None: + # text_tokens = parser(text, parser.lang) else: raise ValueError("lang required in manifest when using aggregate tokenizers") else: diff --git a/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py b/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py index 22c657b25613..a38c33de05af 100644 --- a/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py +++ b/scripts/asr_language_modeling/ngram_lm/create_lexicon_from_arpa.py @@ -74,4 +74,6 @@ if tokenizer is None: 
f.write("{w}\t{s}\n".format(w=word, s=" ".join(word))) else: - f.write("{w}\t{s}\n".format(w=word, s=" ".join(tokenizer.text_to_tokens(word)))) + w_ids = tokenizer.text_to_ids(word) + if tokenizer.unk_id not in w_ids: + f.write("{w}\t{s}\n".format(w=word, s=" ".join(tokenizer.text_to_tokens(word)))) From e9b0b11835653020d685e198f74a8e04aab8d5ab Mon Sep 17 00:00:00 2001 From: Boris Fomitchev Date: Wed, 28 Jun 2023 09:26:40 -0700 Subject: [PATCH 068/123] Removed optional optimize_for_inference (#6933) Signed-off-by: Boris Fomitchev --- nemo/core/classes/exportable.py | 2 +- scripts/export.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index 38b8e1c1e31b..3d2682f2304e 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -204,7 +204,7 @@ def _export( check_trace=check_trace, check_tolerance=check_tolerance, ) - jitted_model = torch.jit.optimize_for_inference(torch.jit.freeze(jitted_model)) + jitted_model = torch.jit.freeze(jitted_model) if verbose: logging.info(f"JIT code:\n{jitted_model.code}") jitted_model.save(output) diff --git a/scripts/export.py b/scripts/export.py index 80cbcf3dc666..fe3b79ebdf28 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -158,7 +158,7 @@ def nemo_export(argv): check_trace=check_trace, check_tolerance=args.check_tolerance, onnx_opset_version=args.onnx_opset, - verbose=args.verbose, + verbose=bool(args.verbose), ) except Exception as e: From 7e20750b2035a46dffde4fae389ac1f3967bf3fc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 28 Jun 2023 15:07:31 -0600 Subject: [PATCH 069/123] Apply garbage collection interval to validation steps (#6870) (#6872) * Apply garbage collection inverval to validation steps * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Sangkug Lym Co-authored-by: Sangkug Lym Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper --- .../language_modeling/megatron_base_model.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index ceddc1dca4d4..eb6671452992 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -163,6 +163,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): # The automatic garbage collector sould be disabled before training starts. 
if self.gc_interval > 0: gc.disable() + self.validation_global_step = 1 def _enable_nvidia_optimizations(self): "These optimizations are present in NVIDIA NGC PyTorch Containers" @@ -225,6 +226,16 @@ def on_train_start(self) -> None: super().on_train_start() self.init_global_step = self.trainer.global_step + def on_validation_start(self) -> None: + super().on_validation_start() + if self.gc_interval > 0: + gc.collect() + + def on_validation_end(self) -> None: + super().on_validation_end() + if self.gc_interval > 0: + gc.collect() + def _build_vocab(self): """ Manipulate vocabulary (e.g., pad vocabulary for increased performance)/ @@ -373,6 +384,14 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0): gc.collect() + def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None: + super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx) + + if self.gc_interval > 0: + if self.validation_global_step % self.gc_interval == 0: + gc.collect() + self.validation_global_step += 1 + def setup_optimization( self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None, ): From 3b4f37af4626130fc4c9c5c09671a209d6e284c5 Mon Sep 17 00:00:00 2001 From: Sara Rabhi Date: Wed, 28 Jun 2023 17:19:01 -0400 Subject: [PATCH 070/123] Enable `rpe` methods in bert-like models (#6898) * enable rpe in bert model Signed-off-by: sararb * expose position_embedding_type to config Signed-off-by: sararb --------- Signed-off-by: sararb --- examples/nlp/language_modeling/conf/megatron_bert_config.yaml | 1 + .../nlp/models/language_modeling/megatron/bert_model.py | 2 ++ .../nlp/models/language_modeling/megatron_bert_model.py | 1 + 3 files changed, 4 insertions(+) diff --git a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml index a7e3364d41b4..4e53ded4a453 100644 --- a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml @@ -50,6 +50,7 @@ model: # model architecture encoder_seq_length: 512 max_position_embeddings: ${.encoder_seq_length} + position_embedding_type: 'learned_absolute' # Position embedding type. Options ['learned_absolute', 'rope', 'alibi', 'kerple' , 'xpos', 'sandwich'] xpos and sandwich are experimental. num_layers: 12 hidden_size: 768 ffn_hidden_size: 3072 # Transformer FFN hidden size. Usually 4 * hidden_size. 
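A minimal sketch of how the new `position_embedding_type` option can be consumed downstream, using only the keys shown in the config diff above; the allowed-value guard is illustrative rather than part of the actual model code:

.. code-block:: python

    from omegaconf import OmegaConf

    # Options listed in the config comment above ('xpos' and 'sandwich' are experimental).
    ALLOWED = {"learned_absolute", "rope", "alibi", "kerple", "xpos", "sandwich"}

    # Minimal stand-in for the model section of megatron_bert_config.yaml.
    cfg = OmegaConf.create({"model": {"position_embedding_type": "alibi"}})

    # Same lookup the model provider uses, with the same default.
    pos_emb = cfg.model.get("position_embedding_type", "learned_absolute")
    if pos_emb not in ALLOWED:
        raise ValueError(f"Unsupported position_embedding_type: {pos_emb}")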
diff --git a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py index 132f900298a6..cbbef2d56a15 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py @@ -188,6 +188,7 @@ def __init__( add_binary_head=True, megatron_legacy=False, sequence_parallel=False, + position_embedding_type='learned_absolute', ): super(BertModel, self).__init__() # args = get_args() @@ -234,6 +235,7 @@ def __init__( onnx_safe=onnx_safe, megatron_legacy=megatron_legacy, sequence_parallel=sequence_parallel, + position_embedding_type=position_embedding_type, ) self.initialize_word_embeddings( diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index cac1a50e98ae..ab0459b2966c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -182,6 +182,7 @@ def model_provider_func(self, pre_process, post_process): add_binary_head=cfg.bert_binary_head, megatron_legacy=cfg.get('megatron_legacy', False), sequence_parallel=self.cfg.get('sequence_parallel', False), + position_embedding_type=self.cfg.get("position_embedding_type", "learned_absolute"), ) return model From 69747d8ce90595d4d6826c01c7c6dc034f5c8265 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Thu, 29 Jun 2023 01:41:52 +0400 Subject: [PATCH 071/123] Fix AN4 dataset links (#6926) * Fix an4 dataset link in docs Signed-off-by: Vladimir Bataev * Remove broken a4 dataset links from tutorials Signed-off-by: Vladimir Bataev --------- Signed-off-by: Vladimir Bataev --- docs/source/asr/datasets.rst | 2 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 2 +- tutorials/asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb | 2 +- .../speaker_tasks/Speaker_Identification_Verification.ipynb | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/asr/datasets.rst b/docs/source/asr/datasets.rst index 617d5195005f..05278ecb2437 100644 --- a/docs/source/asr/datasets.rst +++ b/docs/source/asr/datasets.rst @@ -126,7 +126,7 @@ AN4 Dataset This is a small dataset recorded and distributed by Carnegie Mellon University. It consists of recordings of people spelling out addresses, names, etc. Information about this dataset can be found on the `official CMU site `_. -#. `Download and extract the dataset `_ (which is labeled "NIST's Sphere audio (.sph) format (64M)". +#. `Download and extract the dataset `_ (which is labeled "NIST's Sphere audio (.sph) format (64M)". #. Convert the ``.sph`` files to ``.wav`` using sox, and build one training and one test manifest. diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 11ba4b85bd47..6133fdc9a8b9 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -103,7 +103,7 @@ "# Download the dataset. 
This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index 0c0d239bf58c..74cd0f739e84 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -189,7 +189,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index b932916f2bc5..cdb36251fb70 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -372,7 +372,7 @@ "# Download the dataset. This will take a few moments...\r\n", "print(\"******\")\r\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\r\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \r\n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\r\n", " an4_path = wget.download(an4_url, data_dir)\r\n", " print(f\"Dataset downloaded at: {an4_path}\")\r\n", "else:\r\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index e6bccc3f0f42..e1eb494f777e 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -137,7 +137,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index f8741cdcbfe1..8883cce55a80 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -135,7 +135,7 @@ "# Download the dataset. 
This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 62481c3762d2..c9c547a8383e 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -190,7 +190,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 8e3ae9c1f131..dce8c46df1b0 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -85,7 +85,7 @@ "# Download the dataset. This will take a few moments...\n", "print(\"******\")\n", "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", - " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz' # for the original source, please visit http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz \n", + " an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'\n", " an4_path = wget.download(an4_url, data_dir)\n", " print(f\"Dataset downloaded at: {an4_path}\")\n", "else:\n", From 295e88fcb171ab6ce7264f9ed6f022ed13db8d8f Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Wed, 28 Jun 2023 15:13:28 -0700 Subject: [PATCH 072/123] Update core commit for CI (#6939) * Update core commit for CI Signed-off-by: Abhinav Khattar * add comment Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 8a151d34c336..caf48f7e1624 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -59,9 +59,10 @@ pipeline { stage('Megatron Core installation') { steps { + // commit points to core 23.05 ToT sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout d2891b4ad3a00e3c4223f89491afd9e1b812f9b5 && \ + git checkout 060415572f4365a2e895f8036c4e37dad0efbdf5 && \ pip install -e .' 
} } From 29b9b8a719300bf675f94d00092c12bad2d4c433 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 28 Jun 2023 16:14:54 -0600 Subject: [PATCH 073/123] Tensor-parallel communication overlap with userbuffer backend (#6792) * Tensor-parallel communication overlap with userbuffer backend (#6780) * add interfaces for tp_communication overlap [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Interface to provide custom userbuffer communicator settings by yaml file [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Construct MPI process group for userbuffers support Signed-off-by: Tim Moon --------- Signed-off-by: Tim Moon Co-authored-by: Tim Moon Co-authored-by: Abhinav Khattar * codeql change Signed-off-by: Abhinav Khattar --------- Signed-off-by: Tim Moon Signed-off-by: ericharper Signed-off-by: Abhinav Khattar Co-authored-by: Sangkug Lym Co-authored-by: Tim Moon Co-authored-by: Abhinav Khattar Co-authored-by: ericharper --- .../conf/megatron_gpt_config.yaml | 7 +++++ .../language_modeling/megatron/gpt_model.py | 2 ++ .../language_modeling/megatron_base_model.py | 9 ++++++ .../language_modeling/megatron_gpt_model.py | 29 +++++++++++++++++++ .../modules/common/megatron/language_model.py | 4 +++ .../modules/common/megatron/megatron_init.py | 2 ++ .../modules/common/megatron/transformer.py | 4 +++ nemo/collections/nlp/parts/nlp_overrides.py | 4 +++ nemo/utils/app_state.py | 17 +++++++++++ 9 files changed, 78 insertions(+) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index 8d7fd09e4307..e588e94a6720 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -166,6 +166,13 @@ model: fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history reduce_amax: True # Perform reduction to sync amax tensors across GPUs after every iteration use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + ub_tp_comm_overlap: False + # Use userbuffer backend to overlap tensor-parallel communications with computes. + # This feature is only available with Transformer Engine and squence parallelism enabled and, currently, supports only GPT models. + ub_tp_comm_overlap_cfg: null + # A yaml file with userbuffer communicator configurations. This file should provide `method`, `dtype`, `num_sm`, `num_splits`, + # `cga_size`, `num_splits`, `set_sm_margin`, and `aggregate` for the communicators to use custom settings. + # If the configuration file is not provided a default setting is used for all communicators. 
## Flash Attention use_flash_attention: False # Use flash attention in self-attention module, this config does nothing when transformer_engine=True diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index b32bfdb09f20..8e28b6cab362 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -164,6 +164,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, use_flash_attention=False, ): super(GPTModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -246,6 +247,7 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, use_flash_attention=use_flash_attention, ) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index eb6671452992..a54c68866d26 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -131,6 +131,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): global_batch_size=cfg.get('global_batch_size'), rampup_batch_size=cfg.get('rampup_batch_size'), use_fp8=cfg.get('fp8', False), + init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -578,6 +579,14 @@ def _validate_and_override_config(self): 'Make sure the number of model chunks is the same across all pipeline stages.' ) + if self.cfg.get('ub_tp_comm_overlap', False): + if not self.cfg.get('transformer_engine', False) or not self.cfg.get('sequence_parallel', False): + logging.info( + "Userbuffer tensor-parallel communication overlap is available with both Transformer Engine and sequence-parallelism." 
+ ) + with open_dict(self.cfg): + self.cfg.ub_tp_comm_overlap = False + def is_data_parallel_rank_zero(self): if is_global_rank_zero(): return True diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 84caed6c111e..817ef0bd6442 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -85,6 +85,7 @@ try: import transformer_engine + from transformer_engine.pytorch import module as te_module HAVE_TE = True @@ -282,6 +283,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self._nsys_profile_end_step *= grad_accum_steps self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) def get_gpt_module_list(self): if isinstance(self.model, list): @@ -357,6 +359,7 @@ def model_provider_func(self, pre_process, post_process): fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), reduce_amax=self.cfg.get('reduce_amax', True), use_emha=self.cfg.get('use_emha', False), + ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), use_flash_attention=self.cfg.get('use_flash_attention', False), megatron_legacy=self.cfg.get('megatron_legacy', False), ) @@ -515,6 +518,32 @@ def training_step(self, dataloader_iter, batch_idx): The input batch to each micro-batch is fetched using the dataloader function in the micro-batch fwd function. """ + # Initialize userbuffer communicators. Initialization is done only once at the + # beginning of the first training step. + if self.initialize_ub: + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) + ub_cfgs = None + if ub_cfg_file_name is not None: + try: + import yaml + + with open(ub_cfg_file_name, 'r') as ub_cfg_file: + ub_cfgs = yaml.safe_load(ub_cfg_file) + except (ImportError, TypeError): + print("Fail to read ub_tp_comm_overlap config file.") + + te_module.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + if self.rampup_batch_size: num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR current_global_batch_size = num_microbatch_calculator.current_global_batch_size diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index e6305e563549..683163246379 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -121,6 +121,7 @@ def get_language_model( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, use_flash_attention=False, ): """Build language model and return along with the key to save.""" @@ -197,6 +198,7 @@ def get_language_model( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, use_flash_attention=use_flash_attention, ) # key used for checkpoints. 
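For illustration, a sketch of loading a user-provided `ub_tp_comm_overlap_cfg` file the way the training-step hook above does. The communicator names and values below are hypothetical placeholders — the exact entries accepted by `initialize_ub` depend on the installed Transformer Engine version — but each entry carries the keys named in the config comment (`method`, `dtype`, `num_sm`, `num_splits`, `cga_size`, `set_sm_margin`, `aggregate`):

.. code-block:: python

    import yaml

    # Hypothetical contents of a ub_tp_comm_overlap_cfg YAML file.
    example_cfg = """
    qkv_fprop:
      method: ring_exchange
      num_sm: 4
      cga_size: 2
      set_sm_margin: 0
      aggregate: 0
    fc1_fprop:
      method: pipeline
      num_sm: 8
      num_splits: 4
      set_sm_margin: 1
    """

    ub_cfgs = yaml.safe_load(example_cfg)

    # Shape passed to initialize_ub in the training step above (placeholder sizes).
    encoder_seq_length, micro_batch_size, hidden_size = 2048, 1, 4096
    input_shape = [encoder_seq_length * micro_batch_size, hidden_size]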
@@ -504,6 +506,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, use_flash_attention=False, ): super(TransformerLanguageModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -643,6 +646,7 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, position_embedding_type=position_embedding_type, use_flash_attention=use_flash_attention, ) diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_init.py b/nemo/collections/nlp/modules/common/megatron/megatron_init.py index e0551fad5d16..7431bffad26c 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_init.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_init.py @@ -67,6 +67,7 @@ def initialize_model_parallel_for_nemo( global_batch_size=None, rampup_batch_size=None, use_fp8=False, + init_mpi_proc_group=False, seed=1234, apex_transformer_log_level=30, ): @@ -83,6 +84,7 @@ def initialize_model_parallel_for_nemo( app_state.pipeline_model_parallel_size = pipeline_model_parallel_size app_state.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size app_state.use_fp8 = use_fp8 + app_state.init_mpi_proc_group = init_mpi_proc_group ( app_state.tensor_model_parallel_rank, app_state.pipeline_model_parallel_rank, diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 258e42ce9694..652a3e6f4e3a 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -814,6 +814,7 @@ def __init__( layer_type: str = "encoder", drop_path_rate: float = 0, use_emha: bool = False, + ub_tp_comm_overlap: bool = False, autocast_dtype: Any = 16, zero_centered_gamma: bool = False, ) -> None: @@ -846,6 +847,7 @@ def __init__( set_parallel_mode=tp_size > 1, fuse_qkv_params=True, zero_centered_gamma=zero_centered_gamma, + ub_tp_comm_overlap=ub_tp_comm_overlap, ) # use_emha=use_emha, @@ -941,6 +943,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, normalize_attention_scores=True, multi_query_attention=False, num_moe_experts=1, @@ -1084,6 +1087,7 @@ def build_layer(layer_number): apply_residual_connection_post_layernorm=False, autocast_dtype=precision, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, zero_centered_gamma=normalization == 'layernorm1p', ) else: diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index c390ba995843..3b5a14c52f37 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -181,6 +181,10 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None: app_state.data_parallel_size = parallel_state.get_data_parallel_world_size() app_state.pipeline_model_parallel_group = parallel_state.get_pipeline_model_parallel_group() + # create MPI process group for UCX-based communication APIs + if app_state.init_mpi_proc_group: + torch.distributed.new_group(backend='mpi') + def save_checkpoint( self, checkpoint: Dict[str, Any], filepath: Union[str, Path], storage_options: Optional[Any] = None ) -> None: diff --git a/nemo/utils/app_state.py b/nemo/utils/app_state.py index c3ead0bff48f..d06e1ac32e36 100644 --- a/nemo/utils/app_state.py +++ b/nemo/utils/app_state.py @@ -55,6 
+55,7 @@ def __init__(self): self._data_parallel_group = None self._megatron_checkpoint_version = None self._use_fp8 = False + self._init_mpi_proc_gruop = False self._random_seed = None @@ -363,6 +364,22 @@ def use_fp8(self, use_fp8): """ self._use_fp8 = use_fp8 + @property + def init_mpi_proc_group(self): + """ Property sets the initialization of mpi process group. + Returns: + Initialize mpi process group. + """ + return self._init_mpi_proc_group + + @init_mpi_proc_group.setter + def init_mpi_proc_group(self, init_mpi_proc_group): + """ Property sets the initialization of mpi process group. + Args: + init_mpi_proc_group: Initialize mpi process group. + """ + self._init_mpi_proc_group = init_mpi_proc_group + @property def random_seed(self): """ Property returns the random seed. From 5260c9c2f7202770fcc8f1a3fbf30fb07658918c Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Wed, 28 Jun 2023 16:25:46 -0700 Subject: [PATCH 074/123] lora inference ci (#6931) * inference test Signed-off-by: arendu * Update typo Signed-off-by: Adi Renduchintala --------- Signed-off-by: arendu Signed-off-by: Adi Renduchintala Co-authored-by: Abhinav Khattar --- Jenkinsfile | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index caf48f7e1624..1a79d87bcd38 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3778,7 +3778,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' } failFast true steps { - sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results" + sh "rm -rf /home/TestData/nlp/lora_tuning_tp2" sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_tuning.py \ trainer.devices=2 \ trainer.log_every_n_steps=1 \ @@ -3787,7 +3787,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' trainer.val_check_interval=3 \ ++trainer.limit_val_batches=2 \ trainer.precision=16 \ - exp_manager.exp_dir=examples/nlp/language_modeling/gpt_peft_lora_results \ + exp_manager.exp_dir=/home/TestData/nlp/lora_tuning_tp2 \ model.pipeline_model_parallel_size=1 \ model.tensor_model_parallel_size=2 \ model.restore_from_path=/home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \ @@ -3801,7 +3801,21 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.data.validation_ds.num_workers=0 \ model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \ model.data.validation_ds.names=[quarel]" - sh "rm -rf examples/nlp/language_modeling/gpt_peft_lora_results" + sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \ + model.restore_from_path=/home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \ + model.peft.restore_from_path=/home/TestData/nlp/lora_tuning_tp2/megatron_gpt_peft_tuning/checkpoints/megatron_gpt_peft_tuning.nemo \ + model.peft.restore_from_ckpt_name=null \ + model.peft.restore_from_hparams_path=null \ + trainer.devices=2 \ + model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \ + model.data.test_ds.names=['quarel4'] \ + model.data.test_ds.global_batch_size=1 \ + model.data.test_ds.micro_batch_size=1 \ + model.data.test_ds.tokens_to_generate=10 \ + inference.greedy=True \ + inference.repetition_penalty=1.0 \ + inference.outfile_path='/home/TestData/nlp/lora_tuning_tp2/out.jsonl'" + sh "rm -rf /home/TestData/nlp/lora_tuning_tp2" } } stage('L2: Megatron GPT Eval') { From a27ba52a673789c88fc3382d4ea53f9d2d9bd131 Mon Sep 17 00:00:00 2001 From: Yi Dong <43824965+yidong72@users.noreply.github.com> Date: 
Wed, 28 Jun 2023 22:30:46 -0400 Subject: [PATCH 075/123] support value attribution condition (#6934) * text gen condition on value Signed-off-by: Yi Dong * fix round function Signed-off-by: Yi Dong * predict value Signed-off-by: Yi Dong * scale 9 Signed-off-by: Yi Dong * handle hard code label Signed-off-by: Yi Dong * use likert scale 7 Signed-off-by: Yi Dong * scale 6 Signed-off-by: Yi Dong * merge the latest main Signed-off-by: Yi Dong * added latest chatbot ui Signed-off-by: Yi Dong * added new playground interface Signed-off-by: Yi Dong * default scale 9 Signed-off-by: Yi Dong * address comments Signed-off-by: Yi Dong * add speicial tokens Signed-off-by: Yi Dong * handles more tokenizer Signed-off-by: Yi Dong * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added comments Signed-off-by: Yi Dong * fix type Signed-off-by: Yi Dong * faster check Signed-off-by: Yi Dong --------- Signed-off-by: Yi Dong Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../conf/megatron_gpt_inference.yaml | 7 +- .../language_modeling/megatron_gpt_eval.py | 8 +- .../megatron/gpt_sft_chat_dataset.py | 150 +++++-- .../language_modeling/megatron_base_model.py | 5 + .../nlp/modules/common/megatron_web_server.py | 380 +++++++++++++----- 5 files changed, 409 insertions(+), 141 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml index 6bd1be905a97..53d4e9b7e82b 100644 --- a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml @@ -35,4 +35,9 @@ share: False # whether create a public URL username: test # user name for web client password: test2 # password for web client web_port: 9889 # the port number of the web server -chat: False # use the chat interface \ No newline at end of file +chat: False # use the chat interface +chatbot_config: + value: False # whether to inject the value attributes + user: User + assistant: Assistant + system: "A chat between a curious human and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index af1657b44d7b..dc299fccdf90 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -15,6 +15,7 @@ import asyncio import os import threading +from functools import partial import torch from omegaconf import OmegaConf, open_dict @@ -301,7 +302,12 @@ def main(cfg) -> None: if parallel_state.is_pipeline_first_stage() and parallel_state.get_tensor_model_parallel_rank() == 0: if cfg.web_server: if cfg.chat: - web_ui = get_chatbot_demo + defaults = { + 'user': cfg.chatbot_config.user, + 'assistant': cfg.chatbot_config.assistant, + 'system': cfg.chatbot_config.system, + } + web_ui = partial(get_chatbot_demo, defaults=defaults, value=cfg.chatbot_config.value) else: web_ui = get_demo loop = asyncio.new_event_loop() diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py index 2c896c2e61af..d6c2257ebabb 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -16,6 +16,7 @@ import torch +from nemo.collections.common.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset from nemo.utils import logging @@ -29,25 +30,65 @@ SYSTEM_TOKEN = "<extra_id_0>System\n" TURN_TOKEN = "<extra_id_1>" -GUARD_RAIL_INSTRUCTION = { - "TEXT_TO_CANONICAL_FORM": "Given a dialogue, for each turn you need to generate a short summary called a canonical form. Generate the canonical form for the last turn in the dialogue.", - "CANONICAL_FORM_TO_TEXT": "Given a dialogue, for each turn we also have a short summary called a canonical form. Generate the canonical form given the last turn message and canonical form. Then generate the message.", +TYPE_INSTRUCTION = { + 'TEXT_TO_VALUE': "", + 'VALUE_TO_TEXT': '', } -def _mask_targets(target, tokenized_lens, speakers, header_len, s_ids, tokenizer, mask_role): +def _mask_targets( + target, + tokenized_lens, + speakers, + header_len, + s_ids, + tokenizer, + mask_role, + gtype, + extra_id_2_token_id, + new_line_token_id, +): + """ This function masks the tokens so the loss is computed only on the non-masked role's responses. + For 'TEXT_TO_VALUE' type, the loss is computed on the value attributes. + + Args: + target (Tensor): input ids + tokenized_lens (List[int]): array of lengths of each turns + speakers (List[str]): array of speakers of each turns + header_len (int): the system prompt length + s_ids (List[Tensor]): array of tokenized ids of each turns + tokenizer (TokenizerSpec): tokenizer object + mask_role (str): the speaker id to be masked from loss computation + gtype (str): either 'TEXT_TO_VALUE' or 'VALUE_TO_TEXT' + extra_id_2_token_id (int): <extra_id_2> token id + new_line_token_id (int): new line token id + + """ cur_idx = header_len tgt_len = target.shape[0] for i, (tokenized_len, speaker, s_id) in enumerate(zip(tokenized_lens, speakers, s_ids)): - # note, sentence piece will add extra empty token in front. 
s_id has that extra token too - skip_name_len = len(tokenizer.text_to_ids(TURN_TOKEN + speaker + END_NAME_SIGNAL)) + # note, sentence piece will add extra empty token in front. has to compute the diff + id1 = tokenizer.text_to_ids("") + id2 = tokenizer.text_to_ids("" + TURN_TOKEN + speaker + END_NAME_SIGNAL) + skip_name_len = len(id2) - len(id1) + if extra_id_2_token_id is None: + raise ValueError("extra_id_2 is not in the vocabulary") + if (s_id == extra_id_2_token_id).any().item(): + if gtype == 'VALUE_TO_TEXT': + # if contains the token + assert skip_name_len == torch.where((s_id == extra_id_2_token_id))[0].item() + # find new line token id 14 + more_skip_len = torch.where((s_id[skip_name_len:] == new_line_token_id))[0][0].item() + 1 + skip_name_len += more_skip_len + elif gtype == 'TEXT_TO_VALUE': + skip_name_len = torch.where((s_id == extra_id_2_token_id))[0].item() + 1 if cur_idx >= tgt_len: break elif cur_idx + tokenized_len < tgt_len: # Check whether the mask is applied to the correct position, the first token is turn token: # s_id[2:] skips the artifact empty token and the turn token # target[cur_idx + 1:cur_idx + tokenized_len] skip the turn token - if not torch.equal(target[cur_idx + 1 : cur_idx + tokenized_len], s_id[2:]): + if not torch.equal(target[cur_idx + 1 : cur_idx + tokenized_len], s_id[1:]): logging.warning("a sentence mismatches the corresponding piece " "in the conversation") if i == 0: # mask the first turn completely to provide at least one turn as context @@ -57,7 +98,7 @@ def _mask_targets(target, tokenized_lens, speakers, header_len, s_ids, tokenizer target[cur_idx + 1 : cur_idx + tokenized_len] = IGNORE_INDEX else: # mask up to the name end, need to remove one as skip name has an extra artifact empty token - target[cur_idx : cur_idx + skip_name_len - 1] = IGNORE_INDEX + target[cur_idx : cur_idx + skip_name_len] = IGNORE_INDEX cur_idx += tokenized_len @@ -65,6 +106,13 @@ def cannonical_form_formater(cannoical_form): return f'{cannoical_form}\n' +def response_value_formater(label): + if isinstance(label, str): + return '' + label + '\n' + else: + raise ValueError(f'Unknown label type {type(label)}, only str type is supported') + + def _add_speaker_and_signal(header, source, mask_role, gtype): """Add speaker and start/end signal on each round.""" BEGIN_SIGNAL = "" @@ -76,28 +124,30 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): sentence["value"] = ( BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL + sentence["value"] + END_SIGNAL ) - elif gtype == "TEXT_TO_CANONICAL_FORM": + elif gtype == "VALUE_TO_TEXT": sentence["value"] = ( BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL + + (response_value_formater(sentence['label']) if 'label' in sentence else '') + sentence["value"] + END_SIGNAL - + cannonical_form_formater(sentence['canonical_form']) ) - elif gtype == "CANONICAL_FORM_TO_TEXT": + elif gtype == "TEXT_TO_VALUE": sentence["value"] = ( BEGIN_SIGNAL + role_token + sentence_from + END_NAME_SIGNAL - + cannonical_form_formater(sentence['canonical_form']) + sentence["value"] + END_SIGNAL + + (response_value_formater(sentence['label']) if 'label' in sentence else '') ) else: - raise ValueError(f"source type {gtype} not supported") + raise ValueError( + f"source type {gtype} not supported, only 'VALUE_TO_TEXT' and 'TEXT_TO_VALUE' are supported" + ) conversation += sentence["value"] # if the last turn is not masked, add next token start token to the end, which will be included for loss calculation if sentence_from != mask_role 
and i == len(source) - 1: @@ -105,9 +155,7 @@ def _add_speaker_and_signal(header, source, mask_role, gtype): return conversation -def preprocess( - source: dict, tokenizer: TokenizerSpec, -): +def preprocess(source: dict, tokenizer: TokenizerSpec, extra_id_2_token_id: int, new_line_token_id: int): """ Given a conversation list. This transform: 1. Add signal '### ' at the beginning each sentence, with end signal '\n'; @@ -115,17 +163,18 @@ def preprocess( 3. Tokenize the concatenated conversation; 4. Make a deepcopy as the target. Mask human words with IGNORE_INDEX. """ - canonical_type = None + data_type = None if 'type' in source: - canonical_type = source['type'] - assert canonical_type in GUARD_RAIL_INSTRUCTION, f"source type {canonical_type} not supported" + data_type = source['type'] + assert data_type in TYPE_INSTRUCTION, f"source type {data_type} not supported" # add end signal and concatenate together conversation = source['system'] - if canonical_type is not None: - conversation = conversation + '\n' + GUARD_RAIL_INSTRUCTION[canonical_type] + if data_type is not None: + if TYPE_INSTRUCTION[data_type] != '': + conversation = conversation + '\n' + TYPE_INSTRUCTION[data_type] mask_role = source.get('mask', 'User') - header = f"{SYSTEM_TOKEN}{conversation}\n\n" - conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, canonical_type) + header = f"{SYSTEM_TOKEN}{conversation}" + conversation = _add_speaker_and_signal(header, source['conversations'], mask_role, data_type) # tokenize conversations input_ids = tokenizer.text_to_ids(conversation) target = copy.deepcopy(input_ids) @@ -134,10 +183,16 @@ def preprocess( ids = [] tokenized_lens = [] for s in source['conversations']: - tokenized_sentence = tokenizer.text_to_ids(s["value"]) - ids.append(torch.tensor(tokenized_sentence)) - # remove one token as it adds an empty token in front - tokenized_lens.append(len(tokenized_sentence) - 1) + if isinstance(tokenizer, SentencePieceTokenizer): + tokenized_sentence = tokenizer.text_to_ids(s["value"]) + ids.append(torch.tensor(tokenized_sentence)[1:]) + # remove one token as it adds an empty token in front + tokenized_lens.append(len(tokenized_sentence) - 1) + else: + tokenized_sentence = tokenizer.text_to_ids(s["value"]) + ids.append(torch.tensor(tokenized_sentence)) + # remove one token as it adds an empty token in front + tokenized_lens.append(len(tokenized_sentence)) speakers = [sentence["from"] for sentence in source['conversations']] assert mask_role in speakers, "mask role not in the conversation" target = torch.LongTensor(target) @@ -145,18 +200,51 @@ def preprocess( target[:header_len] = IGNORE_INDEX input_ids = torch.LongTensor(input_ids) - _mask_targets(target, tokenized_lens, speakers, header_len, ids, tokenizer, mask_role) + _mask_targets( + target, + tokenized_lens, + speakers, + header_len, + ids, + tokenizer, + mask_role, + data_type, + extra_id_2_token_id, + new_line_token_id, + ) mask = (target != IGNORE_INDEX).bool() assert mask.sum().item() != 0, "mask is empty" return dict(input_ids=input_ids, mask=mask) +def _check_token_in_vocab(tokenizer, token): + ids = tokenizer.text_to_ids(token) + if isinstance(tokenizer, SentencePieceTokenizer): + return len(ids) == 2 + else: + return len(ids) == 1 + + class GPTSFTChatDataset(GPTSFTDataset): def _build_samples_mapping(self): super()._build_samples_mapping() assert hasattr(self.tokenizer, "vocab"), "tokenizer should have vocab property, not supported" - assert '' in self.tokenizer.vocab, " not in the 
tokenizer vocab. not supported" - assert '<extra_id_1>' in self.tokenizer.vocab, "<extra_id_1> not in the tokenizer vocab. not supported" + assert _check_token_in_vocab( + self.tokenizer, '<extra_id_0>' + ), "<extra_id_0> not in the tokenizer vocab. not supported" + assert _check_token_in_vocab( + self.tokenizer, '<extra_id_1>' + ), "<extra_id_1> not in the tokenizer vocab. not supported" + # calculate <extra_id_2> id value + if _check_token_in_vocab(self.tokenizer, '<extra_id_2>'): + ids_1 = self.tokenizer.text_to_ids('<extra_id_1><extra_id_2>') + ids_2 = self.tokenizer.text_to_ids('<extra_id_1>') + self.extra_id_2_token_id = ids_1[len(ids_2) :][0] + else: + self.extra_id_2_token_id = None + ids_1 = self.tokenizer.text_to_ids('<extra_id_1>\n') + ids_2 = self.tokenizer.text_to_ids('<extra_id_1>') + self.new_line_token_id = ids_1[len(ids_2) :][0] def _process_example(self, example): """ @@ -164,7 +252,7 @@ Truncation is carried out when needed, but it is performed only on the prompt side. BOS, EOS, and SEP, are added if specified. """ - result = preprocess(example, self.tokenizer) + result = preprocess(example, self.tokenizer, self.extra_id_2_token_id, self.new_line_token_id) return result diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index a54c68866d26..e018a4decaf6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -17,6 +17,7 @@ import re from typing import Any, Dict, Optional, Union +import omegaconf import torch from omegaconf import open_dict from omegaconf.dictconfig import DictConfig @@ -223,6 +224,10 @@ def _build_tokenizer(self): legacy=legacy, ) + if self._cfg.tokenizer.get('additional_special_tokens', None) is not None: + tokens_list = omegaconf.OmegaConf.to_object(self._cfg.tokenizer.additional_special_tokens) + self.tokenizer.add_special_tokens({'additional_special_tokens': tokens_list}) + def on_train_start(self) -> None: super().on_train_start() self.init_global_step = self.trainer.global_step diff --git a/nemo/collections/nlp/modules/common/megatron_web_server.py b/nemo/collections/nlp/modules/common/megatron_web_server.py index d3ccde49a5c5..648bca024ba0 100644 --- a/nemo/collections/nlp/modules/common/megatron_web_server.py +++ b/nemo/collections/nlp/modules/common/megatron_web_server.py @@ -32,8 +32,33 @@ TURN_TOKEN = '<extra_id_1>' -DEFAULT_SYSTEM = "A chat between a curious human and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n" -SYSTEM_TOKEN = 'System\n' +PROMPT_PRESETS = { + "DIALOGUE": { + "SYSTEM_TURN_TOKEN": '', + "USER_TURN_TOKEN": '', + "BOT_TURN_TOKEN": '', + "END_OF_NAME": '', + "END_OF_TURN": '\n', + }, + "DIALOGUE2": { + "SYSTEM_TURN_TOKEN": 'System\n', + "USER_TURN_TOKEN": '', + "BOT_TURN_TOKEN": '', + "END_OF_NAME": '\n', + "END_OF_TURN": '\n', + }, +} + + +PRESETS = { + "K1-Greedy": {"temperature": 1.0, "top_p": 0.9, "top_k": 1, "repetition_penalty": 1.0,}, + "K50": {"temperature": 0.75, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "K50-Creative": {"temperature": 0.85, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "K50-Precise": {"temperature": 0.1, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "K50-Original": {"temperature": 0.9, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, + "Nucleus9": {"temperature": 0.8, "top_p": 0.9, "top_k": 10000, "repetition_penalty": 1.0,}, + "Custom": {"temperature": 0.75, "top_p": 0.95, "top_k": 50, "repetition_penalty": 1.0,}, +} def check_gradio_import(): @@ -48,55 +73,25 @@ def check_gradio_import(): def create_gen_function(port=5555, chat=False): - if chat: - - def get_generation( - prompt, preamble, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings - ): - if preamble is not None and preamble != '': - prompt = SYSTEM_TOKEN + preamble + prompt - data = { - "sentences": [prompt], - "tokens_to_generate": int(token_to_gen), - "temperature": temp, - "add_BOS": add_BOS, - "top_k": top_k, - "top_p": top_p, - "greedy": greedy, - "all_probs": False, - "repetition_penalty": repetition, - "min_tokens_to_generate": int(min_tokens), - "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], - } - response = text_generation(data, port=port) - sentences = response['sentences'] - bot_message = sentences[0] - bot_message = bot_message[len(prompt) :] - return bot_message - - else: - - def get_generation( - prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings - ): - data = { - "sentences": [prompt], - "tokens_to_generate": int(token_to_gen), - "temperature": temp, - "add_BOS": add_BOS, - "top_k": top_k, - "top_p": top_p, - "greedy": greedy, - "all_probs": False, - "repetition_penalty": repetition, - "min_tokens_to_generate": int(min_tokens), - "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], - } - response = text_generation(data, port=port) - sentences = response['sentences'] - bot_message = sentences[0] - bot_message = bot_message[len(prompt) :] - return bot_message + def get_generation(prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_p, top_k, repetition, end_strings): + data = { + "sentences": [prompt], + "tokens_to_generate": int(token_to_gen), + "temperature": temp, + "add_BOS": add_BOS, + "top_k": top_k, + "top_p": top_p, + "greedy": greedy, + "all_probs": False, + "repetition_penalty": repetition, + "min_tokens_to_generate": int(min_tokens), + "end_strings": [i.strip() for i in end_strings.split(',') if len(i) != 0], + } + response = text_generation(data, port=port) + sentences = response['sentences'] + bot_message = sentences[0] + bot_message = bot_message[len(prompt) :] + return bot_message return get_generation @@ -104,33 +99,54 @@ def get_generation( def get_demo(share, username, password, server_port=5555, web_port=9889, loop=None): check_gradio_import() asyncio.set_event_loop(loop) - with gr.Blocks() as 
demo: + with gr.Blocks(css=CSS) as demo: with gr.Row(): with gr.Column(scale=2, width=200): - greedy_flag = gr.Checkbox(label="Greedy") - add_BOS = gr.Checkbox(label="Add BOS token", value=False) + # store the mutliple turn conversation token_to_gen = gr.Number(label='Number of Tokens to generate', value=300, type=int) min_token_to_gen = gr.Number(label='Min number of Tokens to generate', value=1, type=int) - temperature = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, label='Temperature', step=0.1) - top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.9, label='Top P') - top_k = gr.Slider(minimum=0, maximum=10000, step=2, value=0, label='Top K') + seed = gr.Number(label='Random seed', value=0, type=int) + end_strings = gr.Textbox(label="End strings (comma separated)", value=",", lines=1,) + add_BOS = gr.Checkbox(label="Add BOS token", value=False) + sampling_method = gr.Dropdown( + list(PRESETS.keys()), label='Sampling Presets', default='K50', value='K50' + ) + temperature = gr.Slider(minimum=0.0, maximum=5.0, value=0.75, label='Temperature', step=0.1) + top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.95, label='Top P') + top_k = gr.Slider(minimum=0, maximum=1024, step=2, value=50, label='Top K') + repetition_penality = gr.Slider( - minimum=1.0, maximum=5.0, step=0.02, value=1.2, label='Repetition penalty' + minimum=1.0, maximum=5.0, step=0.02, value=1.0, label='Repetition penalty' ) - end_strings = gr.Textbox(label="End strings (comma separated)", value="<|endoftext|>,", lines=1,) - with gr.Column(scale=1, min_width=800): - input_prompt = gr.Textbox( - label="Input", - value="Ariel was playing basketball. 1 of her shots went in the hoop. 2 of her shots did not go in the hoop. How many shots were there in total?", - lines=5, + + def set_sampling(x): + return list(PRESETS[x].values()) + + sampling_method.change( + set_sampling, inputs=[sampling_method], outputs=[temperature, top_p, top_k, repetition_penality] ) - output_box = gr.Textbox(value="", label="Output") - btn = gr.Button(value="Submit") - btn.click( - create_gen_function(server_port, chat=False), - inputs=[ - input_prompt, - greedy_flag, + + with gr.Column(scale=1, min_width=900): + text = gr.Textbox(label="Playground", value="", lines=60, placeholder="Type something here...",) + submit_btn = gr.Button("Generate") + clear = gr.Button("Clear") + + def on_submit( + prompt_text, + token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + seed, + end_strings, + add_BOS, + min_token_to_gen, + ): + + output = create_gen_function(server_port)( + prompt_text, + False, add_BOS, token_to_gen, min_token_to_gen, @@ -139,41 +155,142 @@ def get_demo(share, username, password, server_port=5555, web_port=9889, loop=No top_k, repetition_penality, end_strings, + ) + print(output) + print('-------------------') + return prompt_text + output + + def clear_fun(): + return '' + + submit_btn.click( + on_submit, + [ + text, + token_to_gen, + temperature, + top_p, + top_k, + repetition_penality, + seed, + end_strings, + add_BOS, + min_token_to_gen, ], - outputs=[output_box], + [text], + queue=False, ) - demo.launch(share=share, server_port=web_port, server_name='0.0.0.0', auth=(username, password)) + clear.click(clear_fun, None, text, queue=False) + demo.queue(concurrency_count=16).launch( + share=share, server_port=web_port, server_name='0.0.0.0', auth=(username, password) + ) -def get_chatbot_demo(share, username, password, server_port=5555, web_port=9889, loop=None): +def get_chatbot_demo( + share, username, 
password, server_port=5555, web_port=9889, loop=None, value=False, defaults=None +): check_gradio_import() from nemo.collections.nlp.modules.common.chatbot_component import Chatbot asyncio.set_event_loop(loop) with gr.Blocks(css=CSS) as demo: - # store the mutliple turn conversation with gr.Row(): with gr.Column(scale=2, width=200): # store the mutliple turn conversation session_state = gr.State(value=[]) - greedy_flag = gr.Checkbox(label="Greedy", value=True) - add_BOS = gr.Checkbox(label="Add BOS token", value=False) token_to_gen = gr.Number(label='Number of Tokens to generate', value=300, type=int) - min_token_to_gen = gr.Number(label='Min number of Tokens to generate', value=1, type=int) - temperature = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, label='Temperature', step=0.1) - top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.02, value=0.9, label='Top P') - top_k = gr.Slider(minimum=0, maximum=10000, step=2, value=0, label='Top K') - repetition_penality = gr.Slider( - minimum=1.0, maximum=5.0, step=0.02, value=1.2, label='Repetition penalty' + seed = gr.Number(label='Random seed', value=0, type=int) + prompt_presets = gr.Dropdown( + list(PROMPT_PRESETS.keys()), label='Template Presets', default='DIALOGUE2', value='DIALOGUE2' + ) + sampling_method = gr.Dropdown( + list(PRESETS.keys()), label='Sampling Presets', default='K50', value='K50' ) - end_strings = gr.Textbox( - label="End strings (comma separated)", value=f"<|endoftext|>,,", lines=1, + with gr.Accordion("Sampling Parameters", open=False): + temperature = gr.Slider( + minimum=0.0, maximum=5.0, value=0.75, label='Temperature', step=0.1, interactive=False + ) + top_p = gr.Slider( + minimum=0.0, maximum=1.0, step=0.02, value=0.95, label='Top P', interactive=False + ) + top_k = gr.Slider(minimum=0, maximum=1024, step=2, value=50, label='Top K', interactive=False) + repetition_penality = gr.Slider( + minimum=1.0, maximum=5.0, step=0.02, value=1.0, label='Repetition penalty', interactive=False + ) + + with gr.Accordion("Value Parameters", open=True, visible=value): + keys = ['quality', 'toxicity', 'humor', 'creativity', 'violence', 'helpfulness', 'not_appropriate'] + quality_value = gr.Slider( + minimum=0, maximum=9, step=1, value=9, label='Quality', interactive=True, visible=True + ) + toxicity_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Toxicity', interactive=True, visible=True + ) + humor_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Humor', interactive=True, visible=True + ) + creativity_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Creativity', interactive=True, visible=True + ) + violence_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Violence', interactive=True, visible=True + ) + helpfulness_value = gr.Slider( + minimum=0, maximum=9, step=1, value=9, label='Helpfulness', interactive=True, visible=True + ) + not_appropriate_value = gr.Slider( + minimum=0, maximum=9, step=1, value=0, label='Not Appropriate', interactive=True, visible=True + ) + used_value = gr.CheckboxGroup(keys, value=keys) + + def change_visibility(x): + values = [] + for key in keys: + if key in x: + values.append(gr.update(visible=True)) + else: + values.append(gr.update(visible=False)) + return values + + used_value.change( + change_visibility, + inputs=[used_value], + outputs=[ + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + ], + ) + + def set_sampling(x): + if x == 'Custom': + 
values = [gr.update(value=v, interactive=True) for v in PRESETS[x].values()] + return values + else: + values = [gr.update(value=v, interactive=False) for v in PRESETS[x].values()] + return values + + sampling_method.change( + set_sampling, inputs=[sampling_method], outputs=[temperature, top_p, top_k, repetition_penality] ) - gr.HTML("


") - human_name = gr.Textbox(label="Human Name", value="User", line=1,) - assistant_name = gr.Textbox(label="Assistant Name", value="Assistant", line=1,) - preamble = gr.Textbox(label="System", value=DEFAULT_SYSTEM, lines=2,) - with gr.Column(scale=1, min_width=800): + + gr.HTML("
") + human_name = gr.Textbox(label="Human Name", value=defaults['user'], line=1,) + assistant_name = gr.Textbox(label="Assistant Name", value=defaults['assistant'], line=1,) + preamble = gr.Textbox(label="System", value=defaults['system'], lines=2,) + + def set_prompt(x): + if x == "DIALOGUE": + return '', '' + return defaults['user'], defaults['assistant'] + + prompt_presets.change(set_prompt, inputs=[prompt_presets], outputs=[human_name, assistant_name]) + + with gr.Column(scale=1, min_width=900): chatbot = Chatbot(elem_id="chatbot").style(height=800) msg = gr.Textbox(label="User", value="", lines=1,) clear = gr.Button("Clear") @@ -183,45 +300,86 @@ def user(user_message, history, session_state): user_message = user_message.replace('\n', '
') return "", history + [[user_message, None]] + def get_value_str(values_array, used_value): + if len(used_value) == 0: + return '' + assert len(values_array) == len(keys) + value_str = '' + elements = [] + for i, key in enumerate(keys): + if key in used_value: + elements.append(f'{key}:{values_array[i]}') + value_str += ','.join(elements) + '\n' + return value_str + def bot( history, preamble, - greedy_flag, - add_BOS, token_to_gen, - min_token_to_gen, temperature, top_p, top_k, repetition_penality, - end_strings, + seed, human_name, assistant_name, session_state, + prompts_presets, + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + used_value, ): + + values_array = [ + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + ] + if value: + value_str = get_value_str(values_array, used_value) + else: + value_str = '' + + prompt_preset = PROMPT_PRESETS[prompts_presets] prompt_text = '' names = [human_name, assistant_name] + turn_tokens = [prompt_preset['USER_TURN_TOKEN'], prompt_preset['BOT_TURN_TOKEN']] for i, meg in enumerate(session_state): name = names[i % 2] - prompt_text += TURN_TOKEN + name + '\n' + meg + '\n' - prompt_text += TURN_TOKEN + assistant_name + '\n' - bot_message = create_gen_function(server_port, chat=True)( + turn = turn_tokens[i % 2] + prompt_text += turn + name + prompt_preset['END_OF_NAME'] + meg + prompt_preset['END_OF_TURN'] + prompt_text += ( + prompt_preset['BOT_TURN_TOKEN'] + assistant_name + prompt_preset['END_OF_NAME'] + value_str + ) + prompt_text = prompt_preset['SYSTEM_TURN_TOKEN'] + preamble + prompt_text + bot_message = create_gen_function(server_port)( prompt_text, - preamble, - greedy_flag, - add_BOS, + False, + False, token_to_gen, - min_token_to_gen, + 1, temperature, top_p, top_k, repetition_penality, - end_strings, + '', ) if bot_message.endswith(TURN_TOKEN): bot_message = bot_message[: -len(TURN_TOKEN)] history[-1][1] = bot_message - session_state.append(bot_message.strip()) + print(prompt_text) + print(bot_message) + print('-------------------') + session_state.append(value_str + bot_message.strip()) return history msg.submit(user, [msg, chatbot, session_state], [msg, chatbot], queue=False).then( @@ -229,20 +387,26 @@ def bot( [ chatbot, preamble, - greedy_flag, - add_BOS, token_to_gen, - min_token_to_gen, temperature, top_p, top_k, repetition_penality, - end_strings, + seed, human_name, assistant_name, session_state, + prompt_presets, + quality_value, + toxicity_value, + humor_value, + creativity_value, + violence_value, + helpfulness_value, + not_appropriate_value, + used_value, ], - chatbot, + [chatbot], ) def clear_fun(session_state): From 47c9d743b05fd5db63a095ab5448e0fc52197e5b Mon Sep 17 00:00:00 2001 From: Kunal Dhawan Date: Thu, 29 Jun 2023 08:51:01 -0700 Subject: [PATCH 076/123] Fix confidence ensembles RNNT logprobs selection logic for exclude_blank scenario (#6937) * fixed rnnt logprob selection logic for exclude_blank scenario Signed-off-by: KunalDhawan * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix all blank ctc edge case Signed-off-by: KunalDhawan --------- Signed-off-by: KunalDhawan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/asr/models/confidence_ensemble.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index 0a5441a1cd52..cd4738e7b97c 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -86,9 +86,10 @@ def get_filtered_logprobs(hypothesis: Hypothesis, exclude_blank: bool) -> torch. filtered_logprobs = [] for alignment in hypothesis.alignments: for align_elem in alignment: - if exclude_blank and align_elem[1].item() != align_elem[0].shape[-1] - 1: + if not exclude_blank: + filtered_logprobs.append(align_elem[0]) + elif align_elem[1].item() != align_elem[0].shape[-1] - 1: filtered_logprobs.append(align_elem[0]) - filtered_logprobs.append(align_elem[0]) if not filtered_logprobs: # for the edge-case of all blanks filtered_logprobs.append(align_elem[0]) filtered_logprobs = torch.stack(filtered_logprobs) @@ -101,6 +102,8 @@ def get_filtered_logprobs(hypothesis: Hypothesis, exclude_blank: bool) -> torch. if exclude_blank: # filtering blanks labels = logprobs.argmax(dim=-1) filtered_logprobs = logprobs[labels != logprobs.shape[1] - 1] + if filtered_logprobs.shape[0] == 0: # for the edge-case of all blanks + filtered_logprobs = logprobs[:1] else: filtered_logprobs = logprobs return filtered_logprobs @@ -136,6 +139,7 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) conf_func = get_confidence_measure_bank()[conf_type] conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() + return conf_value From 7e6e04a6f5e84aa928a984fdfe9f896f1add3278 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 29 Jun 2023 15:06:20 -0600 Subject: [PATCH 077/123] Add ub communicator initialization to validation step (#6814) * Add ub communicator initialization to validation step (#6807) * fix code qol Signed-off-by: ericharper --------- Signed-off-by: ericharper Co-authored-by: Sangkug Lym Co-authored-by: ericharper --- .../language_modeling/megatron_gpt_model.py | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 817ef0bd6442..44b484b28949 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -22,7 +22,6 @@ import torch from omegaconf.dictconfig import DictConfig from pytorch_lightning.accelerators import CPUAccelerator -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin from pytorch_lightning.trainer.trainer import Trainer from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import ( @@ -53,7 +52,6 @@ SamplingParam, TextGeneration, ) -from nemo.collections.nlp.parts.nlp_overrides import GradScaler from nemo.collections.nlp.parts.utils_funcs import get_last_rank from nemo.core.classes import Exportable from nemo.core.classes.common import PretrainedModelInfo @@ -512,37 +510,38 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): return loss_mean + def initialize_ub_func(self): + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) + ub_cfgs = None + if ub_cfg_file_name is not None: + try: + import yaml + + with 
open(ub_cfg_file_name, 'r') as ub_cfg_file: + ub_cfgs = yaml.safe_load(ub_cfg_file) + except (ImportError, TypeError): + logging.error(f"Fail to read ub_tp_comm_overlap config file: {ub_cfg_file_name}.") + te_module.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + def training_step(self, dataloader_iter, batch_idx): """ We pass the dataloader iterator function to the micro-batch scheduler. The input batch to each micro-batch is fetched using the dataloader function in the micro-batch fwd function. """ - # Initialize userbuffer communicators. Initialization is done only once at the - # beginning of the first training step. + # Initialize userbuffer communicators. if self.initialize_ub: - input_shape = [ - self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), - self.cfg.get('hidden_size'), - ] - ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) - ub_cfgs = None - if ub_cfg_file_name is not None: - try: - import yaml - - with open(ub_cfg_file_name, 'r') as ub_cfg_file: - ub_cfgs = yaml.safe_load(ub_cfg_file) - except (ImportError, TypeError): - print("Fail to read ub_tp_comm_overlap config file.") - - te_module.initialize_ub( - shape=input_shape, - tp_size=self.cfg.get('tensor_model_parallel_size'), - use_fp8=self.cfg.get('fp8'), - ub_cfgs=ub_cfgs, - ) - self.initialize_ub = False + self.initialize_ub_func() if self.rampup_batch_size: num_microbatch_calculator = apex.transformer.pipeline_parallel.utils._GLOBAL_NUM_MICROBATCHES_CALCULATOR @@ -873,6 +872,10 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + # Initialize userbuffer communicators. 
+ if self.initialize_ub: + self.initialize_ub_func() + if isinstance(self.model, list): for model_module in self.model: model_module.eval() From 89225e4105a939050a8ba973ea7abbc1aab543cb Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Thu, 29 Jun 2023 14:58:13 -0700 Subject: [PATCH 078/123] Add missing save restore connector to eval scripts (#6935) Signed-off-by: smajumdar Co-authored-by: Eric Harper --- examples/nlp/language_modeling/megatron_gpt_eval.py | 8 +++++++- examples/nlp/language_modeling/megatron_t5_eval.py | 10 +++++++++- .../language_modeling/tuning/megatron_gpt_ia3_eval.py | 2 +- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index dc299fccdf90..2a6890e1a9b4 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -174,8 +174,14 @@ def main(cfg) -> None: or cfg.pipeline_model_parallel_size < 0 or cfg.get('pipeline_model_parallel_split_rank', -1) < 0 ): + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.gpt_model_file): + save_restore_connector.model_extracted_dir = cfg.gpt_model_file model_config = MegatronGPTModel.restore_from( - restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True, + restore_path=cfg.gpt_model_file, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, ) with open_dict(cfg): diff --git a/examples/nlp/language_modeling/megatron_t5_eval.py b/examples/nlp/language_modeling/megatron_t5_eval.py index 0282f9fb2913..0b6ea54b6b99 100644 --- a/examples/nlp/language_modeling/megatron_t5_eval.py +++ b/examples/nlp/language_modeling/megatron_t5_eval.py @@ -13,6 +13,7 @@ # limitations under the License. +import os from argparse import ArgumentParser import torch @@ -61,8 +62,15 @@ def main(): or args.pipeline_model_parallel_size < 0 or args.pipeline_model_parallel_split_rank < 0 ): + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(args.model_file): + save_restore_connector.model_extracted_dir = args.model_file + model_config = MegatronT5Model.restore_from( - restore_path=args.model_file, trainer=Trainer(strategy=NLPDDPStrategy()), return_config=True, + restore_path=args.model_file, + trainer=Trainer(strategy=NLPDDPStrategy()), + return_config=True, + save_restore_connector=save_restore_connector, ) args.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1) diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py index a676fee00a7e..a30818f29fb3 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import os import torch import torch.multiprocessing as mp from megatron.core import parallel_state From 0dee17b40f6e3af2333c4cd4ba5fa34fe91994fb Mon Sep 17 00:00:00 2001 From: Vahid Noroozi Date: Thu, 29 Jun 2023 15:04:39 -0700 Subject: [PATCH 079/123] added cache-aware checkpoints. 
(#6940) Signed-off-by: vnoroozi --- docs/source/asr/data/benchmark_en.csv | 5 ++++- .../asr/data/scores/en/conformer_en.csv | 6 ++++++ .../asr/models/hybrid_rnnt_ctc_bpe_models.py | 21 +++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/docs/source/asr/data/benchmark_en.csv b/docs/source/asr/data/benchmark_en.csv index 5c764ba38651..684d9f9fa76d 100644 --- a/docs/source/asr/data/benchmark_en.csv +++ b/docs/source/asr/data/benchmark_en.csv @@ -31,4 +31,7 @@ stt_en_fastconformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc" stt_en_fastconformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xlarge" stt_en_fastconformer_ctc_xlarge,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_xlarge" -stt_en_fastconformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xxlarge" \ No newline at end of file +stt_en_fastconformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_xxlarge" +stt_en_fastconformer_hybrid_large_streaming_80ms,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_80ms" +stt_en_fastconformer_hybrid_large_streaming_480ms,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_480ms" +stt_en_fastconformer_hybrid_large_streaming_1040ms,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_1040ms" \ No newline at end of file diff --git a/docs/source/asr/data/scores/en/conformer_en.csv b/docs/source/asr/data/scores/en/conformer_en.csv index 905bdf2ebedc..d77f0a687ce8 100644 --- a/docs/source/asr/data/scores/en/conformer_en.csv +++ b/docs/source/asr/data/scores/en/conformer_en.csv @@ -12,3 +12,9 @@ stt_en_conformer_transducer_large,en,,,1.6,3.5,1.7,3.7,,,,,,,,,,,, stt_en_conformer_transducer_large_ls,en,,,2.1,5.0,2.3,5.1,,,,,,,,,,,, stt_en_conformer_transducer_xlarge,en,,,1.48 %,2.95 %,1.62 %,3.01 %,,6.46 %,4.59 %,5.32 %,5.70 %,6.47 %,21.32 %,,,,2.05 %,1.17 % stt_en_conformer_transducer_xxlarge,en,,,1.52 %,3.09 %,1.72 %,3.14 %,,,5.29 %,5.85 %,6.64 %,,,,,,2.42 %,1.49 % +stt_en_fastconformer_hybrid_large_streaming_80ms (CTC),en,,,,,3.5 %,8.1 %,,,10.2 %,7.2 %,,,,,,,3.5 %,2.3 % +stt_en_fastconformer_hybrid_large_streaming_480ms (CTC),en,,,,,3.6 %,7.5 %,,,9.8 %,7.0 %,,,,,,,3.5 %,2.1 % +stt_en_fastconformer_hybrid_large_streaming_1040ms (CTC),en,,,,,2.7 %,6.4 %,,,9.0 %,7.0 %,,,,,,,3.2 %,1.9 % +stt_en_fastconformer_hybrid_large_streaming_80ms (RNNT),en,,,,,2.7 %,6.5 %,,,9.1 %,6.9 %,,,,,,,3.2 %,1.9 % +stt_en_fastconformer_hybrid_large_streaming_480ms (RNNT),en,,,,,2.7 %,6.1 %,,,8.5 %,6.7 %,,,,,,,3.1 %,1.8 % +stt_en_fastconformer_hybrid_large_streaming_1040ms (RNNT),en,,,,,2.3 %,5.5 %,,,8.0 %,6.6 %,,,,,,,2.9 %,1.6 % diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py index 6637486f18dc..6604983b6461 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py @@ -523,4 +523,25 @@ def 
list_available_models(cls) -> List[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_80ms", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_80ms", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_80ms/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_80ms.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_480ms", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_480ms", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_480ms/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_480ms.nemo", + ) + results.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="stt_en_fastconformer_hybrid_large_streaming_1040ms", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_streaming_1040ms", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_streaming_1040ms/versions/1.20.0/files/stt_en_fastconformer_hybrid_large_streaming_1040ms.nemo", + ) + results.append(model) + return results From 85e3e151f30f6c76409d3d2856b3fa795f8da7ff Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Thu, 29 Jun 2023 16:07:40 -0600 Subject: [PATCH 080/123] Merge release r1.19.0 into main (#6948) * Cut branch r1.19.0 Signed-off-by: smajumdar * Tutorial fixes (#6717) Signed-off-by: smajumdar * fix notebook error (#6840) Signed-off-by: Yi Dong * fix (#6842) Signed-off-by: Yi Dong * update branch Signed-off-by: ericharper * update package info Signed-off-by: ericharper --------- Signed-off-by: smajumdar Signed-off-by: Yi Dong Signed-off-by: ericharper Co-authored-by: Somshubra Majumdar Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> --- nemo/package_info.py | 2 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 21 +++++++++++++++++++ .../tools/CTC_Segmentation_Tutorial.ipynb | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/nemo/package_info.py b/nemo/package_info.py index 709159dd575a..17efc924de32 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -14,7 +14,7 @@ MAJOR = 1 -MINOR = 19 +MINOR = 20 PATCH = 0 PRE_RELEASE = 'rc0' diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index b7ae11ef3f5d..bfd3c7094198 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "c3217a15", "metadata": {}, @@ -15,6 +16,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8c72dc42", "metadata": {}, @@ -25,6 +27,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "79154a9e", "metadata": {}, @@ -73,6 +76,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7e0bbc89", "metadata": {}, @@ -92,6 +96,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1ff1d46f", "metadata": {}, @@ -141,6 +146,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "id": "aa356012", "metadata": {}, @@ -239,6 +245,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "02bff63f", "metadata": {}, @@ -267,6 +274,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "89e1e5b3", "metadata": {}, @@ -339,6 +347,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "05ebadc3", "metadata": {}, @@ -347,6 +356,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2fe38a29", "metadata": {}, @@ -381,6 +391,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "678f65ef", "metadata": {}, @@ -411,6 +422,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8af66b4a", "metadata": {}, @@ -464,6 +476,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6ecec681", "metadata": {}, @@ -472,6 +485,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "58a3d4fa", "metadata": {}, @@ -543,6 +557,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "45ac928f", "metadata": {}, @@ -557,6 +572,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "158a4bbe", "metadata": {}, @@ -597,6 +613,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fa16378e", "metadata": {}, @@ -605,6 +622,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ed056ec6", "metadata": {}, @@ -630,6 +648,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a62b48dc", "metadata": {}, @@ -685,6 +704,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cccd54d9", "metadata": {}, @@ -790,6 +810,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0f2f6e3a", "metadata": {}, diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 98f0cce4e9ec..15a82a36a1b2 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. 
The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", From e5706a0e70be2cb6abfd5698ab5f7be40860127d Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Fri, 30 Jun 2023 11:35:57 -0600 Subject: [PATCH 081/123] upgrade base container (#6938) Signed-off-by: ericharper --- Dockerfile | 4 ++-- Jenkinsfile | 2 +- README.rst | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7722555357b2..3aa4c39d6a4d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.04-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.06-py3 # build an image that includes only the nemo dependencies, ensures that dependencies # are included first for optimal caching, and useful for building a development @@ -94,7 +94,7 @@ COPY . . # start building the final container FROM nemo-deps as nemo -ARG NEMO_VERSION=1.19.0 +ARG NEMO_VERSION=1.20.0 # Check that NEMO_VERSION is set. Build will fail without this. 
Expose NEMO and base container # version information as runtime environment variable for introspection purposes diff --git a/Jenkinsfile b/Jenkinsfile index 1a79d87bcd38..be62291daf24 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,7 +1,7 @@ pipeline { agent { docker { - image 'nvcr.io/nvidia/pytorch:23.04-py3' + image 'nvcr.io/nvidia/pytorch:23.06-py3' args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache:/root/.cache --shm-size=8g --env TRANSFORMERS_OFFLINE=1' } } diff --git a/README.rst b/README.rst index 8a788da71550..7ac95b8cef70 100644 --- a/README.rst +++ b/README.rst @@ -319,13 +319,13 @@ To build a nemo container with Dockerfile from a branch, please run DOCKER_BUILDKIT=1 docker build -f Dockerfile -t nemo:latest . -If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 23.04-py3 and then installing from GitHub. +If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 23.06-py3 and then installing from GitHub. .. code-block:: bash docker run --gpus all -it --rm -v :/NeMo --shm-size=8g \ -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \ - stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:23.04-py3 + stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:23.06-py3 Examples -------- From b0e5bf3627dbcfb3f4a72d73d3c5e92184d8b1f6 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Fri, 30 Jun 2023 17:32:52 -0700 Subject: [PATCH 082/123] Fix requirements for pydantic + inflect (#6956) * Fix requirements for pydantic + inflect Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: smajumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- requirements/requirements_asr.txt | 1 - requirements/requirements_common.txt | 2 ++ requirements/requirements_nlp.txt | 1 - requirements/requirements_tts.txt | 1 - 4 files changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements/requirements_asr.txt b/requirements/requirements_asr.txt index fdeaeb2d450d..011862ad723b 100644 --- a/requirements/requirements_asr.txt +++ b/requirements/requirements_asr.txt @@ -1,7 +1,6 @@ braceexpand editdistance g2p_en -inflect ipywidgets jiwer kaldi-python-io diff --git a/requirements/requirements_common.txt b/requirements/requirements_common.txt index 29d8ac4dd49b..a4d343a32d1a 100644 --- a/requirements/requirements_common.txt +++ b/requirements/requirements_common.txt @@ -1,4 +1,6 @@ +inflect pandas +pydantic<2 # remove after inflect supports Pydantic 2.0+ sacremoses>=0.0.43 sentencepiece<1.0.0 youtokentome>=1.0.5 diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index 2018de6fbc31..68d8b8985748 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -7,7 +7,6 @@ ftfy gdown h5py ijson -inflect jieba markdown2 matplotlib>=3.3.2 diff --git a/requirements/requirements_tts.txt b/requirements/requirements_tts.txt index 20484871ee4b..bb330aaf2e58 100644 --- a/requirements/requirements_tts.txt +++ b/requirements/requirements_tts.txt @@ -1,6 +1,5 @@ attrdict einops -inflect jieba kornia librosa From 0b6e4e61bd23cbf9704dac431756d491adab084d Mon Sep 17 00:00:00 2001 From: Tim Moon <4406448+timmoon10@users.noreply.github.com> Date: Mon, 3 Jul 2023 08:01:35 -0700 Subject: [PATCH 083/123] Update distopt API for coalesced NCCL calls (#6886) * Update distopt API for coalesced NCCL 
calls Signed-off-by: Tim Moon * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update comment Signed-off-by: Tim Moon --------- Signed-off-by: Tim Moon Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Dockerfile | 6 +++--- nemo/core/optim/distributed_adam.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3aa4c39d6a4d..2e6b617087bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,11 +45,11 @@ RUN apt-get update && \ WORKDIR /workspace/ WORKDIR /tmp/ -# TODO: Remove once this Apex commit (2/24/23) is included in PyTorch +# TODO: Remove once this Apex commit (5/12/23) is included in PyTorch # container RUN git clone https://github.com/NVIDIA/apex.git && \ cd apex && \ - git checkout 57057e2fcf1c084c0fcc818f55c0ff6ea1b24ae2 && \ + git checkout 8b7a1ff183741dd8f9b87e7bafd04cfde99cea28 && \ pip3 install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./ # uninstall stuff from base container @@ -75,7 +75,7 @@ RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-chec # install flash attention dependencies RUN pip install flash-attn # pinned triton version for flash-attention https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py#L3 -RUN pip install triton==2.0.0.dev20221202 +RUN pip install triton==2.0.0.dev20221202 # install k2, skip if installation fails COPY scripts /tmp/nemo/scripts/ diff --git a/nemo/core/optim/distributed_adam.py b/nemo/core/optim/distributed_adam.py index 1f2ce90f3ff7..8c3b0a30658f 100644 --- a/nemo/core/optim/distributed_adam.py +++ b/nemo/core/optim/distributed_adam.py @@ -19,6 +19,7 @@ from apex.contrib.optimizers.distributed_fused_adam import ( DistributedFusedAdam, _coalescing_manager, + _coalescing_manager_append_work, _disable_pre_forward_hook, ) from megatron.core import parallel_state @@ -173,16 +174,15 @@ def _fp32_optim_grad_sync(self): for model_param, main_param in self._fp32_optim_main_params.items(): if model_param.grad is not None: main_param.grad += model_param.grad.detach() - sync_requests = [] - with _coalescing_manager(self.process_group, self.device, sync_requests): + with _coalescing_manager(self.process_group, self.device, async_ops=True) as cm: for main_param in self._fp32_optim_main_params.values(): - sync_requests.append( + _coalescing_manager_append_work( + cm, torch.distributed.all_reduce( main_param.grad, op=torch.distributed.ReduceOp.AVG, group=self.process_group, async_op=True, - ) + ), ) - for req in sync_requests: - req.wait() + cm.wait() self._fp32_optim_grad_sync_needed = False def zero_grad(self, *args, **kwargs): From 17447184bdf026b2f88d81353998856170bc09bc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 14:13:13 -0700 Subject: [PATCH 084/123] Remove `compute_on_step` from metrics (#6979) (#6981) * Remove `compute_on_step` from metrics * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Remove confusing log message * Update tests --------- Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- 
nemo/collections/asr/metrics/rnnt_wer.py | 2 +- nemo/collections/asr/metrics/rnnt_wer_bpe.py | 2 +- nemo/collections/asr/metrics/wer.py | 2 +- nemo/collections/asr/metrics/wer_bpe.py | 2 +- .../common/metrics/global_average_loss_metric.py | 9 ++------- nemo/collections/common/metrics/perplexity.py | 8 ++------ nemo/collections/nlp/metrics/sequence_perplexity.py | 9 ++------- .../nlp/models/language_modeling/bert_lm_model.py | 2 +- .../nlp/models/text2sparql/text2sparql_model.py | 2 +- nemo/core/optim/optimizers.py | 1 - tests/collections/common/pl_utils.py | 8 +++----- 11 files changed, 15 insertions(+), 32 deletions(-) diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 55f9f4b5ea9f..7e5636191a1d 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -1224,7 +1224,7 @@ def validation_epoch_end(self, outputs): def __init__( self, decoding: RNNTDecoding, batch_dim_index=0, use_cer=False, log_prediction=True, dist_sync_on_step=False ): - super(RNNTWER, self).__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super(RNNTWER, self).__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.batch_dim_index = batch_dim_index self.use_cer = use_cer diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index 0870eb180776..d2e2c3cc5923 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -359,7 +359,7 @@ def __init__( log_prediction: bool = True, dist_sync_on_step=False, ): - super(RNNTBPEWER, self).__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super(RNNTBPEWER, self).__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.batch_dim_index = batch_dim_index self.use_cer = use_cer diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 7f7f853d307d..4d90810cc3df 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -1125,7 +1125,7 @@ def __init__( fold_consecutive=True, dist_sync_on_step=False, ): - super().__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super().__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.use_cer = use_cer diff --git a/nemo/collections/asr/metrics/wer_bpe.py b/nemo/collections/asr/metrics/wer_bpe.py index 762acf172a16..8a92e4745a1b 100644 --- a/nemo/collections/asr/metrics/wer_bpe.py +++ b/nemo/collections/asr/metrics/wer_bpe.py @@ -247,7 +247,7 @@ def __init__( fold_consecutive=True, dist_sync_on_step=False, ): - super().__init__(dist_sync_on_step=dist_sync_on_step, compute_on_step=False) + super().__init__(dist_sync_on_step=dist_sync_on_step) self.decoding = decoding self.tokenizer = self.decoding.tokenizer self.blank_id = self.decoding.tokenizer.tokenizer.vocab_size diff --git a/nemo/collections/common/metrics/global_average_loss_metric.py b/nemo/collections/common/metrics/global_average_loss_metric.py index fae1dbfea5e8..3bbd4d13abf4 100644 --- a/nemo/collections/common/metrics/global_average_loss_metric.py +++ b/nemo/collections/common/metrics/global_average_loss_metric.py @@ -28,9 +28,6 @@ class GlobalAverageLossMetric(Metric): See :doc:`PyTorch Lightning Metrics` for the metric usage instruction. Args: - compute_on_step: - The method :meth:`forward` only calls ``update()`` and returns ``None`` if this is set to ``False``. 
- default: ``True`` dist_sync_on_step: Synchronize metric state across processes at each method :meth:`forward` call before returning the value at the step @@ -44,10 +41,8 @@ class GlobalAverageLossMetric(Metric): full_state_update = True - def __init__(self, compute_on_step=True, dist_sync_on_step=False, process_group=None, take_avg_loss=True): - super().__init__( - compute_on_step=compute_on_step, dist_sync_on_step=dist_sync_on_step, process_group=process_group - ) + def __init__(self, dist_sync_on_step=False, process_group=None, take_avg_loss=True): + super().__init__(dist_sync_on_step=dist_sync_on_step, process_group=process_group) self.add_state("loss_sum", torch.tensor(0.0, dtype=torch.float64), dist_reduce_fx='sum') self.add_state("num_measurements", torch.tensor(0, dtype=torch.int64), dist_reduce_fx='sum') self.take_avg_loss = take_avg_loss diff --git a/nemo/collections/common/metrics/perplexity.py b/nemo/collections/common/metrics/perplexity.py index 1158e3408611..9e1c21737ec8 100644 --- a/nemo/collections/common/metrics/perplexity.py +++ b/nemo/collections/common/metrics/perplexity.py @@ -29,8 +29,6 @@ class Perplexity(Metric): See `PyTorch Lightning Metrics `_ for the metric usage instructions. Args: - compute_on_step: - Forward only calls ``update()`` and returns ``None`` if this is set to ``False``. default: ``True`` dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` before returning the value at the step. @@ -44,10 +42,8 @@ class Perplexity(Metric): full_state_update = True - def __init__(self, compute_on_step=True, dist_sync_on_step=False, process_group=None, validate_args=True): - super().__init__( - compute_on_step=compute_on_step, dist_sync_on_step=dist_sync_on_step, process_group=process_group - ) + def __init__(self, dist_sync_on_step=False, process_group=None, validate_args=True): + super().__init__(dist_sync_on_step=dist_sync_on_step, process_group=process_group) self.validate_args = validate_args self.add_state('perplexities_sum', torch.tensor(0.0, dtype=torch.float64), dist_reduce_fx='sum') # Total number of distributions seen since last reset diff --git a/nemo/collections/nlp/metrics/sequence_perplexity.py b/nemo/collections/nlp/metrics/sequence_perplexity.py index 688f9db87ea6..339f062f7cc1 100644 --- a/nemo/collections/nlp/metrics/sequence_perplexity.py +++ b/nemo/collections/nlp/metrics/sequence_perplexity.py @@ -31,8 +31,6 @@ class SequencePerplexity(Metric): See :doc:`PyTorch Lightning Metrics` for the metric usage instructions. Args: - compute_on_step: - Forward only calls ``update()`` and returns ``None`` if this is set to ``False``. default: ``True`` dist_sync_on_step: Synchronize metric state across processes at each ``forward()`` before returning the value at the step. process_group: @@ -43,12 +41,9 @@ class SequencePerplexity(Metric): to perform the allgather. 
""" - def __init__(self, compute_on_step=True, dist_sync_on_step=False, process_group=None, dist_sync_fn=None): + def __init__(self, dist_sync_on_step=False, process_group=None, dist_sync_fn=None): super().__init__( - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - dist_sync_fn=dist_sync_fn, + dist_sync_on_step=dist_sync_on_step, process_group=process_group, dist_sync_fn=dist_sync_fn, ) # Total sum of exponentiated average negative log likelihoods diff --git a/nemo/collections/nlp/models/language_modeling/bert_lm_model.py b/nemo/collections/nlp/models/language_modeling/bert_lm_model.py index 4c9d43c20d54..5cf509e77846 100644 --- a/nemo/collections/nlp/models/language_modeling/bert_lm_model.py +++ b/nemo/collections/nlp/models/language_modeling/bert_lm_model.py @@ -116,7 +116,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): # create extra bias # setup to track metrics - self.validation_perplexity = Perplexity(compute_on_step=False) + self.validation_perplexity = Perplexity() self.setup_optimization(cfg.optim) diff --git a/nemo/collections/nlp/models/text2sparql/text2sparql_model.py b/nemo/collections/nlp/models/text2sparql/text2sparql_model.py index 5290209b0c95..50046aef0344 100644 --- a/nemo/collections/nlp/models/text2sparql/text2sparql_model.py +++ b/nemo/collections/nlp/models/text2sparql/text2sparql_model.py @@ -100,7 +100,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): decoder=cfg.language_model.pretrained_decoder_model_name, ) - self.validation_perplexity = Perplexity(compute_on_step=False) + self.validation_perplexity = Perplexity() self.setup_optimization(cfg.optim) diff --git a/nemo/core/optim/optimizers.py b/nemo/core/optim/optimizers.py index 76e47e20e0cc..9473ef0af969 100644 --- a/nemo/core/optim/optimizers.py +++ b/nemo/core/optim/optimizers.py @@ -51,7 +51,6 @@ AVAILABLE_OPTIMIZERS['fused_adam'] = FusedAdam except ModuleNotFoundError: HAVE_APEX = False - logging.warning("Apex was not found. 
Using the lamb or fused_adam optimizer will error out.") HAVE_APEX_DISTRIBUTED_ADAM = False if HAVE_APEX: diff --git a/tests/collections/common/pl_utils.py b/tests/collections/common/pl_utils.py index 395c8cef5969..a2e9609c8492 100644 --- a/tests/collections/common/pl_utils.py +++ b/tests/collections/common/pl_utils.py @@ -90,7 +90,7 @@ def _class_test( calculated across devices for each batch (and not just at the end) """ # Instanciate lightning metric - metric = metric_class(compute_on_step=True, dist_sync_on_step=dist_sync_on_step, **metric_args) + metric = metric_class(dist_sync_on_step=dist_sync_on_step, **metric_args) # verify metrics work after being loaded from pickled state pickled_metric = pickle.dumps(metric) @@ -303,7 +303,7 @@ def _perplexity_class_test( calculated across devices for each batch (and not just at the end) """ # Instanciate lightning metric - perplexity = Perplexity(compute_on_step=True, dist_sync_on_step=dist_sync_on_step, **metric_args) + perplexity = Perplexity(dist_sync_on_step=dist_sync_on_step, **metric_args) if (probs is None) == (logits is None): with pytest.raises(ValueError): perplexity(probs, logits) @@ -464,9 +464,7 @@ def _loss_class_test( calculated across devices for each batch (and not just at the end) """ # Instantiate lightning metric - loss_metric = GlobalAverageLossMetric( - compute_on_step=True, dist_sync_on_step=dist_sync_on_step, take_avg_loss=take_avg_loss - ) + loss_metric = GlobalAverageLossMetric(dist_sync_on_step=dist_sync_on_step, take_avg_loss=take_avg_loss) # verify loss works after being loaded from pickled state pickled_metric = pickle.dumps(loss_metric) From 153c30780a2bc72f115b4c697cc303ddc1d451ea Mon Sep 17 00:00:00 2001 From: Sergii Dymchenko Date: Thu, 6 Jul 2023 19:40:29 -0700 Subject: [PATCH 085/123] Fix require_grad typos (#6930) Signed-off-by: Sergii Dymchenko --- .../modules/transformer/transformer_generators.py | 12 ++++++------ .../common/transformer/transformer_generators.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/nemo/collections/asr/modules/transformer/transformer_generators.py b/nemo/collections/asr/modules/transformer/transformer_generators.py index 504fdf076d3d..6e17151dcd1b 100644 --- a/nemo/collections/asr/modules/transformer/transformer_generators.py +++ b/nemo/collections/asr/modules/transformer/transformer_generators.py @@ -188,7 +188,7 @@ def freeze(self) -> None: param.requires_grad = False self.decoder.eval() for param in self.log_softmax.parameters(): - param.require_grad = False + param.requires_grad = False self.log_softmax.eval() def unfreeze(self) -> None: @@ -201,7 +201,7 @@ def unfreeze(self) -> None: param.requires_grad = True self.decoder.train() for param in self.log_softmax.parameters(): - param.require_grad = True + param.requires_grad = True self.log_softmax.train() @contextmanager @@ -701,10 +701,10 @@ def freeze(self) -> None: param.requires_grad = False self.decoders[model_num].eval() for param in self.log_softmaxes[model_num].parameters(): - param.require_grad = False + param.requires_grad = False self.log_softmaxes[model_num].eval() for param in self.encoders[model_num].parameters(): - param.require_grad = False + param.requires_grad = False self.encoders[model_num].eval() def unfreeze(self) -> None: @@ -718,10 +718,10 @@ def unfreeze(self) -> None: param.requires_grad = True self.decoders[model_num].train() for param in self.log_softmaxes[model_num].parameters(): - param.require_grad = True + param.requires_grad = True 
self.log_softmaxes[model_num].train() for param in self.encoders[model_num].parameters(): - param.require_grad = True + param.requires_grad = True self.encoders[model_num].train() @contextmanager diff --git a/nemo/collections/nlp/modules/common/transformer/transformer_generators.py b/nemo/collections/nlp/modules/common/transformer/transformer_generators.py index 504fdf076d3d..6e17151dcd1b 100644 --- a/nemo/collections/nlp/modules/common/transformer/transformer_generators.py +++ b/nemo/collections/nlp/modules/common/transformer/transformer_generators.py @@ -188,7 +188,7 @@ def freeze(self) -> None: param.requires_grad = False self.decoder.eval() for param in self.log_softmax.parameters(): - param.require_grad = False + param.requires_grad = False self.log_softmax.eval() def unfreeze(self) -> None: @@ -201,7 +201,7 @@ def unfreeze(self) -> None: param.requires_grad = True self.decoder.train() for param in self.log_softmax.parameters(): - param.require_grad = True + param.requires_grad = True self.log_softmax.train() @contextmanager @@ -701,10 +701,10 @@ def freeze(self) -> None: param.requires_grad = False self.decoders[model_num].eval() for param in self.log_softmaxes[model_num].parameters(): - param.require_grad = False + param.requires_grad = False self.log_softmaxes[model_num].eval() for param in self.encoders[model_num].parameters(): - param.require_grad = False + param.requires_grad = False self.encoders[model_num].eval() def unfreeze(self) -> None: @@ -718,10 +718,10 @@ def unfreeze(self) -> None: param.requires_grad = True self.decoders[model_num].train() for param in self.log_softmaxes[model_num].parameters(): - param.require_grad = True + param.requires_grad = True self.log_softmaxes[model_num].train() for param in self.encoders[model_num].parameters(): - param.require_grad = True + param.requires_grad = True self.encoders[model_num].train() @contextmanager From 19449a9a00c346d5900dbf3df3864e378c0db23e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 7 Jul 2023 14:18:45 -0400 Subject: [PATCH 086/123] fix the mpt chatbot (#6957) (#6968) Signed-off-by: Yi Dong Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> --- .../nlp/modules/common/megatron_web_server.py | 3 +++ .../modules/common/text_generation_strategy.py | 16 ++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/nemo/collections/nlp/modules/common/megatron_web_server.py b/nemo/collections/nlp/modules/common/megatron_web_server.py index 648bca024ba0..7c04ef201927 100644 --- a/nemo/collections/nlp/modules/common/megatron_web_server.py +++ b/nemo/collections/nlp/modules/common/megatron_web_server.py @@ -90,6 +90,9 @@ def get_generation(prompt, greedy, add_BOS, token_to_gen, min_tokens, temp, top_ response = text_generation(data, port=port) sentences = response['sentences'] bot_message = sentences[0] + if bot_message.find(' token + prompt = prompt.replace('', '').replace('', '').replace('', '') bot_message = bot_message[len(prompt) :] return bot_message diff --git a/nemo/collections/nlp/modules/common/text_generation_strategy.py b/nemo/collections/nlp/modules/common/text_generation_strategy.py index 8608c0c9a680..573bdc80735e 100644 --- a/nemo/collections/nlp/modules/common/text_generation_strategy.py +++ b/nemo/collections/nlp/modules/common/text_generation_strategy.py @@ -153,15 +153,19 @@ def end_of_generation_condition( else: tokenizer = self.model.tokenizer conditions = [] + end_tokens = set() + 
end_tokens.add(eod_id) + for end_string in end_strings: + ids_1 = tokenizer.text_to_ids(f'{end_string}') + ids_2 = tokenizer.text_to_ids('') + if len(ids_1) <= len(ids_2): + continue + token_id = ids_1[len(ids_2) :][0] + end_tokens.add(token_id) for p, token_item in zip(prev, tokens): text = tokenizer.ids_to_text(token_item.tolist()) conditions.append( - any( - [ - p.item() == eod_id if end_string == END_OF_SEQ else text.endswith(end_string) - for end_string in end_strings - ] - ) + any([text.endswith(end_string) for end_string in end_strings] + [p.item() in end_tokens]) ) return torch.tensor(conditions, dtype=torch.bool, device=tokens.device) From ff430e479d52f8e2d3a04d682f53be0372f2ac15 Mon Sep 17 00:00:00 2001 From: Zhilin Wang Date: Fri, 7 Jul 2023 12:41:18 -0700 Subject: [PATCH 087/123] add support for max_total_length=4096 for 43b (#6763) * add support for max_total_length=4096 for 43b Signed-off-by: Zhilin Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Zhilin Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../customization_dataset_preparation.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/customization_dataset_preparation/customization_dataset_preparation.py b/tools/customization_dataset_preparation/customization_dataset_preparation.py index 071c06e20803..53582f5489f1 100644 --- a/tools/customization_dataset_preparation/customization_dataset_preparation.py +++ b/tools/customization_dataset_preparation/customization_dataset_preparation.py @@ -41,6 +41,7 @@ 1. `--drop_duplicates` : Use this flag to drop rows that are exactly the same for both prompt and completion 2. `--split_train_validation` : Use this flag to split one file into separate train and validation files. 3. `--val_proportion 0.1`: Use a float (default 0.1) between 0 and 1 to control how much of the dataset to allocate to the validation set and the remaining for the train dataset. +4. `--short_context_model`: Use this flag to prepare data for use with models that have shorter context length of 2048 tokens (e.g. 5B and 20B models) What to expect @@ -396,6 +397,12 @@ def print_all_messages(messages): parser.add_argument("--completion_template", "-ct", default="{completion}") parser.add_argument("--drop_duplicates", "-dd", action="store_true") parser.add_argument("--split_train_validation", "-stv", action="store_true") + parser.add_argument( + "--short_context_model", + "-scm", + action="store_true", + help="Specifies if using models with shorter context length of 2048 tokens e.g. 5B and 20B models", + ) parser.add_argument( "--val_proportion", "-vp", @@ -409,8 +416,13 @@ def print_all_messages(messages): messages = [] messages.append(str(args)) + if args.short_context_model: + MAX_TOKEN_LENGTH = 2048 + else: + MAX_TOKEN_LENGTH = 4096 + # every token is around 4 chars - MAX_TOTAL_CHAR_LENGTH = 4 * 2048 + MAX_TOTAL_CHAR_LENGTH = 4 * MAX_TOKEN_LENGTH df, message = load_file_into_df(args.filename) messages.append(message) From b22a21ffac1fcf1a5d3e2f0c70b6b6263c5a152a Mon Sep 17 00:00:00 2001 From: Vadim Kantorov Date: Fri, 7 Jul 2023 22:11:16 +0200 Subject: [PATCH 088/123] rnnt_greedy_decoding.py: typos? 
auto-repressively -> auto-regressively (#6989) Signed-off-by: Vadim Kantorov --- .../parts/submodules/rnnt_greedy_decoding.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 42b14fd7b8bf..ac10e54bb249 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -241,7 +241,7 @@ def _joint_step(self, enc, pred, log_normalize: Optional[bool] = None): class GreedyRNNTInfer(_GreedyRNNTInfer): """A greedy transducer decoder. - Sequence level greedy decoding, performed auto-repressively. + Sequence level greedy decoding, performed auto-regressively. Args: decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. @@ -326,7 +326,7 @@ def forward( partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, ): """Returns a list of hypotheses given an input batch of the encoder hidden embedding. - Output token is generated auto-repressively. + Output token is generated auto-regressively. Args: encoder_output: A tensor of size (batch, features, timesteps). @@ -479,7 +479,7 @@ def _greedy_decode( class GreedyBatchedRNNTInfer(_GreedyRNNTInfer): """A batch level greedy transducer decoder. - Batch level greedy decoding, performed auto-repressively. + Batch level greedy decoding, performed auto-regressively. Args: decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. @@ -571,7 +571,7 @@ def forward( partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, ): """Returns a list of hypotheses given an input batch of the encoder hidden embedding. - Output token is generated auto-repressively. + Output token is generated auto-regressively. Args: encoder_output: A tensor of size (batch, features, timesteps). @@ -1034,7 +1034,7 @@ def __init__(self, encoder_model: str, decoder_joint_model: str, max_symbols_per def __call__(self, audio_signal: torch.Tensor, length: torch.Tensor): """Returns a list of hypotheses given an input batch of the encoder hidden embedding. - Output token is generated auto-repressively. + Output token is generated auto-regressively. Args: encoder_output: A tensor of size (batch, features, timesteps). @@ -1455,7 +1455,7 @@ def _get_initial_states(self, batchsize): class GreedyMultiblankRNNTInfer(GreedyRNNTInfer): """A greedy transducer decoder for multi-blank RNN-T. - Sequence level greedy decoding, performed auto-repressively. + Sequence level greedy decoding, performed auto-regressively. Args: decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. @@ -1655,7 +1655,7 @@ def _greedy_decode( class GreedyBatchedMultiblankRNNTInfer(GreedyBatchedRNNTInfer): """A batch level greedy transducer decoder. - Batch level greedy decoding, performed auto-repressively. + Batch level greedy decoding, performed auto-regressively. Args: decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. joint_model: rnnt_utils.AbstractRNNTJoint implementation. @@ -2207,7 +2207,7 @@ class GreedyBatchedRNNTInferConfig: class GreedyTDTInfer(_GreedyRNNTInfer): """A greedy TDT decoder. - Sequence level greedy decoding, performed auto-repressively. + Sequence level greedy decoding, performed auto-regressively. Args: decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. 
@@ -2289,7 +2289,7 @@ def forward( partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, ): """Returns a list of hypotheses given an input batch of the encoder hidden embedding. - Output token is generated auto-repressively. + Output token is generated auto-regressively. Args: encoder_output: A tensor of size (batch, features, timesteps). encoded_lengths: list of int representing the length of each sequence @@ -2459,7 +2459,7 @@ def _greedy_decode( class GreedyBatchedTDTInfer(_GreedyRNNTInfer): """A batch level greedy TDT decoder. - Batch level greedy decoding, performed auto-repressively. + Batch level greedy decoding, performed auto-regressively. Args: decoder_model: rnnt_utils.AbstractRNNTDecoder implementation. joint_model: rnnt_utils.AbstractRNNTJoint implementation. @@ -2547,7 +2547,7 @@ def forward( partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, ): """Returns a list of hypotheses given an input batch of the encoder hidden embedding. - Output token is generated auto-repressively. + Output token is generated auto-regressively. Args: encoder_output: A tensor of size (batch, features, timesteps). encoded_lengths: list of int representing the length of each sequence From 07b79d2061cbcf98f2f0bc9106f810610183fc96 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 7 Jul 2023 14:51:30 -0700 Subject: [PATCH 089/123] Cache handling without input tensors mutation (#6980) (#6996) * Cache handling without input tensors mutation * Cleanup * Cleanup#2 * Cleanup#3 --------- Signed-off-by: Boris Fomitchev Co-authored-by: Boris Fomitchev Co-authored-by: Somshubra Majumdar --- nemo/collections/asr/models/asr_model.py | 64 ++++++----------- .../asr/modules/conformer_encoder.py | 48 ++++++------- .../multi_head_attention_adapter_module.py | 16 ++--- .../asr/parts/submodules/causal_convs.py | 28 ++++---- .../asr/parts/submodules/conformer_modules.py | 70 +++++++------------ .../parts/submodules/multi_head_attention.py | 53 ++++++++------ 6 files changed, 118 insertions(+), 161 deletions(-) diff --git a/nemo/collections/asr/models/asr_model.py b/nemo/collections/asr/models/asr_model.py index c0f4c1cd0a70..6ac3633201e2 100644 --- a/nemo/collections/asr/models/asr_model.py +++ b/nemo/collections/asr/models/asr_model.py @@ -161,7 +161,7 @@ def output_module(self): @property def output_names(self): otypes = self.output_module.output_types - if hasattr(self.input_module, 'export_cache_support') and self.input_module.export_cache_support: + if getattr(self.input_module, 'export_cache_support', False): in_types = self.input_module.output_types otypes = {n: t for (n, t) in list(otypes.items())[:1]} for (n, t) in list(in_types.items())[1:]: @@ -174,7 +174,6 @@ def forward_for_export( """ This forward is used when we need to export the model to ONNX format. Inputs cache_last_channel and cache_last_time are needed to be passed for exporting streaming models. - When they are passed, it just passes the inputs through the encoder part and currently the ONNX conversion does not fully work for this case. Args: input: Tensor that represents a batch of raw audio signals, of shape [B, T]. T here represents timesteps. 
@@ -187,49 +186,26 @@ def forward_for_export( Returns: the output of the model """ - if hasattr(self.input_module, 'forward_for_export'): - if cache_last_channel is None and cache_last_time is None: - encoder_output = self.input_module.forward_for_export(audio_signal=input, length=length) - else: - encoder_output = self.input_module.forward_for_export( - audio_signal=input, - length=length, - cache_last_channel=cache_last_channel, - cache_last_time=cache_last_time, - cache_last_channel_len=cache_last_channel_len, - ) + enc_fun = getattr(self.input_module, 'forward_for_export', self.input_module.forward) + if cache_last_channel is None: + encoder_output = enc_fun(audio_signal=input, length=length) + if isinstance(encoder_output, tuple): + encoder_output = encoder_output[0] else: - if cache_last_channel is None and cache_last_time is None: - encoder_output = self.input_module(audio_signal=input, length=length) - else: - encoder_output = self.input_module( - audio_signal=input, - length=length, - cache_last_channel=cache_last_channel, - cache_last_time=cache_last_time, - cache_last_channel_len=cache_last_channel_len, - ) - if isinstance(encoder_output, tuple): - decoder_input = encoder_output[0] - else: - decoder_input = encoder_output - if hasattr(self.output_module, 'forward_for_export'): - if cache_last_channel is None and cache_last_time is None: - ret = self.output_module.forward_for_export(encoder_output=decoder_input) - else: - ret = self.output_module.forward_for_export(encoder_output=decoder_input) - else: - if cache_last_channel is None and cache_last_time is None: - ret = self.output_module(encoder_output=decoder_input) - else: - ret = self.output_module(encoder_output=decoder_input) - if cache_last_channel is None and cache_last_time is None: - pass - else: - if isinstance(ret, tuple): - ret = (ret[0], encoder_output[1], encoder_output[2], encoder_output[3], encoder_output[4]) - else: - ret = (ret, encoder_output[1], encoder_output[2], encoder_output[3], encoder_output[4]) + encoder_output, length, cache_last_channel, cache_last_time, cache_last_channel_len = enc_fun( + audio_signal=input, + length=length, + cache_last_channel=cache_last_channel, + cache_last_time=cache_last_time, + cache_last_channel_len=cache_last_channel_len, + ) + + dec_fun = getattr(self.output_module, 'forward_for_export', self.output_module.forward) + ret = dec_fun(encoder_output=encoder_output) + if isinstance(ret, tuple): + ret = ret[0] + if cache_last_channel is not None: + ret = (ret, length, cache_last_channel, cache_last_time, cache_last_channel_len) return cast_all(ret, from_dtype=torch.float16, to_dtype=torch.float32) @property diff --git a/nemo/collections/asr/modules/conformer_encoder.py b/nemo/collections/asr/modules/conformer_encoder.py index 74c255741039..8f429c25806d 100644 --- a/nemo/collections/asr/modules/conformer_encoder.py +++ b/nemo/collections/asr/modules/conformer_encoder.py @@ -505,11 +505,6 @@ def forward_internal( (audio_signal.size(0),), audio_signal.size(-1), dtype=torch.int64, device=audio_signal.device ) - if cache_last_time is not None: - cache_last_time_next = torch.zeros_like(cache_last_time) - else: - cache_last_time_next = None - # select a random att_context_size with the distribution specified by att_context_probs during training # for non-validation cases like test, validation or inference, it uses the first mode in self.att_context_size if self.training and len(self.att_context_size_all) > 1: @@ -536,7 +531,6 @@ def forward_internal( if cache_last_channel is not None: 
cache_len = self.streaming_cfg.last_channel_cache_size cache_keep_size = max_audio_length - self.streaming_cfg.cache_drop_size - cache_last_channel_next = torch.zeros_like(cache_last_channel) max_audio_length = max_audio_length + cache_len padding_length = length + cache_len offset = torch.neg(cache_last_channel_len) + cache_len @@ -561,19 +555,32 @@ def forward_internal( pad_mask = pad_mask[:, cache_len:] if att_mask is not None: att_mask = att_mask[:, cache_len:] + # Convert caches from the tensor to list + cache_last_time_next = [] + cache_last_channel_next = [] for lth, (drop_prob, layer) in enumerate(zip(self.layer_drop_probs, self.layers)): original_signal = audio_signal + if cache_last_channel is not None: + cache_last_channel_cur = cache_last_channel[lth] + cache_last_time_cur = cache_last_time[lth] + else: + cache_last_channel_cur = None + cache_last_time_cur = None audio_signal = layer( x=audio_signal, att_mask=att_mask, pos_emb=pos_emb, pad_mask=pad_mask, - cache_last_channel=cache_last_channel, - cache_last_time=cache_last_time, - cache_last_channel_next=cache_last_channel_next, - cache_last_time_next=cache_last_time_next, + cache_last_channel=cache_last_channel_cur, + cache_last_time=cache_last_time_cur, ) + + if cache_last_channel_cur is not None: + (audio_signal, cache_last_channel_cur, cache_last_time_cur) = audio_signal + cache_last_channel_next.append(cache_last_channel_cur) + cache_last_time_next.append(cache_last_time_cur) + # applying stochastic depth logic from https://arxiv.org/abs/2102.03216 if self.training and drop_prob > 0.0: should_drop = torch.rand(1) < drop_prob @@ -626,6 +633,8 @@ def forward_internal( length = length.to(dtype=torch.int64) if cache_last_channel is not None: + cache_last_channel_next = torch.stack(cache_last_channel_next, dim=0) + cache_last_time_next = torch.stack(cache_last_time_next, dim=0) return ( audio_signal, length, @@ -860,20 +869,12 @@ def setup_streaming_params( else: streaming_cfg.drop_extra_pre_encoded = streaming_cfg.pre_encode_cache_size // self.subsampling_factor - # counting the number of the layers need caching - streaming_cfg.last_channel_num = 0 - streaming_cfg.last_time_num = 0 for m in self.layers.modules(): if hasattr(m, "_max_cache_len"): if isinstance(m, MultiHeadAttention): - m._cache_id = streaming_cfg.last_channel_num m.cache_drop_size = streaming_cfg.cache_drop_size - streaming_cfg.last_channel_num += 1 - if isinstance(m, CausalConv1D): - m._cache_id = streaming_cfg.last_time_num m.cache_drop_size = streaming_cfg.cache_drop_size - streaming_cfg.last_time_num += 1 self.streaming_cfg = streaming_cfg @@ -886,19 +887,12 @@ def get_initial_cache_state(self, batch_size=1, dtype=torch.float32, device=None create_tensor = torch.zeros last_time_cache_size = self.conv_context_size[0] cache_last_channel = create_tensor( - ( - self.streaming_cfg.last_channel_num, - batch_size, - self.streaming_cfg.last_channel_cache_size, - self.d_model, - ), + (len(self.layers), batch_size, self.streaming_cfg.last_channel_cache_size, self.d_model,), device=device, dtype=dtype, ) cache_last_time = create_tensor( - (self.streaming_cfg.last_time_num, batch_size, self.d_model, last_time_cache_size), - device=device, - dtype=dtype, + (len(self.layers), batch_size, self.d_model, last_time_cache_size), device=device, dtype=dtype, ) if max_dim > 0: cache_last_channel_len = torch.randint( diff --git a/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py 
b/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py index 169dde48602f..563d4219baa7 100644 --- a/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py +++ b/nemo/collections/asr/parts/submodules/adapters/multi_head_attention_adapter_module.py @@ -147,18 +147,18 @@ def __init__( # reset parameters for Q to be identity operation self.reset_parameters() - def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb=None, cache=None): """Compute 'Scaled Dot Product Attention'. Args: query (torch.Tensor): (batch, time1, size) key (torch.Tensor): (batch, time2, size) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ # Need to perform duplicate computations as at this point the tensors have been # separated by the adapter forward @@ -166,7 +166,7 @@ def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next= key = self.pre_norm(key) value = self.pre_norm(value) - return super().forward(query, key, value, mask, pos_emb, cache=cache, cache_next=cache_next) + return super().forward(query, key, value, mask, pos_emb, cache=cache) def reset_parameters(self): with torch.no_grad(): @@ -242,7 +242,7 @@ def __init__( # reset parameters for Q to be identity operation self.reset_parameters() - def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb, cache=None): """Compute 'Scaled Dot Product Attention' with rel. positional encoding. 
Args: query (torch.Tensor): (batch, time1, size) @@ -250,10 +250,10 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) pos_emb (torch.Tensor) : (batch, time1, size) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) Returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache_next (torch.Tensor) : (batch, time_cache_next, size) """ # Need to perform duplicate computations as at this point the tensors have been # separated by the adapter forward @@ -261,7 +261,7 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) key = self.pre_norm(key) value = self.pre_norm(value) - return super().forward(query, key, value, mask, pos_emb, cache=cache, cache_next=cache_next) + return super().forward(query, key, value, mask, pos_emb, cache=cache) def reset_parameters(self): with torch.no_grad(): diff --git a/nemo/collections/asr/parts/submodules/causal_convs.py b/nemo/collections/asr/parts/submodules/causal_convs.py index 25f841802154..c6251690b1b1 100644 --- a/nemo/collections/asr/parts/submodules/causal_convs.py +++ b/nemo/collections/asr/parts/submodules/causal_convs.py @@ -45,7 +45,6 @@ def __init__( raise ValueError("Argument padding should be set to None for CausalConv2D.") self._left_padding = kernel_size - 1 self._right_padding = stride - 1 - self._cache_id = None padding = 0 super(CausalConv2D, self).__init__( @@ -113,7 +112,6 @@ def __init__( raise ValueError(f"Invalid padding param: {padding}!") self._max_cache_len = self._left_padding - self._cache_id = None super(CausalConv1D, self).__init__( in_channels=in_channels, @@ -129,21 +127,21 @@ def __init__( dtype=dtype, ) - def update_cache(self, x, cache=None, cache_next=None): + def update_cache(self, x, cache=None): if cache is None: new_x = F.pad(x, pad=(self._left_padding, self._right_padding)) else: new_x = F.pad(x, pad=(0, self._right_padding)) - new_x = torch.cat([cache[self._cache_id], new_x], dim=-1) - # todo: we should know input_x.size(-1) at config time - if cache_next is not None: - cache_keep_size = torch.tensor(x.size(-1) - self.cache_drop_size, dtype=torch.int64, device=x.device) - cache_keep_size = torch.clip(cache_keep_size, min=1, max=cache_next.size(-1)) - cache_next[self._cache_id, :, :, :-cache_keep_size] = cache[self._cache_id, :, :, cache_keep_size:] - cache_next[self._cache_id, :, :, -cache_keep_size:] = x[:, :, :cache_keep_size] - return new_x - - def forward(self, x, cache=None, cache_next=None): - x = self.update_cache(x, cache=cache, cache_next=cache_next) + new_x = torch.cat([cache, new_x], dim=-1) + if self.cache_drop_size > 0: + x = x[:, :, : -self.cache_drop_size] + cache = torch.cat([cache[:, :, x.size(-1) :], x], dim=-1) + return new_x, cache + + def forward(self, x, cache=None): + x, cache = self.update_cache(x, cache=cache) x = super().forward(x) - return x + if cache is None: + return x + else: + return x, cache diff --git a/nemo/collections/asr/parts/submodules/conformer_modules.py b/nemo/collections/asr/parts/submodules/conformer_modules.py index 579b78a8f5a8..677d2acd9f2e 100644 --- a/nemo/collections/asr/parts/submodules/conformer_modules.py +++ b/nemo/collections/asr/parts/submodules/conformer_modules.py @@ -138,29 +138,19 @@ def __init__( self.dropout = 
nn.Dropout(dropout) self.norm_out = LayerNorm(d_model) - def forward( - self, - x, - att_mask=None, - pos_emb=None, - pad_mask=None, - cache_last_channel=None, - cache_last_time=None, - cache_last_channel_next=None, - cache_last_time_next=None, - ): + def forward(self, x, att_mask=None, pos_emb=None, pad_mask=None, cache_last_channel=None, cache_last_time=None): """ Args: x (torch.Tensor): input signals (B, T, d_model) att_mask (torch.Tensor): attention masks(B, T, T) pos_emb (torch.Tensor): (L, 1, d_model) pad_mask (torch.tensor): padding mask - cache_last_channel (torch.tensor) : cache for MHA layers (N, B, T_cache, d_model) - cache_last_time (torch.tensor) : cache for convolutional layers (N, B, d_model, T_cache) - cache_last_channel_next (torch.tensor) : next cache for MHA layers (N, B, T_cache, d_model) - cache_last_time_next (torch.tensor) : next cache for convolutional layers (N, B, d_model, T_cache) + cache_last_channel (torch.tensor) : cache for MHA layers (B, T_cache, d_model) + cache_last_time (torch.tensor) : cache for convolutional layers (B, d_model, T_cache) Returns: x (torch.Tensor): (B, T, d_model) + cache_last_channel (torch.tensor) : next cache for MHA layers (B, T_cache, d_model) + cache_last_time (torch.tensor) : next cache for convolutional layers (B, d_model, T_cache) """ residual = x x = self.norm_feed_forward1(x) @@ -169,31 +159,17 @@ def forward( x = self.norm_self_att(residual) if self.self_attention_model == 'rel_pos': - x = self.self_attn( - query=x, - key=x, - value=x, - mask=att_mask, - pos_emb=pos_emb, - cache=cache_last_channel, - cache_next=cache_last_channel_next, - ) + x = self.self_attn(query=x, key=x, value=x, mask=att_mask, pos_emb=pos_emb, cache=cache_last_channel) elif self.self_attention_model == 'rel_pos_local_attn': - x = self.self_attn( - query=x, - key=x, - value=x, - pad_mask=pad_mask, - pos_emb=pos_emb, - cache=cache_last_channel, - cache_next=cache_last_channel_next, - ) + x = self.self_attn(query=x, key=x, value=x, pad_mask=pad_mask, pos_emb=pos_emb, cache=cache_last_channel) elif self.self_attention_model == 'abs_pos': - x = self.self_attn( - query=x, key=x, value=x, mask=att_mask, cache=cache_last_channel, cache_next=cache_last_channel_next - ) + x = self.self_attn(query=x, key=x, value=x, mask=att_mask, cache=cache_last_channel) else: x = None + + if x is not None and cache_last_channel is not None: + (x, cache_last_channel) = x + residual = residual + self.dropout(x) if self.is_adapter_available(): @@ -208,7 +184,9 @@ def forward( residual = pack_ip['x'] x = self.norm_conv(residual) - x = self.conv(x, pad_mask=pad_mask, cache=cache_last_time, cache_next=cache_last_time_next) + x = self.conv(x, pad_mask=pad_mask, cache=cache_last_time) + if cache_last_time is not None: + (x, cache_last_time) = x residual = residual + self.dropout(x) x = self.norm_feed_forward2(residual) @@ -228,8 +206,10 @@ def forward( if self.is_access_enabled() and self.access_cfg.get('save_encoder_tensors', False): self.register_accessible_tensor(name='encoder', tensor=x) - - return x + if cache_last_channel is None: + return x + else: + return x, cache_last_channel, cache_last_time def forward_single_enabled_adapter_( self, @@ -355,7 +335,7 @@ def __init__( in_channels=dw_conv_input_dim, out_channels=d_model, kernel_size=1, stride=1, padding=0, bias=True ) - def forward(self, x, pad_mask=None, cache=None, cache_next=None): + def forward(self, x, pad_mask=None, cache=None): x = x.transpose(1, 2) x = self.pointwise_conv1(x) @@ -368,10 +348,9 @@ def forward(self, x, 
pad_mask=None, cache=None, cache_next=None): if pad_mask is not None: x = x.float().masked_fill(pad_mask.unsqueeze(1), 0.0) + x = self.depthwise_conv(x, cache=cache) if cache is not None: - x = self.depthwise_conv(x, cache=cache, cache_next=cache_next) - else: - x = self.depthwise_conv(x) + x, cache = x if self.norm_type == "layer_norm": x = x.transpose(1, 2) @@ -383,7 +362,10 @@ def forward(self, x, pad_mask=None, cache=None, cache_next=None): x = self.activation(x) x = self.pointwise_conv2(x) x = x.transpose(1, 2) - return x + if cache is None: + return x + else: + return x, cache def reset_parameters_conv(self): pw1_max = pw2_max = self.d_model ** -0.5 diff --git a/nemo/collections/asr/parts/submodules/multi_head_attention.py b/nemo/collections/asr/parts/submodules/multi_head_attention.py index b7356ffe87e4..a0253524419e 100644 --- a/nemo/collections/asr/parts/submodules/multi_head_attention.py +++ b/nemo/collections/asr/parts/submodules/multi_head_attention.py @@ -73,7 +73,6 @@ def __init__(self, n_head, n_feat, dropout_rate, max_cache_len=0): self.dropout = nn.Dropout(p=dropout_rate) self._max_cache_len = max_cache_len - self._cache_id = None def forward_qkv(self, query, key, value): """Transforms query, key and value. @@ -119,20 +118,20 @@ def forward_attention(self, value, scores, mask): return self.linear_out(x) # (batch, time1, d_model) - def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb=None, cache=None): """Compute 'Scaled Dot Product Attention'. Args: query (torch.Tensor): (batch, time1, size) key (torch.Tensor): (batch, time2, size) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ - key, value, query = self.update_cache(key=key, value=value, query=query, cache=cache, cache_next=cache_next) + key, value, query, cache = self.update_cache(key=key, value=value, query=query, cache=cache) if torch.is_autocast_enabled(): query, key, value = query.to(torch.float32), key.to(torch.float32), value.to(torch.float32) @@ -142,17 +141,17 @@ def forward(self, query, key, value, mask, pos_emb=None, cache=None, cache_next= q, k, v = self.forward_qkv(query, key, value) scores = torch.matmul(q, k.transpose(-2, -1)) / self.s_d_k out = self.forward_attention(v, scores, mask) + if cache is None: + return out + else: + return out, cache - return out - - def update_cache(self, key, value, query, cache, cache_next): + def update_cache(self, key, value, query, cache): if cache is not None: - key = value = torch.cat([cache[self._cache_id], key], dim=1) + key = value = torch.cat([cache, key], dim=1) q_keep_size = query.shape[1] - self.cache_drop_size - if cache_next is not None: - cache_next[self._cache_id, :, :-q_keep_size, :] = cache[self._cache_id, :, q_keep_size:, :] - cache_next[self._cache_id, :, -q_keep_size:, :] = query[:, :q_keep_size, :] - return key, value, query + cache = torch.cat([cache[:, q_keep_size:, :], query[:, :q_keep_size, :]], dim=1) + return key, value, query, cache class RelPositionMultiHeadAttention(MultiHeadAttention): @@ -195,7 +194,7 @@ def rel_shift(self, x): x = x[:, :, 1:].view(b, h, qlen, 
pos_len) # (b, h, t1, t2) return x - def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None): + def forward(self, query, key, value, mask, pos_emb, cache=None): """Compute 'Scaled Dot Product Attention' with rel. positional encoding. Args: query (torch.Tensor): (batch, time1, size) @@ -203,12 +202,13 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) value(torch.Tensor): (batch, time2, size) mask (torch.Tensor): (batch, time1, time2) pos_emb (torch.Tensor) : (batch, time1, size) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) + Returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ - key, value, query = self.update_cache(key=key, value=value, query=query, cache=cache, cache_next=cache_next) + key, value, query, cache = self.update_cache(key=key, value=value, query=query, cache=cache) if torch.is_autocast_enabled(): query, key, value = query.to(torch.float32), key.to(torch.float32), value.to(torch.float32) @@ -244,7 +244,10 @@ def forward(self, query, key, value, mask, pos_emb, cache=None, cache_next=None) out = self.forward_attention(v, scores, mask) - return out + if cache is None: + return out + else: + return out, cache class RelPositionMultiHeadAttentionLongformer(RelPositionMultiHeadAttention): @@ -298,7 +301,7 @@ def __init__( self.global_k = nn.Linear(n_feat, n_feat) self.global_v = nn.Linear(n_feat, n_feat) - def forward(self, query, key, value, pad_mask, pos_emb, cache=None, cache_next=None): + def forward(self, query, key, value, pad_mask, pos_emb, cache=None): """Compute Scaled Dot Product Local Attention with rel. positional encoding. 
using overlapping chunks Args: query (torch.Tensor): (batch, time, size) @@ -306,13 +309,13 @@ def forward(self, query, key, value, pad_mask, pos_emb, cache=None, cache_next=N value(torch.Tensor): (batch, time, size) pad_mask (torch.Tensor): (batch, time) pos_emb (torch.Tensor) : (batch, 2w + 1, size) - cache (torch.Tensor) : (cache_nums, batch, time_cache, size) - cache_next (torch.Tensor) : (cache_nums, batch, time_cache_next, size) + cache (torch.Tensor) : (batch, time_cache, size) Returns: output (torch.Tensor): transformed `value` (batch, time1, d_model) weighted by the query dot key attention + cache (torch.Tensor) : (batch, time_cache_next, size) """ - key, value, query = self.update_cache(key=key, value=value, query=query, cache=cache, cache_next=cache_next) + key, value, query, cache = self.update_cache(key=key, value=value, query=query, cache=cache) if torch.is_autocast_enabled(): query, key, value = query.to(torch.float32), key.to(torch.float32), value.to(torch.float32) @@ -453,7 +456,11 @@ def forward(self, query, key, value, pad_mask, pos_emb, cache=None, cache_next=N out[is_index_global_attn_nonzero] += out_global_to_all - return self.linear_out(out.reshape(n_batch, -1, self.h * self.d_k)[:, :T]) + ret = self.linear_out(out.reshape(n_batch, -1, self.h * self.d_k)[:, :T]) + if cache is None: + return ret + else: + return ret, cache def _get_global_attn_indices(self, is_index_global_attn: torch.Tensor) -> Tuple: """ From 94e1efaace9f761a8676a0d3cc7b54c57db736c5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 7 Jul 2023 14:52:07 -0700 Subject: [PATCH 090/123] Hybrid conformer export (#6983) (#6995) * Implemented generic kv-pair setting of export_config from args * Hybrid conformer export * Hybrid decoder export * Cleanup * Changed from **kwargs * Docstring * Docs added * Stringify args * Added docs for ASR export configs * lowercase ctc --------- Signed-off-by: Boris Fomitchev Co-authored-by: Boris Fomitchev --- docs/source/asr/models.rst | 10 ++++++ docs/source/core/export.rst | 31 +++++++++++++++++++ nemo/collections/asr/models/asr_model.py | 8 +++++ .../asr/models/hybrid_rnnt_ctc_models.py | 14 +++++++++ nemo/collections/asr/models/rnnt_models.py | 12 +++++-- nemo/core/classes/exportable.py | 14 +++++++++ scripts/export.py | 19 +++++++++--- 7 files changed, 102 insertions(+), 6 deletions(-) diff --git a/docs/source/asr/models.rst b/docs/source/asr/models.rst index 80a0fd90f0fb..697a89827145 100644 --- a/docs/source/asr/models.rst +++ b/docs/source/asr/models.rst @@ -215,6 +215,11 @@ It is recommended to train a model in streaming model with limited context for t You may find FastConformer variants of cache-aware streaming models under ``/examples/asr/conf/fastconformer/``. +Note cache-aware streaming models are being exported without caching support by default. +To include caching support, `model.set_export_config({'cache_support' : 'True'})` should be called before export. +Or, if ``/scripts/export.py`` is being used: +`python export.py cache_aware_conformer.nemo cache_aware_conformer.onnx --config cache_support=True` + .. _LSTM-Transducer_model: LSTM-Transducer @@ -291,6 +296,11 @@ Similar example configs for FastConformer variants of Hybrid models can be found ``/examples/asr/conf/fastconformer/hybrid_transducer_ctc/`` ``/examples/asr/conf/fastconformer/hybrid_cache_aware_streaming/`` +Note Hybrid models are being exported as RNNT (encoder and decoder+joint parts) by default. 
+To export as CTC (single encoder+decoder graph), `model.set_export_config({'decoder_type' : 'ctc'})` should be called before export. +Or, if ``/scripts/export.py`` is being used: +`python export.py hybrid_transducer.nemo hybrid_transducer.onnx --config decoder_type=ctc` + .. _Conformer-HAT_model: Conformer-HAT (Hybrid Autoregressive Transducer) diff --git a/docs/source/core/export.rst b/docs/source/core/export.rst index 0e598e215dbf..f54daffe9c9c 100644 --- a/docs/source/core/export.rst +++ b/docs/source/core/export.rst @@ -177,6 +177,37 @@ Another common requirement for models that are being exported is to run certain # call base method for common set of modifications Exportable._prepare_for_export(self, **kwargs) +Some models that require control flow, need to be exported in multiple parts. Typical examples are RNNT nets. +To facilitate that, the hooks below are provided. To export, for example, 'encoder' and 'decoder' subnets of the model, overload list_export_subnets to return ['encoder', 'decoder']. + +.. code-block:: Python + + def get_export_subnet(self, subnet=None): + """ + Returns Exportable subnet model/module to export + """ + + + def list_export_subnets(self): + """ + Returns default set of subnet names exported for this model + First goes the one receiving input (input_example) + """ + +Some nertworks may be exported differently according to user-settable options (like ragged batch support for TTS or cache support for ASR). To facilitate that - `set_export_config()` method is provided by Exportable to set key/value pairs to predefined model.export_config dictionary, to be used during the export: + +.. code-block:: Python + def set_export_config(self, args): + """ + Sets/updates export_config dictionary + """ +Also, if an action hook on setting config is desired, this method may be overloaded by `Exportable` descendants to include one. +An example can be found in ``/nemo/collections/asr/models/rnnt_models.py``. + +Here is example on now `set_export_config()` call is being tied to command line arguments in ``/scripts/export.py`` : + +.. 
code-block:: Python + python scripts/export.py hybrid_conformer.nemo hybrid_conformer.onnx --config decoder_type=ctc Exportable Model Code ~~~~~~~~~~~~~~~~~~~~~ diff --git a/nemo/collections/asr/models/asr_model.py b/nemo/collections/asr/models/asr_model.py index 6ac3633201e2..7e03d587139f 100644 --- a/nemo/collections/asr/models/asr_model.py +++ b/nemo/collections/asr/models/asr_model.py @@ -215,3 +215,11 @@ def disabled_deployment_input_names(self): @property def disabled_deployment_output_names(self): return self.encoder.disabled_deployment_output_names + + def set_export_config(self, args): + if 'cache_support' in args: + enable = bool(args['cache_support']) + self.encoder.export_cache_support = enable + logging.info(f"Caching support enabled: {enable}") + self.encoder.setup_streaming_params() + super().set_export_config(args) diff --git a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py index 5ca6124ecfd7..11c616b1257f 100644 --- a/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py +++ b/nemo/collections/asr/models/hybrid_rnnt_ctc_models.py @@ -645,6 +645,20 @@ def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): self.finalize_interctc_metrics(metrics, outputs, prefix="test_") return metrics + # EncDecRNNTModel is exported in 2 parts + def list_export_subnets(self): + if self.cur_decoder == 'rnnt': + return ['encoder', 'decoder_joint'] + else: + return ['self'] + + @property + def output_module(self): + if self.cur_decoder == 'rnnt': + return self.decoder + else: + return self.ctc_decoder + @classmethod def list_available_models(cls) -> Optional[PretrainedModelInfo]: """ diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index 92bb04fd2a3e..0c1da97c5012 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -28,7 +28,7 @@ from nemo.collections.asr.data.audio_to_text_dali import AudioToCharDALIDataset, DALIOutputs from nemo.collections.asr.losses.rnnt import RNNTLoss, resolve_rnnt_default_loss_name from nemo.collections.asr.metrics.rnnt_wer import RNNTWER, RNNTDecoding, RNNTDecodingConfig -from nemo.collections.asr.models.asr_model import ASRModel +from nemo.collections.asr.models.asr_model import ASRModel, ExportableEncDecModel from nemo.collections.asr.modules.rnnt import RNNTDecoderJoint from nemo.collections.asr.parts.mixins import ASRModuleMixin from nemo.collections.asr.parts.utils.audio_utils import ChannelSelectorType @@ -39,7 +39,7 @@ from nemo.utils import logging -class EncDecRNNTModel(ASRModel, ASRModuleMixin, Exportable): +class EncDecRNNTModel(ASRModel, ASRModuleMixin, ExportableEncDecModel): """Base class for encoder decoder RNNT-based models.""" def __init__(self, cfg: DictConfig, trainer: Trainer = None): @@ -960,6 +960,14 @@ def list_export_subnets(self): def decoder_joint(self): return RNNTDecoderJoint(self.decoder, self.joint) + def set_export_config(self, args): + if 'decoder_type' in args: + if hasattr(self, 'change_decoding_strategy'): + self.change_decoding_strategy(decoder_type=args['decoder_type']) + else: + raise Exception("Model does not have decoder type option") + super().set_export_config(args) + @classmethod def list_available_models(cls) -> List[PretrainedModelInfo]: """ diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index 3d2682f2304e..8469e80219d6 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -302,3 
+302,17 @@ def list_export_subnets(self): First goes the one receiving input (input_example) """ return ['self'] + + def get_export_config(self): + """ + Returns export_config dictionary + """ + return getattr(self, 'export_config', {}) + + def set_export_config(self, args): + """ + Sets/updates export_config dictionary + """ + ex_config = self.get_export_config() + ex_config.update(args) + self.export_config = ex_config diff --git a/scripts/export.py b/scripts/export.py index fe3b79ebdf28..4b21bc4ffd73 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -62,6 +62,15 @@ def get_args(argv): ) parser.add_argument("--device", default="cuda", help="Device to export for") parser.add_argument("--check-tolerance", type=float, default=0.01, help="tolerance for verification") + parser.add_argument( + "--config", + metavar="KEY=VALUE", + nargs='+', + help="Set a number of key-value pairs to model.export_config dictionary " + "(do not put spaces before or after the = sign). " + "Note that values are always treated as strings.", + ) + args = parser.parse_args(argv) return args @@ -130,10 +139,12 @@ def nemo_export(argv): in_args["max_dim"] = args.max_dim max_dim = args.max_dim - if args.cache_support and hasattr(model, "encoder") and hasattr(model.encoder, "export_cache_support"): - model.encoder.export_cache_support = True - logging.info("Caching support is enabled.") - model.encoder.setup_streaming_params() + if args.cache_support: + model.set_export_config({"cache_support": "True"}) + + if args.config: + kv = dict(map(lambda s: s.split('='), args.config)) + model.set_export_config(kv) autocast = nullcontext if args.autocast: From 112c80607a05523b271ef58cb7f6af856d7e2df7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 11:48:49 -0700 Subject: [PATCH 091/123] Fixing an issue with confidence ensembles (#6987) (#7004) * Bug fix for the confidence ensembles * Relax constraints for the test --------- Signed-off-by: Igor Gitman Co-authored-by: Igor Gitman --- examples/asr/transcribe_speech.py | 8 ++++++-- nemo/collections/asr/models/confidence_ensemble.py | 9 +++++---- scripts/confidence_ensembles/build_ensemble.py | 6 ++---- .../confidence_ensembles/test_confidence_ensembles.py | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py index 4ed3d92a6305..401755bc8275 100644 --- a/examples/asr/transcribe_speech.py +++ b/examples/asr/transcribe_speech.py @@ -130,6 +130,8 @@ class TranscriptionConfig: # Set to True to output greedy timestamp information (only supported models) compute_timestamps: bool = False + # set to True if need to return full alignment information + preserve_alignment: bool = False # Set to True to output language ID information compute_langs: bool = False @@ -230,6 +232,8 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis # we will adjust this flag if the model does not support it compute_timestamps = cfg.compute_timestamps compute_langs = cfg.compute_langs + # has to be True if timestamps are required + preserve_alignment = True if cfg.compute_timestamps else cfg.preserve_alignment # Check whether model and decoder type match if isinstance(asr_model, EncDecCTCModel): @@ -252,7 +256,7 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis decoding_cfg = cfg.rnnt_decoding if cfg.decoder_type == 'rnnt' else cfg.ctc_decoding decoding_cfg.compute_timestamps = 
cfg.compute_timestamps # both ctc and rnnt support it if 'preserve_alignments' in decoding_cfg: - decoding_cfg.preserve_alignments = cfg.compute_timestamps + decoding_cfg.preserve_alignments = preserve_alignment if 'compute_langs' in decoding_cfg: decoding_cfg.compute_langs = cfg.compute_langs if hasattr(asr_model, 'cur_decoder'): @@ -267,7 +271,7 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis cfg.rnnt_decoding.compute_langs = cfg.compute_langs if 'preserve_alignments' in cfg.rnnt_decoding: - cfg.rnnt_decoding.preserve_alignments = cfg.compute_timestamps + cfg.rnnt_decoding.preserve_alignments = preserve_alignment asr_model.change_decoding_strategy(cfg.rnnt_decoding) else: diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index cd4738e7b97c..dd52d9a7010a 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -106,6 +106,11 @@ def get_filtered_logprobs(hypothesis: Hypothesis, exclude_blank: bool) -> torch. filtered_logprobs = logprobs[:1] else: filtered_logprobs = logprobs + + # need to make sure logprobs are always normalized, so checking if they sum up to 1 + if not torch.allclose(filtered_logprobs[0].exp().sum(), torch.tensor(1.0)): + filtered_logprobs = torch.log_softmax(filtered_logprobs, dim=1) + return filtered_logprobs @@ -217,10 +222,6 @@ def update_decoding_parameters(self, decoding_cfg: DictConfig): with open_dict(decoding_cfg): decoding_cfg.temperature = self.cfg.temperature decoding_cfg.preserve_alignments = True - if 'confidence_cfg' in decoding_cfg: - decoding_cfg.confidence_cfg.preserve_frame_confidence = True - else: - decoding_cfg.confidence_cfg = ConfidenceConfig(preserve_frame_confidence=True) def setup_training_data(self, train_data_config: Union[DictConfig, Dict]): """Pass-through to the ensemble models. 
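The normalization guard added to get_filtered_logprobs above is easy to sanity-check in isolation. A minimal sketch of the same idea (tensor shapes are illustrative and not taken from the patch):

.. code-block:: Python

    import torch

    # Re-apply log_softmax only when the scores are not already a log-distribution,
    # so decoders that already emit normalized log-probs pass through unchanged.
    filtered_logprobs = torch.randn(5, 128)  # [time, vocab] scores from a decoder
    if not torch.allclose(filtered_logprobs[0].exp().sum(), torch.tensor(1.0)):
        filtered_logprobs = torch.log_softmax(filtered_logprobs, dim=1)
    assert torch.allclose(filtered_logprobs.exp().sum(dim=1), torch.ones(5), atol=1e-4)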
diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index 07ceccb8b3d5..e953dec02b7a 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -458,7 +458,7 @@ def find_best_confidence( return best_conf_spec.to_confidence_config(), best_pipe -@hydra_runner(schema=BuildEnsembleConfig) +@hydra_runner(config_name="BuildEnsembleConfig", schema=BuildEnsembleConfig) def main(cfg: BuildEnsembleConfig): # silencing all messages from nemo/ptl to avoid dumping tons of configs to the stdout logging.getLogger('pytorch_lightning').setLevel(logging.CRITICAL) @@ -471,12 +471,10 @@ def main(cfg: BuildEnsembleConfig): pl.seed_everything(cfg.random_seed) cfg.transcription.random_seed = None # seed is already applied cfg.transcription.return_transcriptions = True - # that sets preserve_alignment to True - cfg.transcription.compute_timestamps = True + cfg.transcription.preserve_alignment = True cfg.transcription.ctc_decoding.temperature = cfg.temperature cfg.transcription.rnnt_decoding.temperature = cfg.temperature # this ensures that generated output is after log-softmax for consistency with CTC - cfg.transcription.rnnt_decoding.confidence_cfg.preserve_frame_confidence = True train_confidences = [] dev_confidences = [] diff --git a/scripts/confidence_ensembles/test_confidence_ensembles.py b/scripts/confidence_ensembles/test_confidence_ensembles.py index b665375c0c33..fa537529ab6b 100644 --- a/scripts/confidence_ensembles/test_confidence_ensembles.py +++ b/scripts/confidence_ensembles/test_confidence_ensembles.py @@ -113,4 +113,4 @@ def test_confidence_ensemble(tmp_path, build_args): ) results = speech_to_text_eval.main(eval_cfg) - assert results.metric_value < 0.15 # relaxed check for better than 15% WER + assert results.metric_value < 0.20 # relaxed check for better than 20% WER From 68b4d1f9e0271d41af5c0598ad1b13f2b2738323 Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Tue, 11 Jul 2023 08:55:51 -0700 Subject: [PATCH 092/123] [TTS] Add cosine distance option to TTS aligner (#6806) * [TTS] Add cosine distance option to TTS aligner Signed-off-by: Ryan * [TTS] Update aligner comments Signed-off-by: Ryan --------- Signed-off-by: Ryan --- examples/tts/conf/fastpitch/fastpitch.yaml | 2 + nemo/collections/tts/models/fastpitch.py | 16 ++-- nemo/collections/tts/modules/aligner.py | 92 +++++++++++++++++----- nemo/collections/tts/modules/submodules.py | 2 +- 4 files changed, 79 insertions(+), 33 deletions(-) diff --git a/examples/tts/conf/fastpitch/fastpitch.yaml b/examples/tts/conf/fastpitch/fastpitch.yaml index 1d552d058d76..39d5f395afbc 100644 --- a/examples/tts/conf/fastpitch/fastpitch.yaml +++ b/examples/tts/conf/fastpitch/fastpitch.yaml @@ -193,6 +193,8 @@ model: alignment_module: _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder n_text_channels: ${model.symbols_embedding_dim} + dist_type: cosine + temperature: 15.0 duration_predictor: _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index 1a68d9e51aeb..dc598a9a76d1 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -121,16 +121,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.log_images = cfg.get("log_images", False) self.log_train_images = False - loss_scale = 0.1 if self.learn_alignment else 1.0 - dur_loss_scale = loss_scale - 
pitch_loss_scale = loss_scale - energy_loss_scale = loss_scale - if "dur_loss_scale" in cfg: - dur_loss_scale = cfg.dur_loss_scale - if "pitch_loss_scale" in cfg: - pitch_loss_scale = cfg.pitch_loss_scale - if "energy_loss_scale" in cfg: - energy_loss_scale = cfg.energy_loss_scale + default_prosody_loss_scale = 0.1 if self.learn_alignment else 1.0 + dur_loss_scale = cfg.get("dur_loss_scale", default_prosody_loss_scale) + pitch_loss_scale = cfg.get("pitch_loss_scale", default_prosody_loss_scale) + energy_loss_scale = cfg.get("energy_loss_scale", default_prosody_loss_scale) self.mel_loss_fn = MelLoss() self.pitch_loss_fn = PitchLoss(loss_scale=pitch_loss_scale) @@ -139,7 +133,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.aligner = None if self.learn_alignment: - aligner_loss_scale = cfg.aligner_loss_scale if "aligner_loss_scale" in cfg else 1.0 + aligner_loss_scale = cfg.get("aligner_loss_scale", 1.0) self.aligner = instantiate(self._cfg.alignment_module) self.forward_sum_loss_fn = ForwardSumLoss(loss_scale=aligner_loss_scale) self.bin_loss_fn = BinLoss(loss_scale=aligner_loss_scale) diff --git a/nemo/collections/tts/modules/aligner.py b/nemo/collections/tts/modules/aligner.py index bc170742df23..2910602474fd 100644 --- a/nemo/collections/tts/modules/aligner.py +++ b/nemo/collections/tts/modules/aligner.py @@ -14,6 +14,7 @@ import torch +from einops import rearrange from torch import nn from nemo.collections.tts.modules.submodules import ConditionalInput, ConvNorm @@ -21,10 +22,27 @@ class AlignmentEncoder(torch.nn.Module): - """Module for alignment text and mel spectrogram. """ + """ + Module for alignment text and mel spectrogram. + + Args: + n_mel_channels: Dimension of mel spectrogram. + n_text_channels: Dimension of text embeddings. + n_att_channels: Dimension of model + temperature: Temperature to scale distance by. + Suggested to be 0.0005 when using dist_type "l2" and 15.0 when using "cosine". + condition_types: List of types for nemo.collections.tts.modules.submodules.ConditionalInput. + dist_type: Distance type to use for similarity measurement. Supports "l2" and "cosine" distance. + """ def __init__( - self, n_mel_channels=80, n_text_channels=512, n_att_channels=80, temperature=0.0005, condition_types=[] + self, + n_mel_channels=80, + n_text_channels=512, + n_att_channels=80, + temperature=0.0005, + condition_types=[], + dist_type="l2", ): super().__init__() self.temperature = temperature @@ -45,27 +63,60 @@ def __init__( torch.nn.ReLU(), ConvNorm(n_mel_channels, n_att_channels, kernel_size=1, bias=True), ) + if dist_type == "l2": + self.dist_fn = self.get_euclidean_dist + elif dist_type == "cosine": + self.dist_fn = self.get_cosine_dist + else: + raise ValueError(f"Unknown distance type '{dist_type}'") + + @staticmethod + def _apply_mask(inputs, mask, mask_value): + if mask is None: + return + + mask = rearrange(mask, "B T2 1 -> B 1 1 T2") + inputs.data.masked_fill_(mask, mask_value) def get_dist(self, keys, queries, mask=None): """Calculation of distance matrix. Args: - queries (torch.tensor): B x C x T1 tensor (probably going to be mel data). + queries (torch.tensor): B x C1 x T1 tensor (probably going to be mel data). keys (torch.tensor): B x C2 x T2 tensor (text data). mask (torch.tensor): B x T2 x 1 tensor, binary mask for variable length entries and also can be used for ignoring unnecessary elements from keys in the resulting distance matrix (True = mask element, False = leave unchanged). Output: dist (torch.tensor): B x T1 x T2 tensor. 
""" - keys_enc = self.key_proj(keys) # B x n_attn_dims x T2 - queries_enc = self.query_proj(queries) # B x n_attn_dims x T1 - attn = (queries_enc[:, :, :, None] - keys_enc[:, :, None]) ** 2 # B x n_attn_dims x T1 x T2 - dist = attn.sum(1, keepdim=True) # B x 1 x T1 x T2 + # B x C x T1 + queries_enc = self.query_proj(queries) + # B x C x T2 + keys_enc = self.key_proj(keys) + # B x 1 x T1 x T2 + dist = self.dist_fn(queries_enc=queries_enc, keys_enc=keys_enc) + + self._apply_mask(dist, mask, float("inf")) - if mask is not None: - dist.data.masked_fill_(mask.permute(0, 2, 1).unsqueeze(2), float("inf")) + return dist - return dist.squeeze(1) + @staticmethod + def get_euclidean_dist(queries_enc, keys_enc): + queries_enc = rearrange(queries_enc, "B C T1 -> B C T1 1") + keys_enc = rearrange(keys_enc, "B C T2 -> B C 1 T2") + # B x C x T1 x T2 + distance = (queries_enc - keys_enc) ** 2 + # B x 1 x T1 x T2 + l2_dist = distance.sum(axis=1, keepdim=True) + return l2_dist + + @staticmethod + def get_cosine_dist(queries_enc, keys_enc): + queries_enc = rearrange(queries_enc, "B C T1 -> B C T1 1") + keys_enc = rearrange(keys_enc, "B C T2 -> B C 1 T2") + cosine_dist = -torch.nn.functional.cosine_similarity(queries_enc, keys_enc, dim=1) + cosine_dist = rearrange(cosine_dist, "B T1 T2 -> B 1 T1 T2") + return cosine_dist @staticmethod def get_durations(attn_soft, text_len, spect_len): @@ -96,8 +147,7 @@ def get_mean_dist_by_durations(dist, durations, mask=None): batch_size, t1_size, t2_size = dist.size() assert torch.all(torch.eq(durations.sum(dim=1), t1_size)) - if mask is not None: - dist = dist.masked_fill(mask.permute(0, 2, 1).unsqueeze(2), 0) + AlignmentEncoder._apply_mask(dist, mask, 0) # TODO(oktai15): make it more efficient mean_dist_by_durations = [] @@ -149,7 +199,7 @@ def forward(self, queries, keys, mask=None, attn_prior=None, conditioning=None): """Forward pass of the aligner encoder. Args: - queries (torch.tensor): B x C x T1 tensor (probably going to be mel data). + queries (torch.tensor): B x C1 x T1 tensor (probably going to be mel data). keys (torch.tensor): B x C2 x T2 tensor (text data). mask (torch.tensor): B x T2 x 1 tensor, binary mask for variable length entries (True = mask element, False = leave unchanged). attn_prior (torch.tensor): prior for attention matrix. @@ -159,20 +209,20 @@ def forward(self, queries, keys, mask=None, attn_prior=None, conditioning=None): attn_logprob (torch.tensor): B x 1 x T1 x T2 log-prob attention mask. 
""" keys = self.cond_input(keys.transpose(1, 2), conditioning).transpose(1, 2) - keys_enc = self.key_proj(keys) # B x n_attn_dims x T2 - queries_enc = self.query_proj(queries) # B x n_attn_dims x T1 - - # Simplistic Gaussian Isotopic Attention - attn = (queries_enc[:, :, :, None] - keys_enc[:, :, None]) ** 2 # B x n_attn_dims x T1 x T2 - attn = -self.temperature * attn.sum(1, keepdim=True) + # B x C x T1 + queries_enc = self.query_proj(queries) + # B x C x T2 + keys_enc = self.key_proj(keys) + # B x 1 x T1 x T2 + distance = self.dist_fn(queries_enc=queries_enc, keys_enc=keys_enc) + attn = -self.temperature * distance if attn_prior is not None: attn = self.log_softmax(attn) + torch.log(attn_prior[:, None] + 1e-8) attn_logprob = attn.clone() - if mask is not None: - attn.data.masked_fill_(mask.permute(0, 2, 1).unsqueeze(2), -float("inf")) + self._apply_mask(attn, mask, -float("inf")) attn = self.softmax(attn) # softmax along T2 return attn, attn_logprob diff --git a/nemo/collections/tts/modules/submodules.py b/nemo/collections/tts/modules/submodules.py index 408ab02dead2..92218e807aac 100644 --- a/nemo/collections/tts/modules/submodules.py +++ b/nemo/collections/tts/modules/submodules.py @@ -509,7 +509,7 @@ def forward(self, inputs, conditioning=None): inputs = inputs + conditioning if "concat" in self.condition_types: - conditioning = conditionting.repeat(1, inputs.shape[1], 1) + conditioning = conditioning.repeat(1, inputs.shape[1], 1) inputs = torch.cat([inputs, conditioning]) inputs = self.concat_proj(inputs) From 0f79a9f14cb768aa2369dff97b296836ce7ade0e Mon Sep 17 00:00:00 2001 From: trias702 <25867060+trias702@users.noreply.github.com> Date: Tue, 11 Jul 2023 11:38:57 -0500 Subject: [PATCH 093/123] Minor MPT-7B fixes and creation script update (#6982) * Initial commit of minor MPT-7B fixes Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Daniel Egert Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../language_modeling/megatron_base_model.py | 1 + .../convert_mpt_7b_hf_to_nemo.py | 44 ++++++++++++++----- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index e018a4decaf6..3f541cfce14e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -221,6 +221,7 @@ def _build_tokenizer(self): merges_file=self.register_artifact("tokenizer.merge_file", self._cfg.tokenizer.get('merge_file', None)), use_fast=self.cfg.tokenizer.get('use_fast', False), delimiter=self.cfg.tokenizer.get('delimiter', None), + special_tokens=self.cfg.tokenizer.get('special_tokens', None), legacy=legacy, ) diff --git a/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py b/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py index 14d7b6ae54ea..fd761b6b20c2 100644 --- a/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py +++ b/scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py @@ -34,11 +34,19 @@ TP/PP values you want: NeMo/examples/nlp/language_modeling/megatron_change_num_partitions.py +* Please note: when using the above script, you MUST also pass the `-–megatron_legacy` flag + Failure to do this will result in a corrupt model! 
* + +This script also requires a baseline config file from which to override default parameters. +You can specify the location of this file using the -c argument. You can use any Nemo config +file which is appropriate, but in the default case, we highly recommend you use the following: + NeMo/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml + Here is an example usage command: ```python -python scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py -i /path/to/mpt_7b -o /path/to/save +python scripts/nlp_language_modeling/convert_mpt_7b_hf_to_nemo.py -c /path/to/megatron_gpt_config.yaml -i /path/to/mpt_7b -o /path/to/save ``` """ @@ -49,6 +57,7 @@ import pytorch_lightning as pl import torch +import yaml from omegaconf import OmegaConf from nemo.collections.nlp.models.language_modeling.megatron import GPTModel @@ -60,6 +69,9 @@ parser.add_argument( '-i', '--input', required=True, type=str, help='path to the two MPT-7B .bin weight files from HuggingFace' ) + parser.add_argument( + '-c', '--config', required=True, type=str, help='the path to the megatron_gpt_config.yaml file' + ) parser.add_argument( '-o', '--output', required=False, default=None, type=str, help='path to dir where to store output .nemo file' ) @@ -71,22 +83,37 @@ logging.critical(f'Input directory [ {args.input} ] does not exist or cannot be found. Aborting.') exit(255) - model_dict = { - 'micro_batch_size': 4, - 'global_batch_size': 8, + if not os.path.exists(args.config): + logging.critical(f'Path to config file [ {args.config} ] does not exist or cannot be found. Aborting.') + exit(255) + + with open(args.config, 'r', encoding='utf_8') as fr: + orig_cfg = yaml.safe_load(fr) + + model_dict = orig_cfg['model'] + if 'tokenizer' in model_dict: + del model_dict['tokenizer'] + if 'data' in model_dict: + del model_dict['data'] + + override_model_dict = { + 'micro_batch_size': 1, + 'global_batch_size': 4, 'rampup_batch_size': None, 'tensor_model_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'virtual_pipeline_model_parallel_size': None, 'megatron_amp_O2': True, 'transformer_engine': False, - 'use_cpu_initialization': True, + 'use_cpu_initialization': False, 'hidden_size': 4096, + 'encoder_seq_length': 2048, 'max_position_embeddings': 2048, 'num_layers': 32, 'num_attention_heads': 32, 'ffn_hidden_size': 4 * 4096, 'precision': 'bf16', + 'layernorm_epsilon': 1e-5, 'pre_process': True, 'post_process': True, 'num_tokentypes': 0, @@ -114,11 +141,6 @@ 'type': 'EleutherAI/gpt-neox-20b', 'use_fast': True, } - optim_dict = { - 'name': 'fused_adam', - 'lr': 2e-4, - 'weight_decay': 0.01, - } trainer_dict = { 'devices': 1, 'num_nodes': 1, @@ -139,8 +161,8 @@ 'enable_model_summary': False, } + model_dict.update(override_model_dict) model_dict['tokenizer'] = tokeniser_dict - model_dict['optim'] = optim_dict omega_cfg = OmegaConf.create(model_dict) From 0cca30006a40611b5b3d925b86bc226aa7942437 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 11 Jul 2023 12:38:35 -0600 Subject: [PATCH 094/123] Change Jenkins timeout (#6997) * change timeout Signed-off-by: ericharper * change to 8 hours Signed-off-by: ericharper --------- Signed-off-by: ericharper --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index be62291daf24..766d32ebd8c4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -6,7 +6,7 @@ pipeline { } } options { - timeout(time: 2, unit: 'HOURS') + timeout(time: 8, unit: 'HOURS') disableConcurrentBuilds(abortPrevious: true) } From 
9e75050e6b64a3d2aa528a7cc9acbb2861bed8d4 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Tue, 11 Jul 2023 12:23:23 -0700 Subject: [PATCH 095/123] remove hard coded input and output fields (#7008) * remove hard coded input and output fields Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../megatron/gpt_sft_dataset.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 94c4b3c54c63..756494f2f315 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -147,15 +147,23 @@ def _process_example(self, example): output = example[self.label_key] if self.prompt_template is not None: - assert '{input}' in self.prompt_template - assert '{output}' in self.prompt_template + assert f'{{{self.context_key}}}' in self.prompt_template + assert f'{{{self.label_key}}}' in self.prompt_template # Make sure that '{output}' always occurs at the end of the prompt template string - assert self.prompt_template.index('{output}') == len(self.prompt_template) - len('{output}') + assert self.prompt_template.index(f'{{{self.label_key}}}') == len(self.prompt_template) - len( + f'{{{self.label_key}}}' + ) # Get the context by replacing only the input original_context = context - context = self.prompt_template.replace('{input}', context).replace('{output}', '').strip(' ') + context = ( + self.prompt_template.replace(f'{{{self.context_key}}}', context) + .replace(f'{{{self.label_key}}}', '') + .strip(' ') + ) # Replace the input and output placeholders with the actual input and output - text = self.prompt_template.replace('{input}', original_context).replace('{output}', output) + text = self.prompt_template.replace(f'{{{self.context_key}}}', original_context).replace( + f'{{{self.label_key}}}', output + ) if self.separate_prompt_and_response_with_newline and self.prompt_template is None: text = context + '\n' + output From 41d8477404dcce20667f453166a9e7adb178bef4 Mon Sep 17 00:00:00 2001 From: Sandeep Subramanian Date: Tue, 11 Jul 2023 12:56:17 -0700 Subject: [PATCH 096/123] RoPE length extrapolation with interpolation (#7005) * Push changes Signed-off-by: MaximumEntropy * Fixes Signed-off-by: MaximumEntropy * add continue training script Signed-off-by: MaximumEntropy * [WIP] nonlinear interp Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy * override encoder_seq_len Signed-off-by: MaximumEntropy * Remove nonlinear Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * sft with pi (#7006) * sft with pi Signed-off-by: Evelina * update values only if not None" Signed-off-by: Evelina --------- Signed-off-by: Evelina * Address comments Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add info Signed-off-by: MaximumEntropy * Empty Signed-off-by: MaximumEntropy --------- Signed-off-by: MaximumEntropy Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Evelina 
<10428420+ekmb@users.noreply.github.com> --- .../conf/megatron_gpt_config.yaml | 1 + .../megatron_gpt_continue_training.py | 193 ++++++++++++++++++ .../tuning/conf/megatron_gpt_sft.yaml | 2 + .../tuning/megatron_gpt_peft_eval.py | 4 + .../tuning/megatron_gpt_sft.py | 9 + .../language_modeling/megatron/gpt_model.py | 2 + .../language_modeling/megatron_gpt_model.py | 17 +- .../modules/common/megatron/language_model.py | 7 +- .../nlp/modules/common/megatron/module.py | 4 +- .../rotary_position_embedding.py | 17 +- 10 files changed, 249 insertions(+), 7 deletions(-) create mode 100644 examples/nlp/language_modeling/megatron_gpt_continue_training.py diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index e588e94a6720..c2b0343c2ff7 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -83,6 +83,7 @@ model: share_embeddings_and_output_weights: True # Share embedding and output layer weights. overlap_p2p_comm: False # Overlap p2p communication with computes. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 batch_p2p_comm: True # Batch consecutive inter-peer send/recv operations. This argument is valid only when `virtual_pipeline_model_parallel_size` is larger than 1 + seq_len_interpolation_factor: null # RoPE Interpolation factor for sequence length. This is used to build long-context models with RoPE ex: https://arxiv.org/abs/2306.15595. tokenizer: library: 'megatron' diff --git a/examples/nlp/language_modeling/megatron_gpt_continue_training.py b/examples/nlp/language_modeling/megatron_gpt_continue_training.py new file mode 100644 index 000000000000..e90198833595 --- /dev/null +++ b/examples/nlp/language_modeling/megatron_gpt_continue_training.py @@ -0,0 +1,193 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
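+# Overview: this script restores an existing Megatron GPT checkpoint (a .nemo file via
+# `restore_from_path`, or a PTL .ckpt plus hparams file via `model.pretrained_checkpoint`),
+# overrides selected config fields in `_modify_config` (batch sizes, data/optim settings,
+# encoder_seq_length, max_position_embeddings, seq_len_interpolation_factor,
+# use_flash_attention) and then continues pre-training with `trainer.fit(model)`.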
+ +import os +import tempfile + +from omegaconf.omegaconf import OmegaConf, open_dict +from pytorch_lightning import Trainer +from pytorch_lightning.plugins.environments import TorchElasticEnvironment +from pytorch_lightning.trainer.connectors.checkpoint_connector import CheckpointConnector + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import ( + GradScaler, + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import AppState, logging +from nemo.utils.exp_manager import exp_manager +from nemo.utils.model_utils import inject_model_parallel_rank + + +def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): + """ + This function modifies the original gpt pre-training config (t5_cfg) with attributes from the finetuning config (cfg). + The `add_cfg_to_tree` arg adds `cfg` to the top of the yaml tree which is needed for all `hparams.yaml` files when passed as an arg to `load_from_checkpoint()`. + """ + OmegaConf.set_struct(gpt_cfg, True) + OmegaConf.resolve(cfg) + with open_dict(gpt_cfg): + gpt_cfg.megatron_amp_O2 = cfg.model.get('megatron_amp_O2', False) + gpt_cfg.micro_batch_size = cfg.model.micro_batch_size + gpt_cfg.global_batch_size = cfg.model.global_batch_size + gpt_cfg.sequence_parallel = cfg.model.get("sequence_parallel", False) + gpt_cfg.activations_checkpoint_granularity = cfg.model.get("activations_checkpoint_granularity", None) + gpt_cfg.activations_checkpoint_num_layers = cfg.model.get("activations_checkpoint_num_layers", None) + gpt_cfg.activations_checkpoint_method = cfg.model.get("activations_checkpoint_method", None) + gpt_cfg.data = cfg.model.data + gpt_cfg.optim = cfg.model.optim + gpt_cfg.precision = cfg.trainer.precision + gpt_cfg.restore_from_path = cfg.restore_from_path + gpt_cfg.resume_from_checkpoint = cfg.model.resume_from_checkpoint + gpt_cfg.gradient_as_bucket_view = cfg.model.gradient_as_bucket_view + gpt_cfg.encoder_seq_length = cfg.model.encoder_seq_length + gpt_cfg.max_position_embeddings = cfg.model.max_position_embeddings + gpt_cfg.seq_len_interpolation_factor = cfg.model.seq_len_interpolation_factor + gpt_cfg.use_flash_attention = cfg.model.use_flash_attention + + # This is needed when modifying a hparam file directly to load `.ckpt` files. + # This is not needed to modify the cfg in `.nemo` files. 
+ if add_cfg_to_tree: + OmegaConf.resolve(gpt_cfg) + gpt_cfg.cfg = gpt_cfg + + return gpt_cfg + + +def load_from_nemo(cls, cfg, trainer, gpt_cfg, modify_confg_fn): + gpt_cfg = modify_confg_fn(gpt_cfg, cfg, add_cfg_to_tree=False) + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.restore_from_path + model = cls.restore_from( + restore_path=cfg.restore_from_path, + trainer=trainer, + override_config_path=gpt_cfg, + save_restore_connector=save_restore_connector, + ) + return model + + +def load_from_checkpoint_dir(cls, cfg, trainer, modify_confg_fn): + app_state = AppState() + if cfg.model.tensor_model_parallel_size > 1 or cfg.model.pipeline_model_parallel_size > 1: + app_state.model_parallel_size = cfg.model.tensor_model_parallel_size * cfg.model.pipeline_model_parallel_size + app_state.tensor_model_parallel_size = cfg.model.tensor_model_parallel_size + app_state.pipeline_model_parallel_size = cfg.model.pipeline_model_parallel_size + ( + app_state.tensor_model_parallel_rank, + app_state.pipeline_model_parallel_rank, + app_state.model_parallel_size, + app_state.data_parallel_size, + app_state.pipeline_model_parallel_split_rank, + app_state.virtual_pipeline_model_parallel_rank, + ) = fake_initialize_model_parallel( + world_size=app_state.model_parallel_size, + rank=trainer.global_rank, + tensor_model_parallel_size_=cfg.model.tensor_model_parallel_size, + pipeline_model_parallel_size_=cfg.model.pipeline_model_parallel_size, + pipeline_model_parallel_split_rank_=cfg.model.pipeline_model_parallel_split_rank, + ) + checkpoint_path = inject_model_parallel_rank( + os.path.join(cfg.model.pretrained_checkpoint.checkpoint_dir, cfg.model.pretrained_checkpoint.checkpoint_name) + ) + hparams_file = OmegaConf.load(cfg.model.pretrained_checkpoint.hparams_file) + gpt_cfg = modify_confg_fn(hparams_file.cfg, cfg, add_cfg_to_tree=True) + with tempfile.NamedTemporaryFile(suffix='.yaml') as f: + OmegaConf.save(config=gpt_cfg, f=f.name) + model = cls.load_from_checkpoint(checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name,) + return model + + +def validate_checkpoint_loading_args(cfg): + if cfg.checkpoint_dir is None or not os.path.isdir(cfg.checkpoint_dir): + raise ValueError(f'Checkpoint directory {cfg.checkpoint_dir} does not exist or is not a directory.') + if cfg.checkpoint_name is None: + raise ValueError(f'Checkpoint name {cfg.checkpoint_name} is not valid.') + if cfg.hparams_file is None or not os.path.isfile(cfg.hparams_file): + raise ValueError(f'Hparams file {cfg.hparams_file} does not exist or is not a file.') + + +@hydra_runner(config_path="conf", config_name="megatron_gpt_config") +def main(cfg) -> None: + logging.info("\n\n************** Experiment configuration ***********") + logging.info(f'\n{OmegaConf.to_yaml(cfg)}') + + megatron_amp_o2 = cfg.model.get('megatron_amp_O2', False) + with_distributed_adam = cfg.model.optim.get('name', 'fused_adam') == 'distributed_fused_adam' + plugins = [] + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, + gradient_as_bucket_view=cfg.model.gradient_as_bucket_view, + find_unused_parameters=False, + ) + if cfg.trainer.precision in [16, 'bf16']: + scaler = None + if cfg.trainer.precision == 16: + scaler = GradScaler( + init_scale=cfg.model.get('native_amp_init_scale', 2 ** 32), + growth_interval=cfg.model.get('native_amp_growth_interval', 1000), + hysteresis=cfg.model.get('hysteresis', 2), + ) + if megatron_amp_o2 and not 
with_distributed_adam: + plugins.append(MegatronHalfPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + else: + plugins.append(PipelineMixedPrecisionPlugin(precision=cfg.trainer.precision, device='cuda', scaler=scaler)) + + if cfg.get('cluster_type', None) == 'BCP': + plugins.append(TorchElasticEnvironment()) + + trainer = Trainer(plugins=plugins, strategy=strategy, **cfg.trainer) + + exp_manager(trainer, cfg.exp_manager) + + # update resume from checkpoint found by exp_manager + if cfg.model.resume_from_checkpoint is not None: + resume_from_checkpoint = cfg.model.resume_from_checkpoint + else: + resume_from_checkpoint = trainer._checkpoint_connector.resume_from_checkpoint_fit_path + logging.info(f'Resuming training from checkpoint: {resume_from_checkpoint}') + + trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint) + + if cfg.restore_from_path: + save_restore_connector = NLPSaveRestoreConnector() + if os.path.isdir(cfg.restore_from_path): + save_restore_connector.model_extracted_dir = cfg.restore_from_path + gpt_cfg = MegatronGPTModel.restore_from( + restore_path=cfg.restore_from_path, + trainer=trainer, + return_config=True, + save_restore_connector=save_restore_connector, + ) + model = load_from_nemo(MegatronGPTModel, cfg, trainer, gpt_cfg, modify_confg_fn=_modify_config) + elif cfg.model.get("pretrained_checkpoint", None) is not None: + validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint) + model = load_from_checkpoint_dir(MegatronGPTModel, cfg, trainer, gpt_cfg, modify_confg_fn=_modify_config) + else: + print(' > WARNING: No checkpoint provided. Starting from scratch.') + # hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams + with open_dict(cfg): + cfg.model.precision = cfg.trainer.precision + model = MegatronGPTModel(cfg.model, trainer) + trainer.fit(model) + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml index f8a8e6b9dbc0..0e3f0d712dd6 100644 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml @@ -60,6 +60,8 @@ model: activations_checkpoint_num_layers: null # not used with 'selective' answer_only_loss: False # not used right now gradient_as_bucket_view: False + seq_len_interpolation_factor: null # if not None, seq_len_interpolation_factor will match the base model's value + use_flash_attention: null # if not None, will match the base model's value hidden_dropout: 0.0 attention_dropout: 0.0 diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py index fc427a60d172..ed60328fd812 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py @@ -127,6 +127,10 @@ def main(cfg) -> None: peft_model_cfg.data.test_ds = cfg.model.data.test_ds peft_model_cfg.activations_checkpoint_granularity = None peft_model_cfg.activations_checkpoint_method = None + if peft_model_cfg.get("use_flash_attention", False): + peft_model_cfg.use_flash_attention = cfg.model.use_flash_attention + if cfg.model.get("seq_len_interpolation_factor", None) is not None: + peft_model_cfg["seq_len_interpolation_factor"] = cfg.model.seq_len_interpolation_factor with open_dict(cfg): # update the 
config with the trained model config diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py index 0737d55cc514..eb4bd3125cd0 100644 --- a/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py +++ b/examples/nlp/language_modeling/tuning/megatron_gpt_sft.py @@ -64,6 +64,15 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False): sft_cls = MegatronGPTSFTModel gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}" + if cfg.model.get('use_flash_attention', None) is not None: + gpt_cfg.use_flash_attention = cfg.model.use_flash_attention + + if cfg.model.get('seq_len_interpolation_factor', None) is not None: + gpt_cfg.seq_len_interpolation_factor = cfg.model.seq_len_interpolation_factor + + sft_cls = MegatronGPTSFTModel + gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}" + # This is needed when modifying a hparam file directly to load `.ckpt` files. # This is not needed to modify the cfg in `.nemo` files. if add_cfg_to_tree: diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index 8e28b6cab362..d70c3e06bf01 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -166,6 +166,7 @@ def __init__( use_emha=False, ub_tp_comm_overlap=False, use_flash_attention=False, + seq_len_interpolation_factor=None, ): super(GPTModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -249,6 +250,7 @@ def __init__( use_emha=use_emha, ub_tp_comm_overlap=ub_tp_comm_overlap, use_flash_attention=use_flash_attention, + seq_len_interpolation_factor=seq_len_interpolation_factor, ) if self.share_embeddings_and_output_weights: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 44b484b28949..55c3786a3d96 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -249,10 +249,20 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): if isinstance(self.model, list): converted_model = [] for module in self.model: - converted_model.append(Float16Module(module=module, precision=cfg.precision)) + converted_model.append( + Float16Module( + module=module, + precision=cfg.precision, + share_token_embeddings=self.cfg.get('share_embeddings_and_output_weights', True), + ) + ) self.model = converted_model else: - self.model = Float16Module(module=self.model, precision=cfg.precision) + self.model = Float16Module( + module=self.model, + precision=cfg.precision, + share_token_embeddings=self.cfg.get('share_embeddings_and_output_weights', True), + ) if self.trainer.precision == 'bf16': self.autocast_dtype = torch.bfloat16 @@ -360,6 +370,7 @@ def model_provider_func(self, pre_process, post_process): ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), use_flash_attention=self.cfg.get('use_flash_attention', False), megatron_legacy=self.cfg.get('megatron_legacy', False), + seq_len_interpolation_factor=self.cfg.get('seq_len_interpolation_factor', None), ) return model @@ -981,7 +992,7 @@ def build_pretraining_data_loader( data_parallel_size=parallel_state.get_data_parallel_world_size(), drop_last=drop_last, global_batch_size=self.cfg.global_batch_size, - rampup_batch_size=self.cfg.rampup_batch_size, + 
rampup_batch_size=self.cfg.get('rampup_batch_size', None), pad_samples_to_global_batch_size=pad_samples_to_global_batch_size, ) elif self.cfg.data.dataloader_type == 'cyclic': diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index 683163246379..2aa2e8a3860e 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -123,6 +123,7 @@ def get_language_model( use_emha=False, ub_tp_comm_overlap=False, use_flash_attention=False, + seq_len_interpolation_factor=None, ): """Build language model and return along with the key to save.""" @@ -200,6 +201,7 @@ def get_language_model( use_emha=use_emha, ub_tp_comm_overlap=ub_tp_comm_overlap, use_flash_attention=use_flash_attention, + seq_len_interpolation_factor=seq_len_interpolation_factor, ) # key used for checkpoints. language_model_key = 'language_model' @@ -508,6 +510,7 @@ def __init__( use_emha=False, ub_tp_comm_overlap=False, use_flash_attention=False, + seq_len_interpolation_factor=None, ): super(TransformerLanguageModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -559,7 +562,9 @@ def __init__( assert 0 < rotary_percentage <= 1 if rotary_percentage < 1: rotary_dim = int(rotary_dim * rotary_percentage) - self.rotary_pos_emb = RotaryEmbedding(rotary_dim) + self.rotary_pos_emb = RotaryEmbedding( + rotary_dim, seq_len_interpolation_factor=seq_len_interpolation_factor + ) elif position_embedding_type == 'alibi': # TODO: If this is used for encoder-decodemax_position_embeddingsr model, implement proper logic and following diff --git a/nemo/collections/nlp/modules/common/megatron/module.py b/nemo/collections/nlp/modules/common/megatron/module.py index 22a223013fd2..0c8c811c2661 100644 --- a/nemo/collections/nlp/modules/common/megatron/module.py +++ b/nemo/collections/nlp/modules/common/megatron/module.py @@ -254,12 +254,12 @@ def float_conversion(val): class Float16Module(MegatronModule): - def __init__(self, module, precision): + def __init__(self, module, precision, share_token_embeddings=True): if not HAVE_MEGATRON_CORE: raise ImportError( "Megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt." ) - super().__init__() + super().__init__(share_token_embeddings=share_token_embeddings) self.precision = precision if precision == 'bf16': diff --git a/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py index 5a8d6d7dd333..c97010ecb911 100644 --- a/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py +++ b/nemo/collections/nlp/modules/common/megatron/position_embedding/rotary_position_embedding.py @@ -21,13 +21,28 @@ class RotaryEmbedding(nn.Module): - def __init__(self, dim): + """ + Implements Rotary Position Embedding from https://arxiv.org/abs/2104.09864. + """ + + def __init__(self, dim: int, seq_len_interpolation_factor: int = None): + """ + Args: + + dim (int): rotary embedding dimension + seq_len_interpolation_factor (int): if not None, discrete positions will be interpolated + by this factor via the trick in https://arxiv.org/abs/2306.15595. 
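+                Illustrative example (values chosen only for illustration): with
+                seq_len_interpolation_factor=2, forward() rescales position p to p / 2,
+                so a model trained with 2048 positions can cover sequences of roughly
+                4096 tokens at inference time.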
+ """ super().__init__() + self.seq_len_interpolation_factor = seq_len_interpolation_factor inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim)) self.register_buffer('inv_freq', inv_freq) def forward(self, max_seq_len, offset=0): seq = torch.arange(max_seq_len, device=self.inv_freq.device) + offset + if self.seq_len_interpolation_factor is not None: + seq = seq.type_as(self.inv_freq) + seq *= 1 / self.seq_len_interpolation_factor freqs = einsum('i , j -> i j', seq.type_as(self.inv_freq), self.inv_freq) # first part even vector components, second part odd vector components, # 2 * dim in dimension size From e87985d0b77fcf3ab770df7aecf4e821fc2ba140 Mon Sep 17 00:00:00 2001 From: Sandeep Subramanian Date: Wed, 12 Jul 2023 01:24:20 -0700 Subject: [PATCH 097/123] add async + distopt to sft (#7018) Signed-off-by: MaximumEntropy --- .../language_modeling/megatron_gpt_sft_model.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 9507a01d01f0..946df3da2aa5 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -296,6 +296,15 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): tensor_shape = [seq_length, get_micro_batch_size(), self.cfg.hidden_size] data_iter = get_iterator_k_split(batch, get_num_microbatches()) + # handle asynchronous grad reduction + no_sync_func = None + grad_sync_func = None + param_sync_func = None + if not forward_only and self.with_distributed_adam: + no_sync_func = partial(self._optimizer.no_sync, greedy_grad_copy=self.megatron_amp_o2,) + grad_sync_func = self.reduce_overlap_gradients + param_sync_func = self.sync_overlap_parameters + fwd_bwd_function = get_forward_backward_func() losses_reduced_per_micro_batch = fwd_bwd_function( @@ -309,6 +318,11 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): grad_scaler=self.trainer.precision_plugin.scaler.scale if self.cfg.precision == 16 else None, sequence_parallel=self.cfg.get('sequence_parallel', False), enable_autocast=self.enable_autocast, + no_sync_func=no_sync_func, + grad_sync_func=grad_sync_func, + param_sync_func=param_sync_func, + overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False), + batch_p2p_comm=self.cfg.get('batch_p2p_comm', True), ) # only the last stages of the pipeline return losses From 77c666f6710f0e7ce6076f023f31122f0dfe0f43 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Wed, 12 Jul 2023 09:25:23 -0700 Subject: [PATCH 098/123] Adding tutorial for confidence ensembles (#6932) * Adding the confidence ensembles tutorial. 
Signed-off-by: Igor Gitman * Fix issues with notebook in colab Signed-off-by: Igor Gitman * Add clarification about the last cell Signed-off-by: Igor Gitman * Move SDP installation on top of the tutorial Signed-off-by: Igor Gitman --------- Signed-off-by: Igor Gitman --- PUBLICATIONS.md | 49 +- docs/source/asr/api.rst | 5 + docs/source/asr/configs.rst | 2 +- docs/source/asr/models.rst | 36 +- docs/source/starthere/tutorials.rst | 5 +- examples/asr/transcribe_speech.py | 1 - .../asr/models/confidence_ensemble.py | 10 +- .../confidence_ensembles/build_ensemble.py | 67 ++- .../asr/test_asr_interctc_models.py | 1 - .../asr/test_confidence_ensembles.py | 180 ++++++ tutorials/asr/Confidence_Ensembles.ipynb | 517 ++++++++++++++++++ 11 files changed, 832 insertions(+), 41 deletions(-) create mode 100644 tests/collections/asr/test_confidence_ensembles.py create mode 100644 tutorials/asr/Confidence_Ensembles.ipynb diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md index 365ed2773ed3..cd120efc7e7b 100644 --- a/PUBLICATIONS.md +++ b/PUBLICATIONS.md @@ -9,6 +9,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2023 + * [Confidence-based Ensembles of End-to-End Speech Recognition Models](https://arxiv.org/abs/2306.15824) * [Fast Entropy-Based Methods of Word-Level Confidence Estimation for End-to-End Automatic Speech Recognition](https://ieeexplore.ieee.org/abstract/document/10022960) * [Damage Control During Domain Adaptation for Transducer Based Automatic Speech Recognition](https://ieeexplore.ieee.org/abstract/document/10023219) @@ -23,13 +24,13 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [Citrinet: Closing the Gap between Non-Autoregressive and Autoregressive End-to-End Models for Automatic Speech Recognition](https://arxiv.org/abs/2104.01721) * [SPGISpeech: 5,000 hours of transcribed financial audio for fully formatted end-to-end speech recognition](https://www.isca-speech.org/archive/interspeech_2021/oneill21_interspeech.html) * [CarneliNet: Neural Mixture Model for Automatic Speech Recognition](https://arxiv.org/abs/2107.10708) * [CTC Variations Through New WFST Topologies](https://arxiv.org/abs/2110.03098) * [A Toolbox for Construction and Analysis of Speech Datasets](https://openreview.net/pdf?id=oJ0oHQtAld) - +
@@ -45,11 +46,11 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2019 - + * [Jasper: An End-to-End Convolutional Neural Acoustic Model](https://arxiv.org/abs/1904.03288) * [QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions](https://arxiv.org/abs/1910.10261) - - + +
@@ -60,7 +61,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [TitaNet: Neural Model for Speaker Representation with 1D Depth-Wise Separable Convolutions and Global Context](https://ieeexplore.ieee.org/abstract/document/9746806)
@@ -68,8 +69,8 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2020 - - * [SpeakerNet: 1D Depth-wise Separable Convolutional Network for Text-Independent Speaker Recognition and Verification]( https://arxiv.org/pdf/2010.12653.pdf) + + * [SpeakerNet: 1D Depth-wise Separable Convolutional Network for Text-Independent Speaker Recognition and Verification]( https://arxiv.org/pdf/2010.12653.pdf)
@@ -79,7 +80,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [AmberNet: A Compact End-to-End Model for Spoken Language Identification](https://arxiv.org/abs/2210.15781) * [Accidental Learners: Spoken Language Identification in Multilingual Self-Supervised Models](https://arxiv.org/abs/2211.05103) @@ -88,17 +89,17 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [MarbleNet: Deep 1D Time-Channel Separable Convolutional Neural Network for Voice Activity Detection](https://ieeexplore.ieee.org/abstract/document/9414470/)
- +
2020 - + * [MatchboxNet - 1D Time-Channel Separable Convolutional Neural Network Architecture for Speech Commands Recognition](http://www.interspeech2020.org/index.php?m=content&c=index&a=show&catid=337&id=993) - +
@@ -108,7 +109,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [NVIDIA NeMo Offline Speech Translation Systems for IWSLT 2022](https://aclanthology.org/2022.iwslt-1.18/)
@@ -130,7 +131,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [BioMegatron: Larger Biomedical Domain Language Model ](https://aclanthology.org/2020.emnlp-main.379/)
@@ -157,16 +158,16 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2021 - + * [SGD-QA: Fast Schema-Guided Dialogue State Tracking for Unseen Services](https://arxiv.org/abs/2105.08049) - +
2020 - + * [A Fast and Robust BERT-based Dialogue State Tracker for Schema-Guided Dialogue Dataset](https://arxiv.org/abs/2008.12335) - +
-------- @@ -175,19 +176,19 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I
2022 - + * [Adapter-Based Extension of Multi-Speaker Text-to-Speech Model for New Speakers](https://arxiv.org/abs/2211.00585)
2021 - + * [TalkNet: Fully-Convolutional Non-Autoregressive Speech Synthesis Model](https://www.isca-speech.org/archive/interspeech_2021/beliaev21_interspeech.html) * [TalkNet 2: Non-Autoregressive Depth-Wise Separable Convolutional Model for Speech Synthesis with Explicit Pitch and Duration Prediction](https://arxiv.org/abs/2104.08189) * [Hi-Fi Multi-Speaker English TTS Dataset](https://www.isca-speech.org/archive/pdfs/interspeech_2021/bakhturina21_interspeech.pdf) * [Mixer-TTS: non-autoregressive, fast and compact text-to-speech model conditioned on language model embeddings](https://arxiv.org/abs/2110.03584) - +
@@ -196,7 +197,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I # (Inverse) Text Normalization
2022 - + * [Shallow Fusion of Weighted Finite-State Transducer and Language Model for Text Normalization](https://arxiv.org/abs/2203.15917) * [Thutmose Tagger: Single-pass neural model for Inverse Text Normalization](https://arxiv.org/abs/2208.00064) @@ -207,7 +208,7 @@ Here, we list a collection of research articles that utilize the NeMo Toolkit. I * [NeMo Inverse Text Normalization: From Development to Production](https://www.isca-speech.org/archive/pdfs/interspeech_2021/zhang21ga_interspeech.pdf) * [A Unified Transformer-based Framework for Duplex Text Normalization](https://arxiv.org/pdf/2108.09889.pdf ) - +
-------- \ No newline at end of file diff --git a/docs/source/asr/api.rst b/docs/source/asr/api.rst index 1e2073798d64..1d880018fd15 100644 --- a/docs/source/asr/api.rst +++ b/docs/source/asr/api.rst @@ -39,6 +39,11 @@ Model Classes :show-inheritance: :members: from_asr_config, from_pretrained_models, save_asr_model_to, setup_training_data +.. _confidence-ensembles-api: + +.. autoclass:: nemo.collections.asr.models.confidence_ensembles.ConfidenceEnsembleModel + :show-inheritance: + :members: transcribe Modules ------- diff --git a/docs/source/asr/configs.rst b/docs/source/asr/configs.rst index f9a4ea9970b1..d21b40e34570 100644 --- a/docs/source/asr/configs.rst +++ b/docs/source/asr/configs.rst @@ -64,7 +64,7 @@ An example ASR train and validation configuration should look similar to the fol There are two ways to test/validate on more than one manifest: - Specify a list in the `manifest_filepath` field. Results will be reported for each, the first one being used for overall loss / WER (specify `val_dl_idx` if you wish to change that). In this case, all manifests will share configuration parameters. -- Use the ds_item key and pass a list of config objects to it. This allows you to use differently configured datasets for validation, e.g. +- Use the ds_item key and pass a list of config objects to it. This allows you to use differently configured datasets for validation, e.g. .. code-block:: yaml diff --git a/docs/source/asr/models.rst b/docs/source/asr/models.rst index 697a89827145..708d66307dd3 100644 --- a/docs/source/asr/models.rst +++ b/docs/source/asr/models.rst @@ -142,7 +142,7 @@ With local attention, inference is possible on audios >1 hrs (256 subsampling ch Fast Conformer models were trained using CosineAnnealing (instead of Noam) as the scheduler. -You may find the example CTC config at +You may find the example CTC config at ``/examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml`` and the transducer config at ``/examples/asr/conf/fastconformer/fast-conformer_transducer_bpe.yaml`` @@ -310,7 +310,7 @@ The main idea is to separate labels and blank score predictions, which allows to When external LM is available for inference, the internal LM can be subtracted from HAT model prediction in beamsearch decoding to improve external LM efficiency. It can be helpful in the case of text-only adaptation for new domains. -The only difference from the standard Conformer-Transducer model (RNNT) is the use of `"HATJiont" `_ +The only difference from the standard Conformer-Transducer model (RNNT) is the use of `"HATJiont" `_ class (instead of "RNNTJoint") for joint module. The all HAT logic is implemented in the "HATJiont" class. .. image:: images/hat.png @@ -353,6 +353,38 @@ For the detailed information see: * :ref:`Configs and training ` +.. _Confidence-Ensembles: + +Confidence-based Ensembles +-------------------------- + +Confidence-based ensemble is a simple way to combine multiple models into a single system by only retaining the +output of the most confident model. Below is a schematic illustration of how such ensembles work. + + .. image:: https://github.com/NVIDIA/NeMo/releases/download/v1.19.0/conf-ensembles-overview.png + :align: center + :alt: confidence-based ensembles + :scale: 50% + +For more details about this model, see the `paper `_ +or read our `tutorial `_. + +NeMo support Confidence-based Ensembles through the +:ref:`nemo.collections.asr.models.confidence_ensembles.ConfidenceEnsembleModel ` class. + +A typical workflow to create and use the ensemble is like this + +1. 
Run `scripts/confidence_ensembles/build_ensemble.py `_
+   script to create an ensemble from existing models. See the documentation inside the script for usage examples
+   and a description of all the supported functionality.
+2. The script outputs a checkpoint that combines all the models in an ensemble. It can be directly used to transcribe
+   speech by calling the ``.transcribe()`` method or using
+   `examples/asr/transcribe_speech.py `_.
+
+Note that the ensemble cannot be modified after construction (e.g. it does not support finetuning) and only
+transcribe functionality is supported (e.g., ``.forward()`` is not properly defined).
+
+
 References
 ----------
diff --git a/docs/source/starthere/tutorials.rst b/docs/source/starthere/tutorials.rst
index 9c960053398b..e24637718690 100644
--- a/docs/source/starthere/tutorials.rst
+++ b/docs/source/starthere/tutorials.rst
@@ -106,6 +106,9 @@ To run a tutorial:
    * - ASR
      - Multi-lingual ASR
      - `Multi-lingual ASR `_
+   * - ASR
+     - Confidence-based Ensembles
+     - `Confidence-based Ensembles `_
    * - NLP
      - Using Pretrained Language Models for Downstream Tasks
      - `Pretrained Language Models for Downstream Tasks `_
@@ -146,7 +149,7 @@ To run a tutorial:
      - P-Tuning/Prompt-Tuning
      - `P-Tuning/Prompt-Tuning `_
    * - NLP
-     - Synthetic Tabular Data Generation 
+     - Synthetic Tabular Data Generation
      - `Synthetic Tabular Data Generation `_
    * - TTS
      - NeMo TTS Primer
diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py
index 401755bc8275..f97dd96ad0f3 100644
--- a/examples/asr/transcribe_speech.py
+++ b/examples/asr/transcribe_speech.py
@@ -269,7 +269,6 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis
         cfg.rnnt_decoding.fused_batch_size = -1
         cfg.rnnt_decoding.compute_timestamps = cfg.compute_timestamps
         cfg.rnnt_decoding.compute_langs = cfg.compute_langs
-
         if 'preserve_alignments' in cfg.rnnt_decoding:
             cfg.rnnt_decoding.preserve_alignments = preserve_alignment
 
diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py
index dd52d9a7010a..9b3191c8874d 100644
--- a/nemo/collections/asr/models/confidence_ensemble.py
+++ b/nemo/collections/asr/models/confidence_ensemble.py
@@ -151,7 +151,11 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig)
 
 class ConfidenceEnsembleModel(ModelPT):
     """Implementation of the confidence ensemble model.
 
-    See for details.
+    See https://arxiv.org/abs/2306.15824 for details.
+
+    .. note::
+        Currently this class only supports the `transcribe` method as it requires
+        full-utterance confidence scores to operate.
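+
+    A minimal usage sketch (illustrative only: the checkpoint and audio file names below are
+    placeholders, and the ensemble checkpoint is assumed to have been built beforehand with
+    ``scripts/confidence_ensembles/build_ensemble.py``)::
+
+        from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel
+
+        # restore the combined checkpoint produced by build_ensemble.py
+        ensemble = ConfidenceEnsembleModel.restore_from("confidence_ensemble.nemo")
+        # transcribe a list of audio files; the most confident model's output is kept
+        transcripts = ensemble.transcribe(["audio_1.wav", "audio_2.wav"])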
""" def __init__( @@ -206,7 +210,7 @@ def __init__( for model_idx in range(self.num_models): model = getattr(self, f"model{model_idx}") # for now we assume users are direclty responsible for matching - # decoder type when building ensemlbe with inference type + # decoder type when building ensemble with inference type # TODO: add automatic checks for errors if isinstance(model, EncDecHybridRNNTCTCModel): self.update_decoding_parameters(model.cfg.decoding) @@ -218,7 +222,7 @@ def __init__( model.change_decoding_strategy(model.cfg.decoding) def update_decoding_parameters(self, decoding_cfg: DictConfig): - """Updating temperature/preserve_alignment/preserve_frame_confidence parameters of the config.""" + """Updating temperature/preserve_alignment parameters of the config.""" with open_dict(decoding_cfg): decoding_cfg.temperature = self.cfg.temperature decoding_cfg.preserve_alignments = True diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index e953dec02b7a..b5685c63aa25 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -11,9 +11,60 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -# Run ``python build_ensemble.py --help`` for usage examples. -# TODO: write usage. Mention that neither train nor dev requires transcriptions + +""" +This script provides a functionality to create confidence-based ensembles +from a collection of pretrained models. + +For more details see the paper https://arxiv.org/abs/2306.15824 +or tutorial in tutorials/asr/Confidence_Ensembles.ipynb + +You would typically use this script by providing a yaml config file or overriding +default options from command line. + +Usage examples: + +1. Building an ensemble of two monolingual models with default settings (no confidence tuning). + + python build_ensemble.py --config-path=. --config-name=ensemble_config.yaml + ensemble.0.model=stt_it_conformer_ctc_large + ensemble.0.training_manifest= + ensemble.1.model=stt_es_conformer_ctc_large + ensemble.1.training_manifest= + output_path= + + You can have more than 2 models and can control transcription settings (e.g., batch size) + with ``transcription.`` parameters. + +2. If you want to get improved results, you can enable tuning of the confidence and logistic regression (LR) parameters. + E.g. + + python build_ensemble.py + + ensemble.0.dev_manifest= + ... + # IMPORTANT: see the note below if you use > 2 models! + ensemble.N.dev_manifest= + tune_confidence=True # to allow confidence tuning. LR is tuned by default + + As with any tuning, it is recommended to have reasonably large validation set for each model, + otherwise you might overfit to the validation data. + + Note that if you add additional models (> 2) you will need to modify ensemble_config.yaml + or create a new one with added models in there. While it's theoretically possible to + fully override such parameters from commandline, hydra is very unfriendly for such + use-cases, so it's strongly recommended to be creating new configs. + +3. 
If you want to precisely control tuning grid search, you can do that with + + python build_ensemble.py + + tune_confidence_config.confidence_type='[entropy_renui_exp,entropy_tsallis_exp]' # only tune over this set + tune_confidence_config.alpha='[0.1,0.5,1.0]' # only tune over this set + +You can check the dataclasses in this file for the full list of supported +arguments and their default values. +""" import atexit @@ -31,7 +82,7 @@ import joblib import numpy as np import pytorch_lightning as pl -from omegaconf import DictConfig, OmegaConf +from omegaconf import MISSING, DictConfig, OmegaConf from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix from sklearn.pipeline import Pipeline, make_pipeline @@ -73,9 +124,9 @@ @dataclass class EnsembleConfig: # .nemo path or pretrained name - model: str + model: str = MISSING # path to the training data manifest (non-tarred) - training_manifest: str + training_manifest: str = MISSING # specify to limit the number of training samples # 100 is most likely enough, but setting higher default just in case max_training_samples: int = 1000 @@ -150,10 +201,10 @@ class TuneLogisticRegressionConfig: @dataclass class BuildEnsembleConfig: # where to save the resulting ensemble model - output_path: str + output_path: str = MISSING # each model specification - ensemble: List[EnsembleConfig] + ensemble: List[EnsembleConfig] = MISSING random_seed: int = 0 # for reproducibility diff --git a/tests/collections/asr/test_asr_interctc_models.py b/tests/collections/asr/test_asr_interctc_models.py index bad918fbc1f0..db9a4396d72d 100644 --- a/tests/collections/asr/test_asr_interctc_models.py +++ b/tests/collections/asr/test_asr_interctc_models.py @@ -86,7 +86,6 @@ class TestInterCTCLoss: ([], [0.3]), ], ) - @pytest.mark.pleasefixme def test_forward(self, model_class, encoder_config, apply_at_layers, loss_weights): preprocessor_config = {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor'} vocabulary = [ diff --git a/tests/collections/asr/test_confidence_ensembles.py b/tests/collections/asr/test_confidence_ensembles.py new file mode 100644 index 000000000000..ad14a2a7e6ff --- /dev/null +++ b/tests/collections/asr/test_confidence_ensembles.py @@ -0,0 +1,180 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
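+
+# Lightweight construction-only checks for ConfidenceEnsembleModel: they build
+# small dummy CTC / RNNT / hybrid configs and verify that 2-model and 5-model
+# ensembles can be instantiated. More extensive end-to-end coverage lives in
+# scripts/confidence_ensembles/test_confidence_ensembles.py.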
+ +import joblib +import pytest +from omegaconf import DictConfig, ListConfig + +from nemo.collections.asr.metrics.wer import CTCDecodingConfig +from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel, EncDecRNNTModel +from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMethodConfig + + +def get_model_config(model_class): + preprocessor_config = {'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor'} + vocabulary = [' ', "'", 'a', 'b', 'c'] # does not matter, so keeping small + encoder_config = { + '_target_': 'nemo.collections.asr.modules.ConformerEncoder', + 'feat_in': 64, + 'n_layers': 8, + 'd_model': 4, + } + if model_class is EncDecCTCModel: + decoder_config = { + '_target_': 'nemo.collections.asr.modules.ConvASRDecoder', + 'feat_in': None, + 'num_classes': len(vocabulary), + 'vocabulary': vocabulary, + } + model_config = DictConfig( + { + 'compute_eval_loss': True, # will be ignored by the model + 'preprocessor': DictConfig(preprocessor_config), + 'encoder': DictConfig(encoder_config), + 'decoder': DictConfig(decoder_config), + } + ) + else: + decoder_config = { + '_target_': 'nemo.collections.asr.modules.RNNTDecoder', + 'prednet': {'pred_hidden': 4, 'pred_rnn_layers': 1}, + } + joint_config = { + '_target_': 'nemo.collections.asr.modules.RNNTJoint', + 'jointnet': {'joint_hidden': 4, 'activation': 'relu'}, + } + decoding_config = {'strategy': 'greedy_batch', 'greedy': {'max_symbols': 30}} + loss_config = {'loss_name': 'default', 'warprnnt_numba_kwargs': {'fastemit_lambda': 0.001}} + + model_config = DictConfig( + { + 'compute_eval_loss': True, + 'labels': ListConfig(vocabulary), + 'preprocessor': DictConfig(preprocessor_config), + 'model_defaults': DictConfig({'enc_hidden': 4, 'pred_hidden': 4}), + 'encoder': DictConfig(encoder_config), + 'decoder': DictConfig(decoder_config), + 'joint': DictConfig(joint_config), + 'decoding': DictConfig(decoding_config), + 'loss': DictConfig(loss_config), + 'optim': {'name': 'adamw'}, + 'aux_ctc': { + 'ctc_loss_weight': 0.3, + 'use_cer': False, + 'ctc_reduction': 'mean_batch', + 'decoder': { + '_target_': 'nemo.collections.asr.modules.ConvASRDecoder', + 'feat_in': None, + 'num_classes': len(vocabulary), + 'vocabulary': vocabulary, + }, + 'decoding': DictConfig(CTCDecodingConfig), + }, + } + ) + model_config['target'] = f'{model_class.__module__}.{model_class.__name__}' + + return model_config + + +class TestConfidenceEnsembles: + """Only basic tests that are very fast to run. 
+ + There are much more extensive integration tests available in + scripts/confidence_ensembles/test_confidence_ensembles.py + """ + + @pytest.mark.unit + @pytest.mark.parametrize( + "model_class0", [EncDecCTCModel, EncDecRNNTModel, EncDecHybridRNNTCTCModel], + ) + @pytest.mark.parametrize( + "model_class1", [EncDecCTCModel, EncDecRNNTModel, EncDecHybridRNNTCTCModel], + ) + def test_model_creation_2models(self, tmp_path, model_class0, model_class1): + """Basic test to check that ensemble of 2 models can be created.""" + model_config0 = get_model_config(model_class0) + model_config1 = get_model_config(model_class1) + + # dummy pickle file for the model selection block + joblib.dump({}, tmp_path / 'dummy.pkl') + + # default confidence + confidence_config = ConfidenceConfig( + # we keep frame confidences and apply aggregation manually to get full-utterance confidence + preserve_frame_confidence=True, + exclude_blank=True, + aggregation="mean", + method_cfg=ConfidenceMethodConfig( + name="entropy", + entropy_type="renui", + temperature=0.25, # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 + entropy_norm="lin", + ), + ) + + # just checking that no errors are raised when creating the model + ConfidenceEnsembleModel( + cfg=DictConfig( + { + 'model_selection_block': str(tmp_path / 'dummy.pkl'), + 'confidence': confidence_config, + 'temperature': 1.0, + 'num_models': 2, + 'model0': model_config0, + 'model1': model_config1, + } + ), + trainer=None, + ) + + def test_model_creation_5models(self, tmp_path): + """Basic test to check that ensemble of 5 models can be created.""" + model_configs = [get_model_config(EncDecCTCModel) for _ in range(5)] + + # dummy pickle file for the model selection block + joblib.dump({}, tmp_path / 'dummy.pkl') + + # default confidence + confidence_config = ConfidenceConfig( + # we keep frame confidences and apply aggregation manually to get full-utterance confidence + preserve_frame_confidence=True, + exclude_blank=True, + aggregation="mean", + method_cfg=ConfidenceMethodConfig( + name="entropy", + entropy_type="renui", + temperature=0.25, # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 + entropy_norm="lin", + ), + ) + + # just checking that no errors are raised when creating the model + ConfidenceEnsembleModel( + cfg=DictConfig( + { + 'model_selection_block': str(tmp_path / 'dummy.pkl'), + 'confidence': confidence_config, + 'temperature': 1.0, + 'num_models': 2, + 'model0': model_configs[0], + 'model1': model_configs[1], + 'model2': model_configs[2], + 'model3': model_configs[3], + 'model4': model_configs[4], + } + ), + trainer=None, + ) diff --git a/tutorials/asr/Confidence_Ensembles.ipynb b/tutorials/asr/Confidence_Ensembles.ipynb new file mode 100644 index 000000000000..f9617c75e36a --- /dev/null +++ b/tutorials/asr/Confidence_Ensembles.ipynb @@ -0,0 +1,517 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. 
Run this cell to set up dependencies.\n", + "5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", + "\"\"\"\n", + "import os\n", + "\n", + "# Install dependencies\n", + "!apt-get install sox libsndfile1 ffmpeg\n", + "\n", + "# setting up a workspace folder where all downloaded content will be held\n", + "# change it to whatever location is convenient and remove after you're done with this tutorial\n", + "WORKSPACE_DIR = os.path.abspath('confidence-ensembles-tutorial')\n", + "os.makedirs(WORKSPACE_DIR, exist_ok=True)\n", + "\n", + "# need to locate NeMo repository\n", + "# either provide a path to local NeMo repository with NeMo already installed or git clone\n", + "\n", + "# option #1: local path to NeMo repo with NeMo already installed\n", + "NEMO_DIR = os.path.dirname(os.path.dirname(os.path.abspath('')))\n", + "\n", + "# option #2: download NeMo repo\n", + "if 'google.colab' in str(get_ipython()) or not os.path.exists(os.path.join(NEMO_DIR, \"nemo\")):\n", + " BRANCH = \"main\"\n", + " !git clone -b $BRANCH https://github.com/NVIDIA/NeMo $WORKSPACE_DIR/NeMo\n", + " NEMO_DIR = os.path.join(WORKSPACE_DIR, 'NeMo')\n", + "\n", + "# installing nemo (from source code)\n", + "!cd $NEMO_DIR && ./reinstall.sh\n", + "\n", + "# clone SDP and install requirements\n", + "!git clone https://github.com/NVIDIA/NeMo-speech-data-processor $WORKSPACE_DIR/NeMo-speech-data-processor\n", + "!pip install -r $WORKSPACE_DIR/NeMo-speech-data-processor/requirements.txt\n", + "\n", + "\"\"\"\n", + "Remember to restart the runtime for the kernel to pick up any upgraded packages.\n", + "Alternatively, you can uncomment the exit() below to crash and restart the kernel, in the case\n", + "that you want to use the \"Run All Cells\" (or similar) option.\n", + "\"\"\"\n", + "# exit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Confidence-based Ensembles of End-to-End ASR Models\n", + "\n", + "In this tutorial we discuss how to use confidence-based ensembles to improve different aspects of ASR models.\n", + "\n", + "We are only going to cover basics in this tutorial, so make sure to check out our [paper](https://arxiv.org/abs/2306.15824) to learn more details!\n", + "\n", + "Before we are going to learn **what** a confidence-based ensemble is, let's discuss **why** you might want to use one. A high-level motivation behind this method is that there are many \"expert\" ASR models that are publicly available. These models are often specialized to a certain language, accent or domain and might not perform well outside of it. But what if you need to cover multiple such target domains and you don't have a single model that works well on all of them? This is exactly the case when you should try confidence-based ensembles! In our paper we show two applications of this general idea:\n", + "\n", + "1. If you need to support multi-lingual ASR, but don't have a single model that covers all your languages, you basically have two choices. You can either run a separate [language-identification](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/langid_ambernet) (LID) block first to pick an ASR model from the corresponding language. Or you can run all models in parallel and use confidence to select which output to use. 
In the paper we show that the second method generally works better and can even be combined with an LID model for the best results.\n",
+    "2. Suppose you have a generic ASR model as well as a [finetuned version](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb) that works much better on a target domain. In such a case, your finetuned model will likely degrade on the \"base\" domain. What if you need to support both cases in a single application and don't have an easy way to know which domain the input comes from? To solve this, you can use confidence ensembles to pick the right output automatically.\n",
+    "\n",
+    "Let's also briefly talk about some limitations of confidence-based ensembles.\n",
+    "\n",
+    "1. Confidence-based ensembles are not well suited for latency-critical applications as they require a few seconds of audio to select the most confident model.\n",
+    "2. The runtime cost grows linearly with each added model, which limits the practically useful ensemble size.\n",
+    "3. Given enough compute and data, it is likely possible to build specialized models that would outperform confidence-based ensembles on most tasks.\n",
+    "\n",
+    "To sum up — if you're combining a small number of models (e.g., up to 5), can afford a few seconds of additional latency and don't have resources to build a specialized model, confidence-based ensembles might be a good fit and you should try them out! There are many ASR models available in the [NVIDIA NGC cloud](https://catalog.ngc.nvidia.com/models) as well as in other model hubs, such as [Hugging Face](https://huggingface.co/nvidia), that you can combine in an ensemble.\n",
+    "\n",
+    "In the next few cells we will cover what a confidence-based ensemble is and some best practices for using these models. Each cell is mostly self-contained, so feel free to skip around or jump directly to the code part if you want to see usage examples right away."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## What is a confidence-based ensemble?\n",
+    "\n",
+    "You're probably familiar with more traditional [ensembles of machine learning models](https://en.wikipedia.org/wiki/Ensemble_learning). Confidence ensembles are a less popular approach where we only use the output of the single model that is deemed best for the current input. A typical way to pick the \"best\" output is to select the model with the highest confidence score, which provides an estimate of how likely the output is to be correct. Here is a schematic illustration of the model.\n",
+    "\n",
+    "\"Confidence-ensemble\n",
+    "\n",
+    "As you can see, to define a confidence ensemble, we need to define 3 things:\n",
+    "\n",
+    "1. Which models are part of the ensemble.\n",
+    "2. How do we estimate a model's confidence.\n",
+    "3. How do we \"calibrate\" confidence values via a model selection block.\n",
+    "\n",
+    "Let's discuss each of these 3 items below.\n",
+    "\n",
+    "### Which models to use?\n",
+    "\n",
+    "A short answer — you can use any ASR models. E.g., you can combine a number of CTC models, or Transducer models, or even mix-and-match. \n",
+    "\n",
+    "A more detailed answer is that the performance of the confidence ensemble is upper-bounded by the performance of the best model on each of the input examples. Thus you will benefit if some of your models work really well on part of the input compared to other models. This way you will get more gains compared to each separate model, and it will also make correct model identification easier.\n",
+    "\n",
+    "### How to estimate a model's confidence?\n",
+    "\n",
+    "Good news, we have a whole separate [tutorial](TBD) on this topic! You can go through it if you want to know all the details about different ways to estimate the confidence of NeMo ASR models. There are different confidence measures and aggregation functions, and for the absolute best performance you will need to run a grid search to pick the best confidence estimation method for your specific models and data.\n",
+    "\n",
+    "That being said, we found that there exists a set of confidence parameters that works pretty well on a large set of models and datasets. They are the default in NeMo, so you might not need to worry about running the search. If you do want to maximize the performance by tuning the confidence parameters, you only need to add [a few extra config lines](#Building-and-evaluating-ensemble-(tuned-parameters)).\n",
+    "\n",
+    "### How to calibrate confidence values?\n",
+    "\n",
+    "Let's now talk about the \"model selection block\". First of all — you don't need to know the details to use confidence ensembles; calibration is always performed automatically when you build the model. But if you want to learn more, read on!\n",
+    "\n",
+    "First, let's discuss why we need a separate \"model selection block\" to pick the most confident model. If we had access to the perfect confidence, which would exactly equal the probability of the model's output being correct, we wouldn't need this block. In this idealized case we could simply take the model with the maximum confidence score. But in practice, models tend to be over- or under-confident, which means that their confidence scores need to be calibrated together to be comparable. E.g., one model might mostly produce scores from 0 to 0.8, while another model tends to produce scores from 0 to 0.5, even though they have the same average accuracy. So we want to multiply the first model's score by 1.25 and the second model's score by 2.0 to put them on the same \"scale\".\n",
+    "\n",
+    "More generally, the goal of the model selection block is to pick the right model for each input. So it needs to solve a standard classification task, where the set of all models' confidence scores is the input and the \"most confident\" model index is the output. Since this is a standard classification problem in a low-dimensional space, we found that using a logistic regression (LR) model is sufficient to solve it with high accuracy. We assume that for each model there exists a small (e.g., 100-1000 examples) set of input utterances that the model performs the best on. E.g., if you build a multi-lingual ensemble, this set will come from the language the model is trained to recognize. We will use this samples → model correspondence as the ground truth for training the LR.\n",
+    "\n",
+    "> **_note:_** If you don't have a clear \"audio → best recognition model\" correspondence, you can still build it artificially, as long as you also have ground-truth text labels. Just take a larger set of inputs, run all models on them and compute WER. This will tell you which model works best for which audio.
But note that if all your models perform very similarly, the gains from confidence ensembling will also be minimal!\n", + "\n", + "Even though logistic regression is a simple model and operates in a low-dimensional space, we found that it's still beneficial sometimes to tune its hyperparameters, especially if your input data is imbalanced (e.g., you have more ground-truth samples for some models than others). This tuning is very cheap and so will be performed automatically, as long as you [specify a validation set in the config](#Building-and-evaluating-ensemble-(tuned-parameters))." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use confidence-based ensembles in NeMo?\n", + "\n", + "The following cells contain code examples of how to use confidence ensembles in NeMo. We will build confidence ensemble of two models - generic ASR model trained on a large set of audio and a modified version of the same model that's finetuned to recognize [Irish English accent](https://openslr.org/83/).\n", + "\n", + "To do this, we will go through the following steps:\n", + "\n", + "1. Download and process the Irish accent data using NVIDIA's [Speech Data Processor](https://github.com/NVIDIA/NeMo-speech-data-processor).\n", + "2. Finetune the [Conformer Large CTC LS model](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_conformer_ctc_large_ls) on this data. All steps work exactly the same for Transducer models as well.\n", + "3. Evaluate performance of the original and finetuned models on the Irish accent data and on LibriSpeech.\n", + "4. Build a confidence-based ensemble (with default parameters) of these two models and check how it compares with each of the models.\n", + "5. Tune the confidence hyperparameters of the ensemble and check how the performance changes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Downloading and preparing Irish accent data using [Speech Data Processor](https://github.com/NVIDIA/NeMo-speech-data-processor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# let's start by downloading and processing the Irish accent data with SDP\n", + "# Check out https://github.com/NVIDIA/NeMo-speech-data-processor to learn more details\n", + "\n", + "# run the Irish accent preparation config (will download and process data for us)\n", + "cmd = (\n", + " f\"cd {WORKSPACE_DIR}/NeMo-speech-data-processor && \"\n", + " \"python main.py --config-path=dataset_configs/english/slr83 --config-name=config.yaml \"\n", + " f\"workspace_dir={WORKSPACE_DIR}/slr83-data dialect=irish_english_male data_split={{data_split}}\"\n", + ")\n", + "for data_split in ['train', 'dev', 'test']:\n", + " print(f\"****************** Preparing Irish accent data (split={data_split}) ******************\\n\\n\")\n", + " cur_cmd = cmd.format(data_split=data_split)\n", + " !$cur_cmd\n", + " \n", + "# you can inspect https://github.com/NVIDIA/NeMo-speech-data-processor/blob/main/dataset_configs/english/slr83/config.yaml\n", + "# to see what processing was done. 
\n", + "# You can also check the generated NeMo manifests inside 'slr83-data' folder \n", + "# that are ready for training and evaluation \n", + "\n", + "!ls $WORKSPACE_DIR/slr83-data/irish_english_male" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finetuning the generic model on the accent data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# before running training, let's open up a tensorboard pane to see the progress\n", + "# you might need to install tensorboard and tensorboard jupyter extension if you get errors\n", + "# you can totally skip this cell, since the logs will also be streamed to stdout\n", + "%load_ext tensorboard\n", + "%tensorboard --logdir $WORKSPACE_DIR/irish_finetuning --bind_all " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# now let's finetune the generic model on this data. \n", + "# We will only run finetuning for 5 epochs (the results can be improved by running longer)\n", + "# check out https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb\n", + "# to learn more about finetuning NeMo ASR models\n", + "from omegaconf import open_dict, OmegaConf\n", + "from pytorch_lightning import Trainer\n", + "\n", + "from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE\n", + "import nemo.utils.exp_manager as exp_manager\n", + "\n", + "\n", + "# feel free to play around with parameters here (e.g., increase bs/devices to match your GPUs)\n", + "# but note that you might need to tune LR a bit to get good results\n", + "\n", + "\n", + "trainer = Trainer(\n", + " devices=1, # to have the same results on single/multi-gpu systems\n", + " max_epochs=5, # we typically want to finetune for 50-100 epochs, but 5 is enough for the tutorial\n", + " # just some reasonable defaults\n", + " accelerator='auto',\n", + " accumulate_grad_batches=1,\n", + " enable_checkpointing=False,\n", + " logger=False,\n", + " log_every_n_steps=100,\n", + ") \n", + "model = EncDecCTCModelBPE.from_pretrained(\"stt_en_conformer_ctc_large_ls\", trainer=trainer)\n", + "\n", + "# updating data/optimization to support finetuning\n", + "with open_dict(model.cfg):\n", + " # setting up data manifests and lowering batch size in case we deal with low-memory GPUs\n", + " model.cfg.train_ds.manifest_filepath = f\"{WORKSPACE_DIR}/slr83-data/irish_english_male/train_manifest.json\"\n", + " model.cfg.train_ds.batch_size = 4\n", + " model.cfg.train_ds.is_tarred = False\n", + " model.cfg.validation_ds.manifest_filepath = f\"{WORKSPACE_DIR}/slr83-data/irish_english_male/dev_manifest.json\"\n", + " model.cfg.validation_ds.batch_size = 4\n", + "\n", + " model.cfg.optim.lr = 0.02 # 100 times lower to facilitate finetuning\n", + " model.cfg.optim.sched.warmup_steps = 0 # no warmup\n", + "\n", + "# updating the model according to the new parameters\n", + "model.setup_training_data(model.cfg.train_ds)\n", + "model.setup_multiple_validation_data(model.cfg.validation_ds)\n", + "model.setup_optimization(model.cfg.optim)\n", + "\n", + "# controlling where the model is saved and asking to save best WER model\n", + "exp_manager_config = exp_manager.ExpManagerConfig(\n", + " exp_dir=f'{WORKSPACE_DIR}/irish_finetuning',\n", + " checkpoint_callback_params=exp_manager.CallbackParams(\n", + " monitor=\"val_wer\",\n", + " mode=\"min\",\n", + " always_save_nemo=True,\n", + " save_best_model=True,\n", + 
" ),\n", + ")\n", + "exp_manager.exp_manager(trainer, OmegaConf.structured(exp_manager_config))\n", + " \n", + "# launching finetuning\n", + "trainer.fit(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluating both models to compare performance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# let's evaluate the performance of the original and finetuned models on the test set of the Irish accent data\n", + "# as well as the LibriSpeech (which is a proxy for generic ASR domain). We expect the finetuned model to be\n", + "# significantly better on the Irish data and significantly worse on the LS\n", + "\n", + "# running the script to download LibriSpeech data\n", + "os.makedirs(os.path.join(WORKSPACE_DIR, \"librispeech\"), exist_ok=True)\n", + "!cd $NEMO_DIR && python scripts/dataset_processing/get_librispeech_data.py \\\n", + " --data_root=$WORKSPACE_DIR/librispeech --data_set=test_other,dev_other\n", + "\n", + "\n", + "# running evaluation with generic model on LS. Typically will be run as a script in command line, but we want to\n", + "# capture WER numbers for display later, so let's import and run the evaluation function here\n", + "\n", + "# adding script folder to python path to be able to import it\n", + "import glob\n", + "import sys\n", + "import pandas as pd\n", + "\n", + "sys.path.insert(0, os.path.join(NEMO_DIR, \"examples\", \"asr\"))\n", + "from speech_to_text_eval import EvaluationConfig, main as run_eval\n", + "\n", + "wer_results = {\n", + " 'generic': [], # LS, Irish\n", + " 'finetuned': [],\n", + "}\n", + "\n", + "# running evaluation with generic model\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"librispeech\", \"test_other.json\"),\n", + " pretrained_name=\"stt_en_conformer_ctc_large_ls\",\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['generic'].append(eval_cfg.metric_value)\n", + "\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"slr83-data\", \"irish_english_male\", \"test_manifest.json\"),\n", + " pretrained_name=\"stt_en_conformer_ctc_large_ls\",\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['generic'].append(eval_cfg.metric_value)\n", + "\n", + "\n", + "# running evaluation with finetuned model\n", + "finetuned_model_path = glob.glob(os.path.join(WORKSPACE_DIR, \"irish_finetuning\", \"**\", \"*.nemo\"), recursive=True)[0]\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"librispeech\", \"test_other.json\"),\n", + " model_path=finetuned_model_path,\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['finetuned'].append(eval_cfg.metric_value)\n", + "\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"slr83-data\", \"irish_english_male\", \"test_manifest.json\"),\n", + " model_path=finetuned_model_path,\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['finetuned'].append(eval_cfg.metric_value)\n", + "\n", + "# you should be able to see that the generic model is much better\n", + "# on LibriSpeech and much worse on the accent data\n", + 
"print(\"\\n*************************** Results ***************************\\n\")\n", + "pd.DataFrame(wer_results, index=['LibriSpeech', 'Irish Accent']).transpose()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building and evaluating ensemble (default parameters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# now let's finally combine the two models in the confidence-based ensemble!\n", + "# first, we are going to use default parameters (no tuning)\n", + "cmd = (\n", + " f\"cd {NEMO_DIR} && python scripts/confidence_ensembles/build_ensemble.py \"\n", + " # and example config is good enough for our purposes\n", + " f\"--config-path={NEMO_DIR}/scripts/confidence_ensembles --config-name=ensemble_config.yaml \"\n", + " # specifying model and corresponding dataset (to be used as ground-truth for logistic regression training)\n", + " \"ensemble.0.model=stt_en_conformer_ctc_large_ls \"\n", + " # by default it subsamples to a max of 1000 samples, so it's not going to use the full data\n", + " # note that for librispeech we are using the dev data - this is just to avoid downloading the training set\n", + " # it's perfectly fine and simpler to use the training data here\n", + " f\"ensemble.0.training_manifest={WORKSPACE_DIR}/librispeech/dev_other.json \"\n", + " # same for the second model/dataset\n", + " f\"ensemble.1.model={finetuned_model_path} \"\n", + " f\"ensemble.1.training_manifest={WORKSPACE_DIR}/slr83-data/irish_english_male/train_manifest.json \"\n", + " # setting up the final checkpoint location and lower batch size to save GPU memory\n", + " f\"output_path={WORKSPACE_DIR}/confidence_ensemble_default.nemo \"\n", + " \"transcription.batch_size=4 \"\n", + ")\n", + "\n", + "# building the ensemble\n", + "!$cmd\n", + "\n", + "# running evaluation on LibriSpeech and Irish accent data\n", + "# you will see that the transcription is run 2 times, since we need to run both models to get confidence scores\n", + "wer_results['ensemble (default)'] = []\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"librispeech\", \"test_other.json\"),\n", + " model_path=os.path.join(WORKSPACE_DIR, 'confidence_ensemble_default.nemo'),\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['ensemble (default)'].append(eval_cfg.metric_value)\n", + "\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"slr83-data\", \"irish_english_male\", \"test_manifest.json\"),\n", + " model_path=os.path.join(WORKSPACE_DIR, 'confidence_ensemble_default.nemo'),\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['ensemble (default)'].append(eval_cfg.metric_value)\n", + "\n", + "# you should be able to see that the ensemble with default parameters is already \n", + "# working very well. 
It might even be slightly better than the best model,\n", + "# because it can sometimes \"incorrectly\" pick generic model on Irish data\n", + "# when it's actually giving lower WER than the finetuned model (and same for LibriSpeech).\n", + "print(\"\\n*************************** Results ***************************\\n\")\n", + "pd.DataFrame(wer_results, index=['LibriSpeech', 'Irish Accent']).transpose()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building and evaluating ensemble (tuned parameters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# now, we are going to allow tuning of the confidence and LR parameters to see how this affects results\n", + "# this cell is quite similar to the previous one - the only difference is in parameters of the\n", + "# command-line to build an ensemble.\n", + "\n", + "# for LibriSpeech, since we already used validation for training the logistic regression \n", + "# (to avoid downloading actual training data), we will create a new manifest with \n", + "# just 100 samples for training and another 100 for validation\n", + "!head -n 100 {WORKSPACE_DIR}/librispeech/dev_other.json > {WORKSPACE_DIR}/librispeech/dev_other_train100.json\n", + "!tail -n 100 {WORKSPACE_DIR}/librispeech/dev_other.json > {WORKSPACE_DIR}/librispeech/dev_other_dev100.json\n", + "\n", + "# we keep everything exactly the same, but specify a few additional config settings\n", + "cmd = (\n", + " f\"cd {NEMO_DIR} && python scripts/confidence_ensembles/build_ensemble.py \"\n", + " f\"--config-path={NEMO_DIR}/scripts/confidence_ensembles --config-name=ensemble_config.yaml \"\n", + " \"ensemble.0.model=stt_en_conformer_ctc_large_ls \"\n", + " f\"ensemble.0.training_manifest={WORKSPACE_DIR}/librispeech/dev_other_train100.json \"\n", + " f\"ensemble.1.model={finetuned_model_path} \"\n", + " f\"ensemble.1.training_manifest={WORKSPACE_DIR}/slr83-data/irish_english_male/train_manifest.json \"\n", + " # let's specify to just use 100 samples here as well to make tuning faster\n", + " # 100 is usually more than enough (remember that we are just fitting 2 parameters in the logistic regression)\n", + " # but default is 1000 just in case\n", + " f\"ensemble.1.max_training_samples=100 \"\n", + " # the tuning will take a bit more memory, so let's use bs=2 this time\n", + " \"transcription.batch_size=2 \"\n", + " # requesting to tune the confidence\n", + " # you can also specify exactly what grid-search to run here,\n", + " # but we'd just use the default (it's reasonably large)\n", + " \"tune_confidence=True \"\n", + " # need to provide the validation sets for the tuning\n", + " f\"ensemble.0.dev_manifest={WORKSPACE_DIR}/librispeech/dev_other_dev100.json \"\n", + " f\"ensemble.1.dev_manifest={WORKSPACE_DIR}/slr83-data/irish_english_male/dev_manifest.json \"\n", + " f\"output_path={WORKSPACE_DIR}/confidence_ensemble_tuned.nemo \"\n", + ")\n", + "\n", + "# building the ensemble. 
You should see that confidence computation step is \n", + "# taking quite a bit longer - this is where the grid search happens\n", + "!$cmd\n", + "\n", + "# running evaluation on LibriSpeech and Irish accent data\n", + "# you will see that the transcription is run 2 times, since we need to run both models to get confidence scores\n", + "wer_results['ensemble (tuned)'] = []\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"librispeech\", \"test_other.json\"),\n", + " model_path=os.path.join(WORKSPACE_DIR, 'confidence_ensemble_tuned.nemo'),\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['ensemble (tuned)'].append(eval_cfg.metric_value)\n", + "\n", + "eval_cfg = run_eval(EvaluationConfig(\n", + " dataset_manifest=os.path.join(WORKSPACE_DIR, \"slr83-data\", \"irish_english_male\", \"test_manifest.json\"),\n", + " model_path=os.path.join(WORKSPACE_DIR, 'confidence_ensemble_tuned.nemo'),\n", + " batch_size=4,\n", + " output_filename=os.path.join(WORKSPACE_DIR, \"eval_results.json\"),\n", + "))\n", + "wer_results['ensemble (tuned)'].append(eval_cfg.metric_value)\n", + "\n", + "# the tuned ensemble should be a bit better than default (but not too much)\n", + "# note that there is a bit of randomness in the finetuning and our dev set is quite small\n", + "# so it's possible that the tuned model can be similar to default or even slightly worse\n", + "# for the real applications it's recommended to use larger dev set,\n", + "# but tuning will take longer in this case\n", + "print(\"\\n*************************** Results ***************************\\n\")\n", + "pd.DataFrame(wer_results, index=['LibriSpeech', 'Irish Accent']).transpose()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 40c1cccc9eb6773243a5637ddaf0e6ff023d87e9 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 12 Jul 2023 10:00:05 -0700 Subject: [PATCH 099/123] Add support for Numba FP16 RNNT Loss (#6991) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Force working space memory to always be in fp32 Signed-off-by: smajumdar * Add support for fp16 testing in Numba Signed-off-by: smajumdar * Add support for fp16 testing in Numba Signed-off-by: smajumdar * Add support for fp16 testing in Numba Signed-off-by: smajumdar * Fix cost calculation by upcasting to fp32 Signed-off-by: smajumdar * Fix cost calculation by upcasting to fp32 Signed-off-by: smajumdar * Add support to check if numba fp16 is available Signed-off-by: smajumdar * add RNN-T loss implemented by PyTorch and test code (#5312) * Fix the bugs in cache-aware streaming Conformer (#5032) Signed-off-by: Vahid Signed-off-by: Hainan Xu * IA3 support for GPT and T5 (#4909) * init commit for ia3 adater training in GPT Signed-off-by: arendu * ia3 adater training in GPT, models and adapter classes Signed-off-by: arendu * reshape to operate even on non-contiguous tensors Signed-off-by: arendu * configs Signed-off-by: arendu * fixed none init Signed-off-by: arendu * adding adapter and ia3 support for T5 based models 
Signed-off-by: arendu * style fix Signed-off-by: arendu * config update and t5 model adapter and ia3 Signed-off-by: arendu * removed unused imports Signed-off-by: arendu * predict step for inference Signed-off-by: arendu * style fix Signed-off-by: arendu * style fix Signed-off-by: arendu * adapter inference for t5 Signed-off-by: arendu * style fix Signed-off-by: arendu * fixed bug micro and global batch size in eval Signed-off-by: arendu * minor edit Signed-off-by: arendu * agressive truncation if in test examples if no truncation field is given Signed-off-by: arendu * corrected for language_model_path name changes in main Signed-off-by: arendu * removed unused import Signed-off-by: arendu * name change for language_model_path Signed-off-by: arendu * include inter_attention to IA3 Signed-off-by: arendu * minor fix in confg Signed-off-by: arendu * minor fixes Signed-off-by: arendu * removed unused flag Signed-off-by: arendu * addressing PR comments Signed-off-by: arendu * address PR comments Signed-off-by: arendu * minor fix Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * style fix Signed-off-by: arendu * CI test Signed-off-by: arendu * minor fix in jenkinsfile Signed-off-by: arendu Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * Bug fix - Limit val batches set to 1.0 (#5023) * Bug fix Signed-off-by: shanmugamr1992 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Adressed sandeep's comments * Fixing limit val batches support in bert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixing limit val batches support in bert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: shanmugamr1992 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sandeep Subramanian Signed-off-by: Hainan Xu * [bug_fix] kv_channels is used when available (#5066) * fix bug s.t kv_channels is used when available Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * P&C Docs (#5068) (#5069) Signed-off-by: Matvei Novikov Signed-off-by: Matvei Novikov Signed-off-by: Matvei Novikov Co-authored-by: Matvei Novikov Signed-off-by: Hainan Xu * Add spe_split_by_unicode_script arg (#5072) * Add spe_split_by_unicode_script arg Signed-off-by: Anas * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Anas Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * probabilites -> probabilities (#5078) (#5079) Signed-off-by: nithinraok Signed-off-by: nithinraok Signed-off-by: nithinraok Co-authored-by: Nithin Rao Signed-off-by: Hainan Xu * increase PR and Issue sweep quantity and active close PRs. (#5073) * increase PR and Issue sweep quantity and active close PRs. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * update with stricter rules, 30 days to be stale and 7 days to be closed for both Issues and PRs. 
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Hainan Xu * [TTS] added missing German phoneme tokenizer. (#5070) (#5074) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Hainan Xu * rename to match prompt leanring (#5076) Signed-off-by: arendu Signed-off-by: arendu Signed-off-by: Hainan Xu * Missing fixes from r1.11.0 to T5 finetuning eval (#5054) (#5061) * Fixes to seq2seq eval Signed-off-by: MaximumEntropy * Style Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: MaximumEntropy Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: MaximumEntropy Co-authored-by: Sandeep Subramanian Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * Notebook bug fixes (#5084) (#5085) * Notebook bug fixes Signed-off-by: Virginia Adams * Turned nemo install back on Signed-off-by: Virginia Adams * reverted notebook Signed-off-by: Virginia Adams * Updated one line in entity linking nb Signed-off-by: Virginia Adams Signed-off-by: Virginia Adams Co-authored-by: Eric Harper Signed-off-by: Virginia Adams Co-authored-by: Virginia Adams <78445382+vadam5@users.noreply.github.com> Co-authored-by: Eric Harper Signed-off-by: Hainan Xu * update strategy in notebook from ddp_fork to dp (#5088) (#5089) Co-authored-by: Zhilin Wang Signed-off-by: Hainan Xu * Fix bug in Squeezeformer Conv block (#5011) (#5024) * Fix bug in Squeezeformer Conv block Signed-off-by: smajumdar * Fix kernel context Signed-off-by: smajumdar * Fix access mixin Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Hainan Xu * fixed megatron lm conversion bug (PTL related) (#5038) (#5063) Signed-off-by: David Mosallanezhad Signed-off-by: David Mosallanezhad Co-authored-by: David Mosallanezhad Signed-off-by: David Mosallanezhad Co-authored-by: David Co-authored-by: David Mosallanezhad Co-authored-by: Eric Harper Signed-off-by: Hainan Xu * Fix Unhashable type list for Numba Cuda spec augment kernel (#5093) (#5094) Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: Hainan Xu * Fix numba (#5098) Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: Hainan Xu * Make it possible to specify output_filename in normalize_with_audio.py (#5092) Signed-off-by: Elena Rastorgueva Signed-off-by: Elena Rastorgueva Signed-off-by: Hainan Xu * Greedy decoding confidence for CTC and RNNT (#4931) * rnnt confidence draft Signed-off-by: Aleksandr Laptev * word confidence Signed-off-by: Aleksandr Laptev * advanced entropies added Signed-off-by: Aleksandr Laptev * refactoring Signed-off-by: Aleksandr Laptev * oops forgot a file Signed-off-by: Aleksandr Laptev * metrics and benchmarking script added Signed-off-by: Aleksandr Laptev * style fix Signed-off-by: Aleksandr Laptev * texterrors installation added Signed-off-by: Aleksandr Laptev * lgtm and bug fix Signed-off-by: Aleksandr Laptev * fix comments Signed-off-by: Aleksandr Laptev * fix typos Signed-off-by: Aleksandr Laptev * add missing import after rebase Signed-off-by: Aleksandr Laptev Signed-off-by: Aleksandr Laptev Co-authored-by: Aleksandr Laptev Signed-off-by: Hainan Xu * [Add] SLURP models and 
examples (#4668) * add model, util and loss Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * refactor annd update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update and refactor Signed-off-by: stevehuang52 * update and refactor Signed-off-by: stevehuang52 * update and refactor Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * update docs Signed-off-by: stevehuang52 * update available models Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * refactor data processing Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * update docs Signed-off-by: stevehuang52 * refactor and update Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * move transformer to asr.modules Signed-off-by: stevehuang52 * move transformer to asr.modules Signed-off-by: stevehuang52 * get rid of jsonlines Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * revert changes to nlp Signed-off-by: stevehuang52 Signed-off-by: stevehuang52 Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: Jagadeesh Balam <4916480+jbalam-nv@users.noreply.github.com> Signed-off-by: Hainan Xu * only optimize params that are part of the adapter modules (#5086) Signed-off-by: arendu Signed-off-by: arendu Co-authored-by: Virginia Adams <78445382+vadam5@users.noreply.github.com> Signed-off-by: Hainan Xu * Pipeline Parallel T5 Prompt Learning (#4956) * Added pre process flag checks and pipeline parallel in fwd Signed-off-by: Virginia Adams * Added rank check for pipeline parallel Signed-off-by: Virginia Adams * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * T5 prompt learning works! Signed-off-by: Virginia Adams * IA3 passing CI Signed-off-by: Virginia Adams * Fixed typo Signed-off-by: Virginia Adams * removed optimizer setup so Adi's change will not conflict Signed-off-by: Virginia Adams Signed-off-by: Virginia Adams Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * [TTS] remove phonemizer.py (#5090) remove phonemizer.py and convert code block to markdown in the tutorial. 
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Hainan Xu * T5 Decoding with PP > 2 fix (#5091) (#5103) * set sequence lenghts in the pipeline properly Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy Signed-off-by: MaximumEntropy Signed-off-by: MaximumEntropy Co-authored-by: Sandeep Subramanian Signed-off-by: Hainan Xu * [TTS] fixed wrong val loss for epoch 0 and inconsistent metrics names (#5087) (#5102) * fixed hifigan configs as well * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * Fix and refactor consumed samples save/restore for Megatron models. (#5077) * Fixes and refactor Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy * Remove unused imports Signed-off-by: MaximumEntropy * Empty Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy Signed-off-by: MaximumEntropy Signed-off-by: Hainan Xu * RIR corpus generator tool (#4927) Signed-off-by: Ante Jukić Signed-off-by: Ante Jukić Signed-off-by: Hainan Xu * Multiprocessing fix (#5106) (#5107) Signed-off-by: Matvei Novikov Signed-off-by: Matvei Novikov Signed-off-by: Matvei Novikov Co-authored-by: Matvei Novikov Signed-off-by: Hainan Xu * [Bug fix] PC lexical + audio (#5109) (#5110) * training running Signed-off-by: ekmb * revert Signed-off-by: ekmb * revert Signed-off-by: ekmb Signed-off-by: ekmb Signed-off-by: ekmb Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Signed-off-by: Hainan Xu * [Fix] schedulers with no max_steps param (#4564) * fix schedulers Signed-off-by: stevehuang52 * update to use python inspect module Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 Signed-off-by: stevehuang52 Signed-off-by: Hainan Xu * T5 prompt learning fixes missing from r.11.0 merge (#5075) (#5101) * Fix special tokens Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy * Empty Signed-off-by: MaximumEntropy Signed-off-by: MaximumEntropy Co-authored-by: David Signed-off-by: MaximumEntropy Co-authored-by: Sandeep Subramanian Co-authored-by: David Co-authored-by: Eric Harper Signed-off-by: Hainan Xu * [TTS] Add NeMo TTS Primer Tutorial (#4933) * [TTS] Add NeMo TTS Primer Tutorial Signed-off-by: Ryan Signed-off-by: Hainan Xu * Add Squeezeformer CTC model checkpoints on Librispeech (#5121) Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: Hainan Xu * adding loss normalization options to rnnt joint (#4829) * adding normalization options to rnnt joint loss * moving the param to joint * moving loss normalization to rnnt loss config * style * cleaning up * fixing sum reduction in joint Signed-off-by: Dima Rekesh * moving reduction into RNNT loss class * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactoring * typos Signed-off-by: Dima Rekesh Signed-off-by: Dima Rekesh Co-authored-by: Dima Rekesh Co-authored-by: Oleksii Kuchaiev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * Asr concat dataloader (#5108) * forced precision * typo * initial commit Signed-off-by: Dima Rekesh * typos and bugs Signed-off-by: Dima Rekesh * reverting conformer encoder Signed-off-by: Dima Rekesh * additional checks Signed-off-by: Dima Rekesh * adding support to CTC 
models as well * reverting conformer_encoder Signed-off-by: Dima Rekesh * typo Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactoring Signed-off-by: Dima Rekesh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactoring Signed-off-by: Dima Rekesh * merging Signed-off-by: Dima Rekesh Signed-off-by: Dima Rekesh Signed-off-by: Dima Rekesh Co-authored-by: Dima Rekesh Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Somshubra Majumdar Signed-off-by: Hainan Xu * fix blossom ci unittests Signed-off-by: Oleksii Kuchaiev Signed-off-by: Hainan Xu * bugfix: pybtex.database.InvalidNameString: Too many commas in author field. (#5112) (#5115) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Hainan Xu * Uppdate container version to 22.09 (#5105) * update container version Signed-off-by: ericharper * pin click Signed-off-by: ericharper * pin click 8.0.2 Signed-off-by: ericharper Signed-off-by: ericharper Signed-off-by: Hainan Xu * Remove unsupported arguments from MegatronNMT (#5065) * Fixes Signed-off-by: MaximumEntropy * Fixes Signed-off-by: MaximumEntropy * Style Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy * More fixes Signed-off-by: MaximumEntropy Signed-off-by: MaximumEntropy Signed-off-by: Hainan Xu * pp2 support for T5 IA3 learning and T5 Adapters learning (#5116) * enabling pp2 Signed-off-by: arendu * optimizer update Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * T5 pp>1 support for adapters and ia3 Signed-off-by: arendu * fix bug with missing adapter_tuning Signed-off-by: arendu * inference error fixed, pp=2 Signed-off-by: arendu Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Oleksii Kuchaiev Signed-off-by: Hainan Xu * T5 Prompt Learning Fixes for Pipeline Parallel (#5120) * Initial fixes Signed-off-by: MaximumEntropy * Added back validation acc Signed-off-by: Virginia Adams * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Put num workers back Signed-off-by: Virginia Adams * added relative encoding if statament Signed-off-by: Virginia Adams * Added back val loss only validation Signed-off-by: Virginia Adams * Revert "Added back val loss only validation" This reverts commit 86d8f4806fe30335c40c3716ce18259939df500f. 
* Removed val acc for PP > 1 Signed-off-by: Virginia Adams * Removed enc_seq_len if statement Signed-off-by: Virginia Adams * Added back validation acc calc Signed-off-by: Virginia Adams * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: MaximumEntropy Signed-off-by: Virginia Adams Signed-off-by: Virginia Adams Co-authored-by: Virginia Adams Co-authored-by: Virginia Adams <78445382+vadam5@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Virginia Adams Signed-off-by: Hainan Xu * add doc info (#4721) Signed-off-by: Yang Zhang Signed-off-by: Yang Zhang Signed-off-by: Hainan Xu * [TTS] Add SpanishCharsTokenizer (#5135) * [TTS] Add SpanishCharsTokenizer Signed-off-by: Ryan Signed-off-by: Hainan Xu * Update megatron interface to dialogue (#4936) * fix style formatting Signed-off-by: Zhilin Wang * update template to include description of intent Signed-off-by: Zhilin Wang * update Jenkinsfile Signed-off-by: Zhilin Wang * changes based on requests in review Signed-off-by: Zhilin Wang * add compatibility with assistant dataset Signed-off-by: Zhilin Wang * update Jenkins Signed-off-by: Zhilin Wang * remove dialogue_state_tracking Signed-off-by: Zhilin Wang * update huggingface utils for dialogue Signed-off-by: Zhilin Wang * rename dialogue_state_tracking_hybrid to dialogue_state_tracking_sgdqa Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * fix style Signed-off-by: Zhilin Wang * style fix nemo/collections/nlp/models/dialogue_state_tracking_sgdqa/__init__.py Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * fix typo Signed-off-by: Zhilin Wang * add docstrings for assistant data processsor Signed-off-by: Zhilin Wang * update Jenkins for SGDGEN local checkpoint Signed-off-by: Zhilin Wang * update style Signed-off-by: Zhilin Wang * use local vocab file for Jenkinsfile Signed-off-by: Zhilin Wang * patch for Jenkins CI using local file Signed-off-by: Zhilin Wang * add slot filling prediction and metrics Signed-off-by: Zhilin Wang * remove unused code Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * refactor metrics code out of Dialogue GPT Model Signed-off-by: Zhilin Wang * integrate backward compatible support for IntentSlotClassificationModel (bert model) Signed-off-by: Zhilin Wang * save prediction file for IntentSlotClassification Signed-off-by: Zhilin Wang * update dialogue gpt model training for megatron gpt Signed-off-by: Zhilin Wang * remove batch generate for HF GPT2, which causes lower performance Signed-off-by: Zhilin Wang * add few shot capability to dialogue gpt model Signed-off-by: Zhilin Wang * update Jenkinsfile and remove unused import Signed-off-by: Zhilin Wang * update code description and clarity Signed-off-by: Zhilin Wang * address PR comments Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * integrate compatibility with ZeroShotIntentModel Signed-off-by: Zhilin Wang * rename folder to dialogue due to increased scope and further refactor for clarity Signed-off-by: Zhilin Wang * added dialogue GPT for sequence generation task (e.g. 
answer extender) Signed-off-by: Zhilin Wang * add CI test for DialogueGPTGenerationModel Signed-off-by: Zhilin Wang * integrate DialogueS2SGenerationModel for generation task (e.g. answer extender) Signed-off-by: Zhilin Wang * modify huggingface utils to support HF t5/BART models Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * remove unused imports Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update Jenkinsfile Signed-off-by: Zhilin Wang * update Jenkinsfile Signed-off-by: Zhilin Wang * update bleu metric Signed-off-by: Zhilin Wang * fix bleu metric style Signed-off-by: Zhilin Wang * debug bleu metric Signed-off-by: Zhilin Wang * debug bleu metric Signed-off-by: Zhilin Wang * update based on PR #3893 Signed-off-by: Zhilin Wang * update 2 based on PR #3893 Signed-off-by: Zhilin Wang * update 3 based on PR #3893 Signed-off-by: Zhilin Wang * integrate sgd generation based on user user utterance and system slot-values to generate system utterance Signed-off-by: Zhilin Wang * add validation model saving capabilities Signed-off-by: Zhilin Wang * cleaned up code for SGD Based Answer extender Signed-off-by: Zhilin Wang * update Dialogue Generation CI Signed-off-by: Zhilin Wang * update Jenkinsfile Signed-off-by: Zhilin Wang * update Jenkinsfile Signed-off-by: Zhilin Wang * fix Jenkins CI issue" Signed-off-by: Zhilin Wang * add support for design dataset Signed-off-by: Zhilin Wang * remove unnecessary imports Signed-off-by: Zhilin Wang * update Jenkins Signed-off-by: Zhilin Wang * update jenkins Signed-off-by: Zhilin Wang * update jenkins Signed-off-by: Zhilin Wang * support megatron for dialogue_s2s_generation_model Signed-off-by: Zhilin Wang * reduce loaded samples in MSMarcoDataProcessor to 64 when cfg.model.dataset.debug_mode=True Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update CI Signed-off-by: Zhilin Wang * update checkpoint and predictions filename to include epoch number Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * integrate HF BART MNLI into zero shot intent model Signed-off-by: Zhilin Wang * integrate Dialogue Nearest Neighbour Model Signed-off-by: Zhilin Wang * update Jenkins Signed-off-by: Zhilin Wang * update Jenkins Signed-off-by: Zhilin Wang * refactor Dialogue SGD Data Processor to make interface for models cleaner Signed-off-by: Zhilin Wang * update jenkins Signed-off-by: Zhilin Wang * update Dialogue S2S Generation model for DialogueSGDDataProcessor interface Signed-off-by: Zhilin Wang * update jenkins Signed-off-by: Zhilin Wang * update jenkins Signed-off-by: Zhilin Wang * support sgd and drive thru datasets by zero shot model and nearest neighbour model Signed-off-by: Zhilin Wang * add prediction saving code to nearest neighbour and zero shot intent models Signed-off-by: Zhilin Wang * fix typo in sgd data processor Signed-off-by: Zhilin Wang * integrate Dialogue Mellon QA Data Processor Signed-off-by: Zhilin Wang * update mellon qa Signed-off-by: Zhilin Wang * update dialogue.py to remove outdated info Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update dialogue_config.yaml Signed-off-by: Zhilin Wang * update dialogue_config.yaml Signed-off-by: Zhilin Wang * add dialogue docs Signed-off-by: Zhilin Wang * address review comments Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix 
Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix for cfg Signed-off-by: Zhilin Wang * make dependency on apex optional Signed-off-by: Zhilin Wang * change NLPDDPluggin calling logic to make it possible to run without apex Signed-off-by: Zhilin Wang * add first draft of tutorial Signed-off-by: Zhilin Wang * reduce ms marco size by removing lines without wellFormedAnswers Signed-off-by: Zhilin Wang * address pr comments Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update colab tutorial link in dialogue docs Signed-off-by: Zhilin Wang * include unit test and some refactor to facilitate unit test Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * address pr issues Signed-off-by: Zhilin Wang * remove typos in dialogue tutorial Signed-off-by: Zhilin Wang * support larger files for question answering Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * remove unnecessary artifacts to reduce memory use Signed-off-by: Zhilin Wang * put 0 tensor to device Signed-off-by: Zhilin Wang * update link within dialogue tutorial Signed-off-by: Zhilin Wang * restore previously delete files Signed-off-by: Zhilin Wang * update error handling when loss = nan Signed-off-by: Zhilin Wang * update nan handling Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update spanning loss func Signed-off-by: Zhilin Wang * update spanning loss Signed-off-by: Zhilin Wang * fix type error raised in qa_dataset.py Signed-off-by: Zhilin Wang * add error checking message Signed-off-by: Zhilin Wang * revert back to float32 Signed-off-by: Zhilin Wang * revert back to float32 Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update exp logging Signed-off-by: Zhilin Wang * update error msgs Signed-off-by: Zhilin Wang * update loading of large file from pickle to json Signed-off-by: Zhilin Wang * update loading of large file from pickle to json Signed-off-by: Zhilin Wang * limit number of negative samples Signed-off-by: Zhilin Wang * revert post processing Signed-off-by: Zhilin Wang * revert post processing Signed-off-by: Zhilin Wang * remove unused methods and style fix Signed-off-by: Zhilin Wang * add more documentation Signed-off-by: Zhilin Wang * remove unused imports Signed-off-by: Zhilin Wang * changes base on PR review Signed-off-by: Zhilin Wang * set wandb logger falseby default Signed-off-by: Zhilin Wang * update interface with megatron gpt prompt learning Signed-off-by: Zhilin Wang * update inline documentation Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update prompt_ids Signed-off-by: Zhilin Wang * update error msg Signed-off-by: Zhilin Wang * update config Signed-off-by: Zhilin Wang * update config Signed-off-by: Zhilin Wang * set inference = False for dialgue prompt learning during trainng Signed-off-by: Zhilin Wang * set inference = False for dialgue prompt learning during trainng Signed-off-by: Zhilin Wang * remove unused code Signed-off-by: Zhilin Wang * update config yaml Signed-off-by: Zhilin Wang * fix bug for megatron gpt prompt 
learning Signed-off-by: Zhilin Wang * remove unused import Signed-off-by: Zhilin Wang * address comments in PR Signed-off-by: Zhilin Wang * address comments in PR Signed-off-by: Zhilin Wang * address typo Signed-off-by: Zhilin Wang * add megatron t5 inference Signed-off-by: Zhilin Wang * fix bug due to bert tokenizer not being space-aware Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update style Signed-off-by: Zhilin Wang * update IntentSlotModel onnx export test Signed-off-by: Zhilin Wang * update style Signed-off-by: Zhilin Wang * update exportable Signed-off-by: Zhilin Wang * address PR comments Signed-off-by: Zhilin Wang * replace functools.cache_property with functools.lru_cache to maintain python 3.7 compatibility Signed-off-by: Zhilin Wang * improve speed of rank_candidates and support for p tuning Signed-off-by: Zhilin Wang * update dialogue.py Signed-off-by: Zhilin Wang * fix megatron prompt learning saving bug Signed-off-by: Zhilin Wang * update generate_candidate method Signed-off-by: Zhilin Wang * remove repeated init text ids and invert attention masks Signed-off-by: Zhilin Wang * update typo Signed-off-by: Zhilin Wang * custom collate fn to remove excess padding in batch Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * update complete method to mitigate issue when max seq len is low Signed-off-by: Zhilin Wang * address pr comments Signed-off-by: Zhilin Wang * update generation interface Signed-off-by: Zhilin Wang Signed-off-by: Zhilin Wang Co-authored-by: Zhilin Wang Co-authored-by: Oleksii Kuchaiev Co-authored-by: Yang Zhang Co-authored-by: Eric Harper Co-authored-by: Sandeep Subramanian Signed-off-by: Hainan Xu * Added save inference ready .nemo file with every checkpoint (#5055) * Added save inference ready .nemo file with every checkpoint Signed-off-by: Virginia Adams * Python style fix Signed-off-by: Virginia Adams * addressed Adi's comment Signed-off-by: Virginia Adams * Added ptuning check in model checkpoint saving Signed-off-by: Virginia Adams * Changed save_nemo_on_valdaition default to False Signed-off-by: Virginia Adams * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Changes global batch size of adapter CI Signed-off-by: Virginia Adams * Changed num workers to 0 Signed-off-by: Virginia Adams * added first stage of pipeline check Signed-off-by: Virginia Adams * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Virginia Adams Signed-off-by: Virginia Adams <78445382+vadam5@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * Fixes for docs/typos + remove max_utts parameter from tarred datasets as it causes hang in training (#5118) * Remove ; from jupyter notebook cells Signed-off-by: Igor Gitman * Fix typos in documentation/code Signed-off-by: Igor Gitman * Fix output message to have 'or equal' Signed-off-by: Igor Gitman * Link formatting fixes Signed-off-by: Igor Gitman * Add error if max_utts is used in tarred datasets Signed-off-by: Igor Gitman * Remove max_utts parameter from tarred datasets Signed-off-by: Igor Gitman * Fix max_utts removal in tests Signed-off-by: Igor Gitman * Fix typo if -> is Signed-off-by: Igor Gitman Signed-off-by: Igor Gitman Signed-off-by: Hainan Xu * Merge r1.12.0 main (#5139) * update branch Signed-off-by: 
ericharper * Add cherry-pick action (#4958) * add cherry-pick action Signed-off-by: ericharper * Pin Transformers version to fix CI (#4955) * Pin transformers version in CI to prevent offline tokenizer loading error Signed-off-by: SeanNaren * Drop version Signed-off-by: SeanNaren * Disable offline temporarily Signed-off-by: SeanNaren * Disable offline temporarily Signed-off-by: SeanNaren * Enable offline Signed-off-by: SeanNaren Signed-off-by: SeanNaren Signed-off-by: ericharper Signed-off-by: SeanNaren Co-authored-by: Sean Naren * upper bound transformers Signed-off-by: ericharper * remove duplicate transformers requirement Signed-off-by: ericharper * Release SOTA Lang ID model (#5080) * add pretrained lang id model ambernet Signed-off-by: fayejf * update doc and style fix Signed-off-by: fayejf Signed-off-by: fayejf * update branch and package info Signed-off-by: ericharper * remove upper bounds on lightning and transformers Signed-off-by: ericharper * remove transformers offline from ci Signed-off-by: ericharper * upper bound transformers Signed-off-by: ericharper Signed-off-by: ericharper Signed-off-by: SeanNaren Signed-off-by: fayejf Co-authored-by: Sean Naren Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Signed-off-by: Hainan Xu * Added ASR model comparison to SDE (#5043) SDE: Added ASR model comparison tool to SDE transcribe speech: Added support for many predictions in one file, as well as custom field names Signed-off-by: George Zelenfroynd Signed-off-by: Hainan Xu * fix nmt eval sampler (#5154) Signed-off-by: Abhinav Khattar Signed-off-by: Abhinav Khattar Signed-off-by: Hainan Xu * Fix Global init steps (#5143) * move global step to base Signed-off-by: Yi Dong * fix fused softmax Signed-off-by: Yi Dong * add the missing file Signed-off-by: Yi Dong * update the fused kernel Signed-off-by: Yi Dong * fix import error Signed-off-by: Yi Dong * fix import again Signed-off-by: Yi Dong Signed-off-by: Yi Dong Signed-off-by: Yi Dong Co-authored-by: Yi Dong Co-authored-by: Sandeep Subramanian Signed-off-by: Hainan Xu * [TTS] bug fix - sample rate was being ignored in vocoder dataset (#4518) * bug fix - sample rate was being ignored in vocoder dataset when not loading mel * handled n segments for a different sampling rate than original sampling rate * Added case for n_segments 0, warning for n_segments greater than file length Signed-off-by: Paarth Neekhara Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Jocelyn Signed-off-by: Hainan Xu * Add EMA support to NeMo (#4764) * Added Base files Signed-off-by: SeanNaren * Some refactors, swap to using MNIST Lnet Signed-off-by: SeanNaren * Add a few more tests, allow the callback to be set via the exp manager Signed-off-by: SeanNaren * Actually run validation for testing Signed-off-by: SeanNaren * Run isort Signed-off-by: SeanNaren * Add test for saving state/fix saving state Signed-off-by: SeanNaren * Use dummy model Signed-off-by: SeanNaren * Fix test Signed-off-by: SeanNaren * Add copyright Signed-off-by: SeanNaren * Support saving separate EMA weight module Signed-off-by: SeanNaren * Add standalone functionality/logging Signed-off-by: SeanNaren * Expose more parameters Signed-off-by: SeanNaren * Modify to allow option to replace validation Signed-off-by: SeanNaren * Add jenkins test, formatting Signed-off-by: SeanNaren * Pin Transformers version to fix CI (#4955) * Pin transformers version in CI to prevent offline tokenizer loading error Signed-off-by: SeanNaren * Drop version 
Signed-off-by: SeanNaren * Disable offline temporarily Signed-off-by: SeanNaren * Disable offline temporarily Signed-off-by: SeanNaren * Enable offline Signed-off-by: SeanNaren Signed-off-by: SeanNaren * Add cherry-pick action (#4958) (#4961) * add cherry-pick action Signed-off-by: ericharper * Pin Transformers version to fix CI (#4955) * Pin transformers version in CI to prevent offline tokenizer loading error Signed-off-by: SeanNaren * Drop version Signed-off-by: SeanNaren * Disable offline temporarily Signed-off-by: SeanNaren * Disable offline temporarily Signed-off-by: SeanNaren * Enable offline Signed-off-by: SeanNaren Signed-off-by: SeanNaren Signed-off-by: ericharper Signed-off-by: SeanNaren Co-authored-by: Sean Naren Signed-off-by: ericharper Signed-off-by: SeanNaren Co-authored-by: Eric Harper Co-authored-by: Sean Naren Signed-off-by: SeanNaren * Fix changelog builder (#4962) (#4963) Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: SeanNaren * fix cherry pick workflow (#4964) (#4965) Signed-off-by: ericharper Signed-off-by: ericharper Signed-off-by: ericharper Co-authored-by: Eric Harper Signed-off-by: SeanNaren * reorder model check (#4959) (#4967) Signed-off-by: nithinraok Signed-off-by: nithinraok Signed-off-by: nithinraok Co-authored-by: Nithin Rao Signed-off-by: SeanNaren * check for active conda environment (#4970) (#4971) Signed-off-by: SeanNaren * [TTS] fix broken tutorial for MixerTTS. (#4949) (#4976) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: SeanNaren * Checkpoint averaging class fix (#4946) * 1. Added args.class_path to provide it externally. Signed-off-by: Micha Livne * 1. Fixed style. Signed-off-by: Micha Livne Signed-off-by: Micha Livne Signed-off-by: SeanNaren * Add ability to give seperate datasets for test, train and validation (#4798) * Add ability to give seperate datasets for test, train and validation * Addressed Sandeeps comments * Addressed Sandeeps comments * Add ability to give seperate datasets for test, train and validation * Add ability to give seperate datasets for test, train and validation * Addressed review comments * Bug fix for common dataset utils * Add CI tests Signed-off-by: shanmugamr1992 * Reformat code Signed-off-by: shanmugamr1992 * Bug fix Signed-off-by: shanmugamr1992 * Bug fix * Bug Fix * Bug Fix * Update Jenkinsfile * Addressed comments * Addressed Eriks comments. 
* Addressed Sandeep * Update Jenkinsfile * Update Jenkinsfile * Update dataset_utils.py * Update Jenkinsfile * Update Jenkinsfile * Use GPT CI config Signed-off-by: MaximumEntropy Signed-off-by: shanmugamr1992 Signed-off-by: MaximumEntropy Co-authored-by: MaximumEntropy Signed-off-by: SeanNaren * fix label models restoring issue from wrighted cross entropy (#4968) (#4975) Signed-off-by: nithinraok Signed-off-by: nithinraok Signed-off-by: nithinraok Co-authored-by: Nithin Rao Signed-off-by: SeanNaren * Add simple pre-commit file (#4983) * Add simple pre-commit file Signed-off-by: SeanNaren * Exclude docs folder Signed-off-by: SeanNaren * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: SeanNaren * Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks" This reverts commit 053bd5ba579537a5f311b431871c21f3381b43eb. Signed-off-by: SeanNaren * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: SeanNaren Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: SeanNaren * Import pycuda.autoprimaryctx or pycuda.autoinit to init pycuda execution environment (#4951) Signed-off-by: Jin Li Signed-off-by: Jin Li Co-authored-by: Somshubra Majumdar Signed-off-by: SeanNaren * Adding speaker embedding conditioning in fastpitch (#4986) Signed-off-by: subhankar-ghosh Signed-off-by: subhankar-ghosh Signed-off-by: SeanNaren * Fix ASR issues (#4984) (#4991) * Fix ASR issues Signed-off-by: smajumdar * Revert fix Signed-off-by: smajumdar Signed-off-by: smajumdar Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Signed-off-by: SeanNaren * Fix current tests Signed-off-by: SeanNaren * More test coverage Signed-off-by: SeanNaren * Address reviews Signed-off-by: SeanNaren * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Address review Signed-off-by: SeanNaren * Drop bf16 test Signed-off-by: SeanNaren * Address review Signed-off-by: SeanNaren * remove print Signed-off-by: SeanNaren * Add bf16 Signed-off-by: SeanNaren Signed-off-by: SeanNaren Signed-off-by: ericharper Signed-off-by: smajumdar Signed-off-by: nithinraok Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Micha Livne Signed-off-by: shanmugamr1992 Signed-off-by: MaximumEntropy Signed-off-by: Jin Li Signed-off-by: subhankar-ghosh Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Eric Harper Co-authored-by: Somshubra Majumdar Co-authored-by: Nithin Rao Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Micha Livne Co-authored-by: shanmugamr1992 <111910568+shanmugamr1992@users.noreply.github.com> Co-authored-by: MaximumEntropy Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: liji-nv <59594262+liji-nv@users.noreply.github.com> Co-authored-by: Subhankar Ghosh Signed-off-by: Hainan Xu * Fix BF16 test (#5162) Signed-off-by: SeanNaren Signed-off-by: SeanNaren Signed-off-by: Hainan Xu * Fix errors in speaker diarization nemo docs (#5153) * fix docs and docstrings for MSDD Signed-off-by: Taejin Park * fix nemo docs errors Signed-off-by: Taejin Park * reflected review comments Signed-off-by: Taejin Park Signed-off-by: Taejin Park Signed-off-by: Hainan Xu * Add interleaved pipeline schedule to GPT (#5025) * add virtual pipeline 
size to config Signed-off-by: ericharper * convert model to list of modules Signed-off-by: ericharper * convert model to list of modules Signed-off-by: ericharper * convert model to list of modules Signed-off-by: ericharper * update for list of modules Signed-off-by: ericharper * add virtual to init Signed-off-by: ericharper * update first last stage embedding all reduce Signed-off-by: ericharper * update sequence parallel all reduce for virtual models Signed-off-by: ericharper * runs but we get an error Signed-off-by: ericharper * set virtual rank 0 after looping Signed-off-by: ericharper * account for virtual when determinining first and last pipeline stages Signed-off-by: ericharper * checkpointing for virtual models in progress Signed-off-by: ericharper * add checkpoint hooks Signed-off-by: ericharper * working on validation when resuming Signed-off-by: ericharper * skip sanity val steps by default in config Signed-off-by: ericharper * remove comment Signed-off-by: ericharper * log number of params Signed-off-by: ericharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * style Signed-off-by: ericharper * check if self.model is a list Signed-off-by: ericharper * make virtual pipeline default size None on init Signed-off-by: ericharper * make virtual pipeline default to None in config Signed-off-by: ericharper * remove ensure_divisibility call Signed-off-by: ericharper * fix lgtm alerts Signed-off-by: ericharper * remove num_sanity_val_steps from config Signed-off-by: ericharper * default virtual pipeline size to none Signed-off-by: ericharper * check for list Signed-off-by: ericharper * update assert to make sure we are only doing virtual for gpt Signed-off-by: ericharper * revert change to get_params_for_weight_decay Signed-off-by: ericharper * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * init var Signed-off-by: ericharper * add import guard for set virtual model parallel world size Signed-off-by: ericharper * use import guard Signed-off-by: ericharper * update calls to fake init in eval scripts Signed-off-by: ericharper * add _get_fwd_bwd_function Signed-off-by: ericharper * log all total model parameters Signed-off-by: ericharper * remove unused import Signed-off-by: ericharper Signed-off-by: ericharper Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Hainan Xu * reduced to 14 inactive days to be stale for PRs. (#5165) Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: Hainan Xu * refactor TTS documentation organization and add new contents. (#5137) * refactor TTS documentation organization and add new contents. * fix asr api bug. * fix broken links. * fix unexpected indentation errors. * fixed unexpected indentation. * fixed broken paper reference. * fixed cross-reference and typos. * fixed toctree errors. * revert to 'Augmentors' * reordered TTS tutorial list in starthere. * ordered api classes alphabetically for each Section. * fixed underscore typo for fastpitch checkpoint. 
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * upcase 'Tuning' Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * fixed typo for RAD-TTS Aligner Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * reorder aligner section after mel-gen and vocoders in models.rst. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * clarify Mixer-TTS-X and reorder model descriptions alphabetically. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * fixed some typos and formats. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * removed old megatron.rst. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * fixed block quote ends without a blank line warnings. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * remove duplicate reference; fixed missing key nlp-megatron-shoeybi2019megatron Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * Revert "removed old megatron.rst." This reverts commit c5ea1dc3f23272eecfe8040e3abfa54fa122cf73. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * removed Russian, a hyphen, and add a note about G2P in tts/config.rst Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * added pynini installation in wfst_text_normalization.rst Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * added description of manifest key/value pairs. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * add toctree in tts/intro Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * replace main branch to stable. Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * add 'upcoming' for e2e systems. 
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> * replaced main branch to stabl… * Multiblank Transducer (#5527) * multi-blank transducers Signed-off-by: Hainan Xu * one line bug fix Signed-off-by: Hainan Xu * change interface of RNNTDecoding class to extract num-extra-output from joint instead of constructor Signed-off-by: Hainan Xu * addressed PR comments Signed-off-by: Hainan Xu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Hainan Xu Co-authored-by: Hainan Xu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * Default RNNT loss to int64 targets (#6011) Signed-off-by: smajumdar * Rebase Signed-off-by: smajumdar * Begin refactoring tests Signed-off-by: smajumdar * Pass all tests for RNNT numba loss Signed-off-by: smajumdar * Pass all tests for RNNT numba loss Signed-off-by: smajumdar * Remove print Signed-off-by: smajumdar * Fix test for version Signed-off-by: smajumdar * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revert bad merges Signed-off-by: smajumdar * Revert bad merges Signed-off-by: smajumdar * Address comments Signed-off-by: smajumdar * Remove wrong file Signed-off-by: smajumdar --------- Signed-off-by: smajumdar Signed-off-by: Vahid Signed-off-by: Hainan Xu Signed-off-by: arendu Signed-off-by: shanmugamr1992 Signed-off-by: Matvei Novikov Signed-off-by: Anas Signed-off-by: nithinraok Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Signed-off-by: MaximumEntropy Signed-off-by: Virginia Adams Signed-off-by: smajumdar Signed-off-by: David Mosallanezhad Signed-off-by: Elena Rastorgueva Signed-off-by: Aleksandr Laptev Signed-off-by: stevehuang52 Signed-off-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Signed-off-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Signed-off-by: Ante Jukić Signed-off-by: ekmb Signed-off-by: Ryan Signed-off-by: Dima Rekesh Signed-off-by: Dima Rekesh Signed-off-by: Oleksii Kuchaiev Signed-off-by: ericharper Signed-off-by: Virginia Adams Signed-off-by: Yang Zhang Signed-off-by: Zhilin Wang Signed-off-by: Virginia Adams <78445382+vadam5@users.noreply.github.com> Signed-off-by: Igor Gitman Signed-off-by: SeanNaren Signed-off-by: fayejf Signed-off-by: George Zelenfroynd Signed-off-by: Abhinav Khattar Signed-off-by: Yi Dong Signed-off-by: Yi Dong Signed-off-by: Paarth Neekhara Signed-off-by: Micha Livne Signed-off-by: Jin Li Signed-off-by: subhankar-ghosh Signed-off-by: Taejin Park Signed-off-by: Miguel Martínez Signed-off-by: miguelangel Signed-off-by: Jocelyn Huang Signed-off-by: 彭震东 <275331498@qq.com> Signed-off-by: Alexandra Antonova Signed-off-by: Jason Signed-off-by: Patrick Simianer Signed-off-by: Shantanu Acharya Signed-off-by: Shanmugam Ramasamy <111910568+shanmugamr1992@users.noreply.github.com> Signed-off-by: Tim Moon Signed-off-by: eharper Signed-off-by: Micha Livne Signed-off-by: Oleksii Volkovskyi Signed-off-by: Yuekai Zhang Signed-off-by: Boris Fomitchev Signed-off-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Signed-off-by: whrichd Signed-off-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Signed-off-by: Vladimir Bataev Signed-off-by: Viraj Karandikar Signed-off-by: Yu Yao Signed-off-by: PeganovAnton Signed-off-by: Somshubra Majumdar Signed-off-by: Jonghwan Hyeon Signed-off-by: Boris Fomitchev Signed-off-by: shane carroll Co-authored-by: 
Samuel Kriman Co-authored-by: Hainan Xu Co-authored-by: Vahid Noroozi Co-authored-by: Adi Renduchintala <108822655+arendu@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Shanmugam Ramasamy <111910568+shanmugamr1992@users.noreply.github.com> Co-authored-by: Sandeep Subramanian Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Matvei Novikov Co-authored-by: Anas Abou Allaban Co-authored-by: Nithin Rao Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> Co-authored-by: Virginia Adams <78445382+vadam5@users.noreply.github.com> Co-authored-by: Eric Harper Co-authored-by: Zhilin Wang Co-authored-by: David Co-authored-by: David Mosallanezhad Co-authored-by: Elena Rastorgueva <80532067+erastorgueva-nv@users.noreply.github.com> Co-authored-by: Aleksandr Laptev Co-authored-by: Aleksandr Laptev Co-authored-by: He Huang (Steve) <105218074+stevehuang52@users.noreply.github.com> Co-authored-by: Jagadeesh Balam <4916480+jbalam-nv@users.noreply.github.com> Co-authored-by: anteju <108555623+anteju@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Co-authored-by: Ryan Langman Co-authored-by: Dima Rekesh Co-authored-by: Dima Rekesh Co-authored-by: Oleksii Kuchaiev Co-authored-by: Oleksii Kuchaiev Co-authored-by: Virginia Adams Co-authored-by: Virginia Adams Co-authored-by: Yang Zhang Co-authored-by: Zhilin Wang Co-authored-by: Igor Gitman Co-authored-by: Sean Naren Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> Co-authored-by: George <37293288+Jorjeous@users.noreply.github.com> Co-authored-by: Abhinav Khattar Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> Co-authored-by: Yi Dong Co-authored-by: Paarth Neekhara Co-authored-by: Jocelyn Co-authored-by: Micha Livne Co-authored-by: liji-nv <59594262+liji-nv@users.noreply.github.com> Co-authored-by: Subhankar Ghosh Co-authored-by: Taejin Park Co-authored-by: Miguel Martínez <26169771+miguelusque@users.noreply.github.com> Co-authored-by: miguelangel Co-authored-by: 彭震东 <275331498@qq.com> Co-authored-by: Igor Gitman Co-authored-by: bene-ges <61418381+bene-ges@users.noreply.github.com> Co-authored-by: Alexandra Antonova Co-authored-by: Jason Co-authored-by: Rajesh Ilango Co-authored-by: pks Co-authored-by: Shantanu Acharya Co-authored-by: Shantanu Acharya Co-authored-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> Co-authored-by: Sangkug Lym Co-authored-by: Kirthi Shankar Sivamani Co-authored-by: ksivamani Co-authored-by: Eric Harper Co-authored-by: Sandeep Subramanian Co-authored-by: Micha Livne Co-authored-by: Oleksii Volkovskyi Co-authored-by: Yuekai Zhang Co-authored-by: Hainan Xu Co-authored-by: Boris Fomitchev Co-authored-by: anmolgupt <14880251+anmolgupt@users.noreply.github.com> Co-authored-by: Anmol Gupta Co-authored-by: Sasha Meister <117230141+ssh-meister@users.noreply.github.com> Co-authored-by: Riqiang Wang <43883260+whrichd@users.noreply.github.com> Co-authored-by: Vladimir Bataev Co-authored-by: Shanmugam Ramasamy Co-authored-by: Viraj Karandikar <16838694+virajkarandikar@users.noreply.github.com> Co-authored-by: Shane Carroll <50530592+1-800-BAD-CODE@users.noreply.github.com> Co-authored-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: Yi Dong Co-authored-by: PeganovAnton Co-authored-by: Jonghwan Hyeon Co-authored-by: Kaden Uhlig Co-authored-by: Kaden Uhlig Co-authored-by: 
Boris Fomitchev Co-authored-by: Jonghwan Hyeon --- nemo/collections/asr/losses/rnnt.py | 26 +++- nemo/collections/asr/losses/rnnt_pytorch.py | 5 + .../asr/parts/numba/rnnt_loss/rnnt.py | 2 +- .../asr/parts/numba/rnnt_loss/rnnt_numpy.py | 5 + .../asr/parts/numba/rnnt_loss/rnnt_pytorch.py | 7 +- .../rnnt_loss/utils/cpu_utils/cpu_rnnt.py | 8 +- .../numba/rnnt_loss/utils/rnnt_helper.py | 3 +- nemo/core/utils/numba_utils.py | 36 +++++ .../asr/numba/rnnt_loss/test_rnnt_pytorch.py | 126 ++++++++++++------ .../rnnt_loss/utils/test_gpu_rnnt_kernel.py | 64 +++++---- .../asr/numba/rnnt_loss/utils/test_reduce.py | 18 ++- .../numba/rnnt_loss/utils/test_rnnt_helper.py | 75 +++++++---- 12 files changed, 263 insertions(+), 112 deletions(-) diff --git a/nemo/collections/asr/losses/rnnt.py b/nemo/collections/asr/losses/rnnt.py index 10b85acb42ef..a884f7d3cc68 100644 --- a/nemo/collections/asr/losses/rnnt.py +++ b/nemo/collections/asr/losses/rnnt.py @@ -38,9 +38,10 @@ from nemo.collections.asr.losses.rnnt_pytorch import MultiblankRNNTLossPytorch, RNNTLossPytorch, TDTLossPytorch from nemo.core.classes import Loss, typecheck from nemo.core.neural_types import LabelsType, LengthsType, LogprobsType, LossType, NeuralType +from nemo.core.utils import numba_utils from nemo.core.utils.k2_utils import K2_INSTALLATION_MESSAGE from nemo.core.utils.numba_utils import NUMBA_INSTALLATION_MESSAGE -from nemo.utils import logging, model_utils +from nemo.utils import logging, logging_mode, model_utils try: import warprnnt_pytorch as warprnnt @@ -98,7 +99,7 @@ class RNNTLossConfig: min_version='0.53.0', is_available=NUMBA_RNNT_AVAILABLE, installation_msg=NUMBA_INSTALLATION_MESSAGE, - force_float32=True, + force_float32=not numba_utils.NUMBA_FP16_SUPPORTED, ), "pytorch": RNNTLossConfig( loss_name="pytorch", @@ -387,7 +388,7 @@ def __init__(self, num_classes, reduction: str = 'mean_batch', loss_name: str = for the standard "blank" symbol. In particular, say V is the number of non-blank tokens in the vocabulary, then in the case of, standard RNNT: num_classes = V - multiblank RNNT: num_classes = V + number-big-blanks (since we store big-blanks before + multiblank RNNT: num_classes = V + number-big-blanks (since we store big-blanks before standard blank, and the standard blank is the last symbol in the vocab) TDT: num_classes = V. Note, V here does not include any of the "duration outputs". @@ -413,6 +414,7 @@ def __init__(self, num_classes, reduction: str = 'mean_batch', loss_name: str = self.reduction = reduction self._loss = resolve_rnnt_loss(loss_name, blank_idx=self._blank, loss_kwargs=loss_kwargs) self._force_float32 = RNNT_LOSS_RESOLVER[loss_name].force_float32 + self._fp16_compat_checked = False def reduce(self, losses, target_lengths): @@ -442,8 +444,22 @@ def forward(self, log_probs, targets, input_lengths, target_lengths): max_targets_len = target_lengths.max() # Force cast joint to float32 - # TODO: Remove once Numba supports FP16 - if self._force_float32 and log_probs.dtype != torch.float32: + if not self._force_float32 and numba_utils.NUMBA_FP16_SUPPORTED: + # Execute the kernel in fp16 + pass + elif self._force_float32 and log_probs.dtype != torch.float32: + # Log just once if fp16 tensor was passed and fp16 Numba CUDA loss could not be used. 
+ if log_probs.dtype == torch.float16 and not self._fp16_compat_checked: + _, reason = numba_utils.is_numba_cuda_fp16_supported(return_reason=True) + logging.warning( + f"Provided RNNT Joint tensor is of dtype {log_probs.dtype}, but RNNT loss could not be calculated " + f"in fp16 due to following reason stated below. Loss will be calculated in fp32. \n\n" + f"{reason}", + mode=logging_mode.ONCE, + ) + self._fp16_compat_checked = True + + # Upcast the activation tensor and compute loss and grads in fp32 logits_orig = log_probs log_probs = log_probs.float() del logits_orig # save memory *before* computing the loss diff --git a/nemo/collections/asr/losses/rnnt_pytorch.py b/nemo/collections/asr/losses/rnnt_pytorch.py index bc6e5a25a3b2..c8eee90a2eb5 100644 --- a/nemo/collections/asr/losses/rnnt_pytorch.py +++ b/nemo/collections/asr/losses/rnnt_pytorch.py @@ -47,7 +47,12 @@ def __init__(self, blank, reduction): self.reduction = reduction def forward(self, acts, labels, act_lens, label_lens): + # CPU patch for FP16 + if not acts.is_cuda and acts.dtype == torch.float16: + acts = acts.float() + acts = torch.log_softmax(acts, -1) + forward_logprob = self.compute_forward_prob(acts, labels, act_lens, label_lens) losses = -forward_logprob if self.reduction == 'mean_batch': diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py index 118ee88acbfe..046aea425e20 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt.py @@ -186,7 +186,7 @@ def rnnt_loss_gpu( # Select GPU index cuda.select_device(acts.device.index) - gpu_workspace = torch.zeros(gpu_size, device=acts.device, dtype=acts.dtype, requires_grad=False) + gpu_workspace = torch.zeros(gpu_size, device=acts.device, dtype=torch.float32, requires_grad=False) ### VIEW TENSORS AS VECTORS FOR POINTER INDEXING ### acts, acts_shape = rnnt_helper.flatten_tensor(acts) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py index eaa6d332a0fc..58508970aa83 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_numpy.py @@ -344,10 +344,15 @@ def forward(self, acts, labels, act_lens, label_lens): _assert_no_grad(label_lens) certify_inputs(acts, labels, act_lens, label_lens) + # CPU Patch for fp16 - force cast to fp32 + if not acts.is_cuda and acts.dtype == torch.float16: + acts = acts.float() + if self.clamp > 0.0: acts = LogSoftmaxGradModification.apply(acts, self.clamp) acts = torch.nn.functional.log_softmax(acts, -1) + return self.rnnt(acts, labels, act_lens, label_lens, self.blank, self.fastemit_lambda) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py index 2ffe08be361e..5960d5ab6b18 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py @@ -57,7 +57,7 @@ def forward(ctx, acts, labels, act_lens, label_lens, blank, reduction, fastemit_ loss_func = rnnt.rnnt_loss_gpu if is_cuda else rnnt.rnnt_loss_cpu grads = torch.zeros_like(acts) if acts.requires_grad else None minibatch_size = acts.size(0) - costs = torch.zeros(minibatch_size, device=acts.device, dtype=acts.dtype) + costs = torch.zeros(minibatch_size, device=acts.device, dtype=torch.float32) loss_func( acts, @@ -119,7 +119,6 @@ def forward( label_lens: Tensor of (batch) containing 
label length of each example fastemit_lambda: Float scaling factor for FastEmit regularization. Refer to FastEmit: Low-latency Streaming ASR with Sequence-level Emission Regularization. - durations: list of durations for TDT model, must include 0 and 1, e.g. [0, 1, 2, 3, 4]. sigma: hyper-parameter for logit under-normalization method for training @@ -417,6 +416,10 @@ def forward(self, acts, labels, act_lens, label_lens): label_lens: Tensor of (batch) containing label length of each example """ if not acts.is_cuda: + # Force FP32 until log_softmax() is implemented for fp16 on CPU + if acts.dtype == torch.float16: + acts = acts.float() + # Since CPU requires log_softmax to be computed explicitly, we need to perform grad clipping # *after* we have obtained the gradients of loss(logsoftmax()). # This is highly wasteful since it requires a copy of the entire joint tensor which is expensive. diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py index 1528606716e1..3feb7b513a50 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/cpu_utils/cpu_rnnt.py @@ -231,8 +231,8 @@ def cost_and_grad_kernel( ) # Scale llForward by FastEmit lambda - llForward *= 1.0 + self.fastemit_lambda_ - llBackward *= 1.0 + self.fastemit_lambda_ + llForward += llForward * self.fastemit_lambda_ + llBackward += llBackward * self.fastemit_lambda_ diff = (llForward - llBackward).abs() if diff > 0.1: @@ -300,6 +300,10 @@ def compute_betas_and_grads( Returns: Loglikelihood of the forward variable and inplace updates the grad tensor. """ + # Patch for CPU + fp16 + if log_probs.dtype == torch.float16 and not log_probs.is_cuda: + log_probs = log_probs.float() + idx = CpuRNNT_index(U, self.maxU_, self.minibatch_, self.alphabet_size_, self.batch_first) betas[idx(T - 1, U - 1)] = log_probs[idx(T - 1, U - 1) * 2] diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py b/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py index b579b7315ef2..6ca7cd237264 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/utils/rnnt_helper.py @@ -30,6 +30,7 @@ import math from typing import Optional, Tuple +import numba import torch from numba import cuda @@ -112,7 +113,7 @@ def compute_costs_data(source: torch.Tensor, dest: torch.Tensor, fastemit_lambda if idx < length: copy_data_1d(source, dest, idx) dest[idx] *= -1.0 - dest[idx] *= 1.0 + fastemit_lambda + dest[idx] *= numba.float32(1.0 + fastemit_lambda) def get_workspace_size( diff --git a/nemo/core/utils/numba_utils.py b/nemo/core/utils/numba_utils.py index 6e1a8cb247d6..04010a2f7db4 100644 --- a/nemo/core/utils/numba_utils.py +++ b/nemo/core/utils/numba_utils.py @@ -17,6 +17,8 @@ import operator import os +from typing import Tuple, Union + from nemo.utils import model_utils # Prevent Numba CUDA logs from showing at info level @@ -26,6 +28,11 @@ __NUMBA_DEFAULT_MINIMUM_VERSION__ = "0.53.0" __NUMBA_MINIMUM_VERSION__ = os.environ.get("NEMO_NUMBA_MINVER", __NUMBA_DEFAULT_MINIMUM_VERSION__) +__NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__ = "0.57.0" +NUMBA_FP16_SUPPORTED = model_utils.check_lib_version( + 'numba', __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__, operator=operator.ge +)[0] + NUMBA_INSTALLATION_MESSAGE = ( "Could not import `numba`.\n" @@ -148,6 +155,35 @@ def numba_cuda_is_supported(min_version: str) -> bool: 
return False +def is_numba_cuda_fp16_supported(return_reason: bool = False) -> Union[bool, Tuple[bool, str]]: + """ + Utility method that returns a bool, stating if FP16 is supported for numba cuda kernels or not. + + Returns: + bool, whether Numba CUDA will support fp16 or not. + """ + reason = "" + use_nvidia_binding = os.environ.get('NUMBA_CUDA_USE_NVIDIA_BINDING', None) + if use_nvidia_binding is not None: + use_nvidia_binding = use_nvidia_binding.lower() == "1" + reason += "Env variable `NUMBA_CUDA_USE_NVIDIA_BINDING` is available and set to `1`. " + else: + use_nvidia_binding = False + reason += "Env variable `NUMBA_CUDA_USE_NVIDIA_BINDING` is not available or has not set to `1`." + + if NUMBA_FP16_SUPPORTED: + reason += f"Numba CUDA FP16 is supported in installed numba version." + else: + reason += f"Numba CUDA FP16 is not supported in installed numba version." + + result = use_nvidia_binding and NUMBA_FP16_SUPPORTED + + if return_reason: + return result, reason + else: + return result + + def skip_numba_cuda_test_if_unsupported(min_version: str): """ Helper method to skip pytest test case if numba cuda is not supported. diff --git a/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py b/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py index 3fbfcf6df54b..1a29a14f540d 100644 --- a/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py +++ b/tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py @@ -34,9 +34,14 @@ DEVICES.append('cuda') +DTYPES = [np.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(np.float16) + + def wrap_and_call(fn, acts, labels, device): if not torch.is_tensor(acts): - acts = torch.FloatTensor(acts) + acts = torch.tensor(acts) if 'cuda' in device: acts = acts.cuda() @@ -72,7 +77,8 @@ def wrap_and_call(fn, acts, labels, device): class TestRNNTLossPytorch: @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_small(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_small(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -83,9 +89,13 @@ def test_case_small(self, device): [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], ] ] - ) + ).astype(dtype) labels = [[1, 2]] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + fn_pt = RNNTLossNumba(blank=0, reduction='sum') pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -113,23 +123,28 @@ def test_case_small(self, device): ] ) - assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, expected_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, expected_cost, atol=cost_threshold, rtol=1e-6), "small_test costs mismatch." + assert np.allclose(pt_grads, expected_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." - assert np.allclose(ag_cost, np_cost, rtol=1e-6), "small_test costs mismatch." 
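The capability check added above also determines whether callers can keep the RNNT joint tensor in fp16 at all. The snippet below is a minimal illustrative sketch (not part of this patch) of that decision, assuming only the numba_utils helper introduced here and NeMo's logging module; the warning text and function name are hypothetical.

    import torch

    from nemo.core.utils import numba_utils
    from nemo.utils import logging


    def select_joint_dtype() -> torch.dtype:
        # Keep fp16 only when the installed numba can run fp16 CUDA kernels;
        # otherwise fall back to fp32, mirroring the force_float32 default above.
        supported, reason = numba_utils.is_numba_cuda_fp16_supported(return_reason=True)
        if supported:
            return torch.float16
        logging.warning(f"Computing RNNT loss in fp32 instead of fp16: {reason}")
        return torch.float32


    joint_dtype = select_joint_dtype()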
- assert np.allclose(ag_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(ag_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(ag_grads, np_grads, atol=cost_threshold, rtol=rtol), "small_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_small_random(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_small_random(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + rng = np.random.RandomState(0) - acts = rng.randn(1, 4, 3, 3) + acts = rng.randn(1, 4, 3, 3).astype(dtype) labels = [[1, 2]] fn_pt = RNNTLossNumba(blank=0, reduction='sum') @@ -141,16 +156,17 @@ def test_case_small_random(self, device): fn_ag = RNNTLossPytorch(blank=0, reduction='sum') # ag for automatic gradient computation ag_cost, ag_grads = wrap_and_call(fn_ag, acts, labels, device) - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_random_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_random_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_random_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_random_test gradient mismatch." - assert np.allclose(pt_cost, ag_cost, rtol=1e-6), "small_random_test costs mismatch." - assert np.allclose(pt_grads, ag_grads), "small_random_test gradient mismatch." + assert np.allclose(pt_cost, ag_cost, atol=cost_threshold, rtol=rtol), "small_random_test costs mismatch." + assert np.allclose(pt_grads, ag_grads, atol=grad_threshold, rtol=rtol), "small_random_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) + @pytest.mark.parametrize('dtype', DTYPES) @pytest.mark.parametrize('fastemit_lambda', [1.0, 0.01, 0.00001]) - def test_case_small_random_fastemit_reg(self, device, fastemit_lambda): + def test_case_small_random_fastemit_reg(self, device, dtype, fastemit_lambda): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -165,11 +181,12 @@ def test_case_small_random_fastemit_reg(self, device, fastemit_lambda): np_cost, np_grads = wrap_and_call(fn_np, acts, labels, device) assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_random_test costs mismatch." - assert np.allclose(pt_grads, np_grads, atol=1e-5, rtol=1e-5), "small_random_test gradient mismatch." + assert np.allclose(pt_grads, np_grads, rtol=1e-5), "small_random_test gradient mismatch." 
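The loosened atol/rtol values above follow one pattern throughout the file: fp32 keeps the old tight thresholds, while fp16 gets wider thresholds to absorb half-precision quantization of the activations. Below is a self-contained sketch of that pattern with illustrative thresholds (not the exact values used in these tests); the helper name and sizes are assumptions for demonstration only.

    import numpy as np
    import torch


    def check_against_fp32_reference(dtype, atol, rtol):
        # fp32 reference on the raw activations.
        rng = np.random.RandomState(0)
        acts = rng.randn(1, 4, 3, 3).astype(np.float32)
        ref = torch.log_softmax(torch.from_numpy(acts), dim=-1).numpy()

        # Quantize the activations to the test dtype, then upcast and recompute,
        # mimicking the fp16 -> fp32 upcast the loss performs on CPU.
        quantized = acts.astype(dtype)
        out = torch.log_softmax(torch.from_numpy(quantized).float(), dim=-1).numpy()

        assert np.allclose(out, ref, atol=atol, rtol=rtol)


    check_against_fp32_reference(np.float32, atol=1e-8, rtol=1e-5)  # tight, as before
    check_against_fp32_reference(np.float16, atol=2e-3, rtol=1e-2)  # loosened for half precision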
@pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_big_tensor(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_big_tensor(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -269,9 +286,13 @@ def test_case_big_tensor(self, device): ], ] - activations = np.array(activations) + activations = np.array(activations).astype(dtype) labels = [[1, 2], [1, 1]] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-3 if dtype == np.float32 else 0.1 + fn_pt = RNNTLossNumba(blank=0, reduction='sum') pt_costs, pt_grads = wrap_and_call(fn_pt, activations, labels, device) @@ -281,23 +302,30 @@ def test_case_big_tensor(self, device): fn_ag = RNNTLossPytorch(blank=0, reduction='sum') ag_costs, ag_grads = wrap_and_call(fn_ag, activations, labels, device) - assert np.allclose(pt_costs, sum(expected_costs)), "big_test average costs mismatch." - assert np.allclose(pt_grads, expected_grads, rtol=1e-3), "big_test grads for average cost mismatch." + assert np.allclose(pt_costs, sum(expected_costs), atol=cost_threshold), "big_test average costs mismatch." + assert np.allclose( + pt_grads, expected_grads, atol=grad_threshold, rtol=1e-3 + ), "big_test grads for average cost mismatch." - assert np.allclose(pt_costs, np_costs), "big_test average costs mismatch." - assert np.allclose(pt_grads, np_grads, rtol=1e-3), "big_test grads for average cost mismatch." + assert np.allclose(pt_costs, np_costs, atol=cost_threshold, rtol=rtol), "big_test average costs mismatch." + assert np.allclose( + pt_grads, np_grads, atol=grad_threshold, rtol=rtol + ), "big_test grads for average cost mismatch." - assert np.allclose(pt_costs, ag_costs), "big_test average costs mismatch." - assert np.allclose(pt_grads, ag_grads, rtol=1e-3), "big_test grads for average cost mismatch." + assert np.allclose(pt_costs, ag_costs, atol=cost_threshold, rtol=rtol), "big_test average costs mismatch." + assert np.allclose( + pt_grads, ag_grads, atol=grad_threshold, rtol=rtol + ), "big_test grads for average cost mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_large_random(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_large_random(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) rng = np.random.RandomState(0) - acts = rng.randn(4, 8, 11, 5) + acts = rng.randn(4, 8, 11, 5).astype(dtype) labels = [ [1, 2, 4, 3, 2, 2, 1, 1, 1, 1], [3, 2, 2, 3, 4, 1, 1, 1, 1, 1], @@ -305,6 +333,10 @@ def test_case_large_random(self, device): [1, 1, 2, 1, 2, 3, 3, 1, 1, 1], ] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 1e-4 + rtol = 1e-3 if dtype == np.float32 else 5e-2 + fn_pt = RNNTLossNumba(blank=0, reduction='sum') pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -314,14 +346,15 @@ def test_case_large_random(self, device): fn_ag = RNNTLossPytorch(blank=0, reduction='sum') ag_cost, ag_grads = wrap_and_call(fn_ag, acts, labels, device) - assert np.allclose(pt_cost, np_cost, atol=1e-5, rtol=1e-3), "large_random_test costs mismatch." - assert np.allclose(ag_cost, np_cost, atol=1e-5, rtol=1e-3), "large_random_test costs mismatch." - assert np.allclose(pt_grads, np_grads, atol=1e-5, rtol=1e-3), "large_random_test gradient mismatch." 
- assert np.allclose(ag_grads, np_grads, atol=1e-5, rtol=1e-3), "large_random_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "large_random_test costs mismatch." + assert np.allclose(ag_cost, np_cost, atol=cost_threshold, rtol=rtol), "large_random_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "large_random_test gradient mismatch." + assert np.allclose(ag_grads, np_grads, atol=grad_threshold, rtol=rtol), "large_random_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) - def test_case_small_clamp(self, device): + @pytest.mark.parametrize('dtype', DTYPES) + def test_case_small_clamp(self, device, dtype): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -333,9 +366,13 @@ def test_case_small_clamp(self, device): [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], ] ] - ) + ).astype(dtype) labels = [[1, 2]] + cost_threshold = 1e-8 if dtype == np.float32 else 5e-4 + grad_threshold = 1e-8 if dtype == np.float32 else 5e-5 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + fn_pt = RNNTLossNumba(blank=0, reduction='sum', clamp=GRAD_CLAMP) pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -360,16 +397,17 @@ def test_case_small_clamp(self, device): ] ) - assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, expected_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, expected_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, expected_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_test gradient mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) + @pytest.mark.parametrize('dtype', DTYPES) @pytest.mark.parametrize('fastemit_lambda', [1.0, 0.01, 0.00001]) - def test_case_small_fastemit_clamp(self, device, fastemit_lambda): + def test_case_small_fastemit_clamp(self, device, dtype, fastemit_lambda): if device == 'cuda': numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) @@ -381,9 +419,13 @@ def test_case_small_fastemit_clamp(self, device, fastemit_lambda): [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], ] ] - ) + ).astype(dtype) labels = [[1, 2]] + cost_threshold = 1e-8 if dtype == np.float32 else 1e-3 + grad_threshold = 1e-8 if dtype == np.float32 else 5e-4 + rtol = 1e-5 if dtype == np.float32 else 1e-3 + fn_pt = RNNTLossNumba(blank=0, reduction='sum', fastemit_lambda=fastemit_lambda, clamp=GRAD_CLAMP) pt_cost, pt_grads = wrap_and_call(fn_pt, acts, labels, device) @@ -393,9 +435,9 @@ def test_case_small_fastemit_clamp(self, device, fastemit_lambda): expected_cost = 4.495666 expected_cost += expected_cost * fastemit_lambda - assert np.allclose(pt_cost, expected_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_cost, np_cost, rtol=1e-6), "small_test costs mismatch." - assert np.allclose(pt_grads, np_grads), "small_test gradient mismatch." 
+ assert np.allclose(pt_cost, expected_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_cost, np_cost, atol=cost_threshold, rtol=rtol), "small_test costs mismatch." + assert np.allclose(pt_grads, np_grads, atol=grad_threshold, rtol=rtol), "small_test gradient mismatch." @pytest.mark.unit @pytest.mark.parametrize('device', DEVICES) diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py b/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py index 230b6b7c099f..cb5a9816e237 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_gpu_rnnt_kernel.py @@ -25,8 +25,14 @@ from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ +DTYPES = [torch.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(torch.float16) + + def log_softmax(x, axis=-1): x = torch.from_numpy(x) # zero-copy + x = x.float() x = torch.log_softmax(x, dim=axis) x = x.numpy() return x @@ -42,12 +48,14 @@ def log_softmax_grad(x, axis=-1): class TestRNNTCUDAKernels: @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_alphas_kernel(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_alphas_kernel(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-4 # Numpy kernel x = random.randn(*original_shape) @@ -67,7 +75,7 @@ def test_compute_alphas_kernel(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = torch.tensor(labels, device=device, dtype=torch.int64) # Allocate workspace memory @@ -100,22 +108,24 @@ def test_compute_alphas_kernel(self): alphas = alphas.view([B, T, U]) diff = ground_alphas - alphas[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) ll_diff = ground_log_likelihood - llForward[0].cpu().numpy() - assert np.abs(ll_diff).mean() <= 1e-5 - assert np.square(ll_diff).mean() <= 1e-10 + assert np.abs(ll_diff).mean() <= threshold + assert np.square(ll_diff).mean() <= (threshold ** 2) @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_betas_kernel(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_betas_kernel(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-4 # Numpy kernel x = random.randn(*original_shape) @@ -135,7 +145,7 @@ def test_compute_betas_kernel(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = torch.tensor(labels, device=device, dtype=torch.int64) # Allocate workspace memory @@ -168,17 +178,18 @@ def test_compute_betas_kernel(self): betas = betas.view([B, T, U]) diff = ground_alphas - betas[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 
1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) ll_diff = ground_log_likelihood - llBackward[0].cpu().numpy() - assert np.abs(ll_diff).mean() <= 1e-5 - assert np.square(ll_diff).mean() <= 1e-10 + assert np.abs(ll_diff).mean() <= threshold + assert np.square(ll_diff).mean() <= (threshold ** 2) @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_grads_kernel(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_grads_kernel(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) fastemit_lambda = 0.0 @@ -187,6 +198,7 @@ def test_compute_grads_kernel(self): random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-5 # Numpy kernel x = random.randn(*original_shape) @@ -220,7 +232,7 @@ def test_compute_grads_kernel(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = labels.clone().to(device=device, dtype=torch.int64) # Allocate workspace memory @@ -283,12 +295,13 @@ def test_compute_grads_kernel(self): grads = grads.view([B, T, U, V]) diff = true_grads - grads[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) * 5.0 @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_grads_kernel_fastemit(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_grads_kernel_fastemit(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) fastemit_lambda = 0.001 @@ -297,6 +310,7 @@ def test_compute_grads_kernel_fastemit(self): random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-5 # Numpy kernel x = random.randn(*original_shape) @@ -330,7 +344,7 @@ def test_compute_grads_kernel_fastemit(self): else: stream = cuda.default_stream() - x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = labels.clone().to(device=device, dtype=torch.int64) # Allocate workspace memory @@ -393,12 +407,13 @@ def test_compute_grads_kernel_fastemit(self): grads = grads.view([B, T, U, V]) diff = true_grads - grads[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) * 5 @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_compute_grads_kernel_clamp(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_compute_grads_kernel_clamp(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) fastemit_lambda = 0.0 @@ -407,6 +422,7 @@ def test_compute_grads_kernel_clamp(self): random = np.random.RandomState(0) original_shape = [1, 5, 11, 3] B, T, U, V = original_shape + threshold = 1e-5 if dtype == torch.float32 else 3e-5 # Numpy kernel x = random.randn(*original_shape) @@ -440,7 +456,7 @@ def test_compute_grads_kernel_clamp(self): else: stream = cuda.default_stream() - 
x_c = torch.tensor(x, device=device, dtype=torch.float32) + x_c = torch.tensor(x, device=device, dtype=dtype) labels_c = labels.clone().to(device=device, dtype=torch.int64) # Allocate workspace memory @@ -503,8 +519,8 @@ def test_compute_grads_kernel_clamp(self): grads = grads.view([B, T, U, V]) diff = true_grads - grads[0].cpu().numpy() - assert np.abs(diff).mean() <= 1e-5 - assert np.square(diff).mean() <= 1e-10 + assert np.abs(diff).mean() <= threshold + assert np.square(diff).mean() <= (threshold ** 2) * 5 class TestTDTCUDAKernels: diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py b/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py index 7c2ba6a41208..5994d53e1d8f 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py @@ -20,17 +20,22 @@ from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ +DTYPES = [np.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(np.float16) + class TestRNNTCUDAReductions: @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_reduce_max(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_reduce_max(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 4, 3] - x = random.randn(*original_shape).reshape([-1]) - dx = random.randn(*x.shape) + x = random.randn(*original_shape).reshape([-1]).astype(dtype) + dx = random.randn(*x.shape).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -53,13 +58,14 @@ def test_reduce_max(self): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Reductions can only be run when CUDA is available") @pytest.mark.unit - def test_reduce_exp(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_reduce_exp(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) random = np.random.RandomState(0) original_shape = [1, 5, 4, 2] - x = random.randn(*original_shape).reshape([-1]) - dx = np.zeros_like(x) + x = random.randn(*original_shape).reshape([-1]).astype(dtype) + dx = np.zeros_like(x).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) diff --git a/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py b/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py index 243fe727e172..08f12da8324d 100644 --- a/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py +++ b/tests/collections/asr/numba/rnnt_loss/utils/test_rnnt_helper.py @@ -20,11 +20,16 @@ from nemo.core.utils import numba_utils from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ +DTYPES = [np.float32] +if numba_utils.is_numba_cuda_fp16_supported(): + DTYPES.append(np.float16) + class TestRNNTHelper: @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_log_sum_exp(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_log_sum_exp(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -34,8 +39,9 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.log_sum_exp(x[x_pos], y[x_pos]) - x = np.zeros([8]) # np.random.rand(8192) - y = np.ones([8]) # np.random.rand(8192) + x = 
np.zeros([8]).astype(dtype) # np.random.rand(8192) + y = np.ones([8]).astype(dtype) # np.random.rand(8192) + threshold = 1e-5 if dtype == np.float32 else 2e-3 stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -52,11 +58,12 @@ def _kernel(x, y): x_new = x_c.copy_to_host(stream=stream) del x_c, y_c - assert (x_new.sum() - 10.506093500145782) <= 1e-5 + assert (x_new.sum() - 10.506093500145782) <= threshold @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_log_sum_exp_neg_inf(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_log_sum_exp_neg_inf(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -66,8 +73,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.log_sum_exp(x[x_pos], y[x_pos]) - x = np.asarray([global_constants.FP32_NEG_INF] * 8) - y = np.ones([len(x)]) + x = np.asarray([global_constants.FP32_NEG_INF] * 8).astype(dtype) + y = np.ones([len(x)]).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -88,7 +95,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_div_up(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_div_up(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -98,8 +106,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.div_up(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) - y = np.full([8], fill_value=2) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -121,7 +129,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_add(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_add(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -131,8 +140,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.add(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) - y = np.full([8], fill_value=2) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -154,7 +163,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_maximum(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_maximum(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -164,8 +174,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.maximum(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) - y = np.full([8], fill_value=2) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # 
np.random.rand(8192) + y = np.full([8], fill_value=2).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -187,7 +197,8 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_identity(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_identity(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -197,7 +208,7 @@ def _kernel(x): if x_pos < x.shape[0]: x[x_pos] = rnnt_helper.identity(x[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -218,7 +229,8 @@ def _kernel(x): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_negate(self): + @pytest.mark.parametrize('dtype', [np.float32, np.float16]) + def test_negate(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -228,7 +240,7 @@ def _kernel(x): if x_pos < x.shape[0]: x[x_pos] = rnnt_helper.negate(x[x_pos]) - x = np.full([8], fill_value=10) # np.random.rand(8192) + x = np.full([8], fill_value=10).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -249,7 +261,8 @@ def _kernel(x): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_exponential(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_exponential(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -259,7 +272,7 @@ def _kernel(x): if x_pos < x.shape[0]: x[x_pos] = rnnt_helper.exponential(x[x_pos]) - x = np.random.rand(8) + x = np.random.rand(8).astype(dtype) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -281,7 +294,8 @@ def _kernel(x): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.unit - def test_log_plus(self): + @pytest.mark.parametrize('dtype', DTYPES) + def test_log_plus(self, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) # wrapper kernel for device function that is tested @@ -291,8 +305,8 @@ def _kernel(x, y): if x_pos < x.shape[0] and x_pos < y.shape[0]: x[x_pos] = rnnt_helper.log_plus(x[x_pos], y[x_pos]) - x = np.full([8], fill_value=10.0) # np.random.rand(8192) - y = np.full([8], fill_value=2.0) # np.random.rand(8192) + x = np.full([8], fill_value=10.0).astype(dtype) # np.random.rand(8192) + y = np.full([8], fill_value=2.0).astype(dtype) # np.random.rand(8192) stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -317,12 +331,15 @@ def _kernel(x, y): @pytest.mark.skipif(not cuda.is_available(), reason="CUDA Helpers can only be run when CUDA is available") @pytest.mark.parametrize('batch_size', [8, 128, 256]) @pytest.mark.parametrize('fastemit_lambda', [0.0, 0.001]) + @pytest.mark.parametrize('dtype', DTYPES) @pytest.mark.unit - def test_compute_costs_data(self, batch_size, fastemit_lambda): + def test_compute_costs_data(self, batch_size, fastemit_lambda, dtype): numba_utils.skip_numba_cuda_test_if_unsupported(__NUMBA_MINIMUM_VERSION__) + 
np.random.seed(0) x = np.full([batch_size], fill_value=0.0) # np.random.rand(8192) - y = np.random.randn(batch_size) # np.random.rand(8192) + y = np.random.randn(batch_size).astype(dtype) # np.random.rand(8192) + threshold = 1e-5 if dtype == np.float32 else 1e-5 stream = cuda.stream() x_c = cuda.to_device(x, stream=stream) @@ -340,11 +357,11 @@ def test_compute_costs_data(self, batch_size, fastemit_lambda): x_new = x_c.copy_to_host(stream=stream) del x_c, y_c - res = -(y.copy()) + res = -(y.astype(np.float32).copy()) res *= 1.0 + fastemit_lambda for i in range(len(x_new)): - assert x_new[i] == res[i], f"index failed {i}" + assert abs(x_new[i] - res[i]) < threshold, f"index failed {i}" if __name__ == '__main__': From a69f0f4417e071965b2f0dc1910687aec4bfc90e Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Wed, 12 Jul 2023 10:21:41 -0700 Subject: [PATCH 100/123] ptuning inference table bug fix (#7015) * remove hardcoded input and output Signed-off-by: arendu * fix inf table Signed-off-by: arendu --------- Signed-off-by: arendu Signed-off-by: Adi Renduchintala --- .../nlp/modules/common/megatron/adapters/parallel_adapters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index fe339c6f9a8b..1818d33dc0d3 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -268,7 +268,7 @@ def __init__( # (@adithyare) the persistent=False will not pollute the indices into the state_dict of this module. self.register_buffer("indices", torch.LongTensor(list(range(self.virtual_tokens))), persistent=False) self.embedding = torch.nn.Embedding(self.virtual_tokens, self.embedding_dim) - self.inference_table = InferenceTable("taskname", self.embedding_dim, self.virtual_tokens) + self.inference_table = InferenceTable("taskname", self.output_dim, self.virtual_tokens) self.first = ColumnParallelLinear( self.embedding_dim, self.bottleneck_dim, From 728403d8c20069865e75effbbb7cfbfccce6589e Mon Sep 17 00:00:00 2001 From: Ryan Langman Date: Wed, 12 Jul 2023 14:08:56 -0700 Subject: [PATCH 101/123] [TTS] Add tutorial for TTS data prep scripts (#6922) * [TTS] Add tutorial for TTS data prep scripts --------- Signed-off-by: Ryan Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com> --- .../tts/FastPitch_Data_Preparation.ipynb | 1126 +++++++++++++++++ 1 file changed, 1126 insertions(+) create mode 100644 tutorials/tts/FastPitch_Data_Preparation.ipynb diff --git a/tutorials/tts/FastPitch_Data_Preparation.ipynb b/tutorials/tts/FastPitch_Data_Preparation.ipynb new file mode 100644 index 000000000000..99cb32171da7 --- /dev/null +++ b/tutorials/tts/FastPitch_Data_Preparation.ipynb @@ -0,0 +1,1126 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4", + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "gpuClass": "standard" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Introduction" + ], + "metadata": { + "id": "rtBDkKqVGZJ8" + } + }, + { + "cell_type": "markdown", + "source": [ + "In this tutorial, we will prepare a dataset using our [TTS Dataset Processing 
Scripts](https://github.com/NVIDIA/NeMo/tree/main/scripts/dataset_processing/tts) and use it for training a FastPitch model.\n", + "\n", + "**This tutorial uses a different workflow than all other existing TTS tutorials. The scripts and classes used are all experimental and not yet ready for production**." + ], + "metadata": { + "id": "pZ2QSsXuGbMe" + } + }, + { + "cell_type": "markdown", + "source": [ + "# License" + ], + "metadata": { + "id": "7X-TwhdTGmlc" + } + }, + { + "cell_type": "markdown", + "source": [ + "> Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.\n", + ">\n", + "> Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", + ">\n", + "> http://www.apache.org/licenses/LICENSE-2.0\n", + ">\n", + "> Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." + ], + "metadata": { + "id": "fCQUeZRPGnoe" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Install" + ], + "metadata": { + "id": "3OZassNG5xff" + } + }, + { + "cell_type": "code", + "source": [ + "BRANCH = 'main'\n", + "NEMO_ROOT_DIR = '/content/nemo'" + ], + "metadata": { + "id": "QLLoj7bD0W5f" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WZvQvPkIhRi3" + }, + "outputs": [], + "source": [ + "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", + "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", + "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" + ] + }, + { + "cell_type": "code", + "source": [ + "# Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n", + "# comment out the below lines and set NEMO_ROOT_DIR to your local path.\n", + "!git clone -b $BRANCH https://github.com/NVIDIA/NeMo.git $NEMO_ROOT_DIR" + ], + "metadata": { + "id": "tvsgWO_WhV3M" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Dataset Preparation" + ], + "metadata": { + "id": "fM4QPsLTnzK7" + } + }, + { + "cell_type": "markdown", + "source": [ + "For our tutorial, we use a subset of [VCTK](https://datashare.ed.ac.uk/handle/10283/2950) dataset with 5 speakers (p225-p229)." 
+ ], + "metadata": { + "id": "tkZC6Dl7KRl6" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import tarfile\n", + "import wget\n", + "from pathlib import Path\n", + "\n", + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest" + ], + "metadata": { + "id": "sYzvAYr2vo1K" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Configure nemo paths\n", + "NEMO_DIR = Path(NEMO_ROOT_DIR)\n", + "NEMO_EXAMPLES_DIR = NEMO_DIR / \"examples\" / \"tts\"\n", + "NEMO_CONFIG_DIR = NEMO_EXAMPLES_DIR / \"conf\"\n", + "NEMO_SCRIPT_DIR = NEMO_DIR / \"scripts\" / \"dataset_processing\" / \"tts\"" + ], + "metadata": { + "id": "APo1m5M-v3pB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Create dataset directory\n", + "root_dir = Path(\"/content\")\n", + "data_root = root_dir / \"data\"\n", + "\n", + "data_root.mkdir(parents=True, exist_ok=True)" + ], + "metadata": { + "id": "aoxN1QsUzX-k" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Download the dataset\n", + "dataset_url = \"https://vctk-subset.s3.amazonaws.com/vctk_subset_multispeaker.tar.gz\"\n", + "dataset_tar_filepath = data_root / \"vctk.tar.gz\"\n", + "\n", + "if not os.path.exists(dataset_tar_filepath):\n", + " wget.download(dataset_url, out=str(dataset_tar_filepath))" + ], + "metadata": { + "id": "mArlQd5Hk36b" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Extract the dataset\n", + "with tarfile.open(dataset_tar_filepath) as tar_f:\n", + " tar_f.extractall(data_root)" + ], + "metadata": { + "id": "p987cjtOy9C7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "DATA_DIR = data_root / \"vctk_subset_multispeaker\"" + ], + "metadata": { + "id": "Ko6dxYJW0i3G" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Visualize the raw dataset\n", + "train_raw_filepath = DATA_DIR / \"train.json\"\n", + "!head $train_raw_filepath" + ], + "metadata": { + "id": "We5FHYQt5BeO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Manifest Processing" + ], + "metadata": { + "id": "i3jsk2HCMSU5" + } + }, + { + "cell_type": "markdown", + "source": [ + "The downloaded manifest uses our traditional format for TTS training. The scripts here require it to be formatted slightly differently.\n", + "\n", + "The `speaker` field used to be an *integer* ID corresponding to an array index that the FastPitch model would query. Now we represent it as a *string* so we can give each speaker a human-friendly name. The mapping from speaker name to speaker index will be provided at training time.\n", + "\n", + "As a best practice, we suggest prepending the `speaker` field with the name of the dataset so that it is guaranteed to be unique across all datasets (eg. *vctk_225*, instead of *225*).\n", + "\n", + "The `audio_filepath` field used to require an *absolute path* which had to be manually updated depending on where the dataset was on your computer. Absolute paths still work, but now you can optionally provide it as a *relative path*, with the root directory provided as an argument to each script." 
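# A minimal sketch of what the two manifest changes described above look like on a
# single entry. The file name, text, and duration are invented for illustration;
# the actual conversion is performed by the update_metadata() cell that follows.
original_entry = {
    "audio_filepath": "audio/p225_001.wav",  # old convention: path under the dataset root
    "text": "Please call Stella.",
    "duration": 2.3,
    "speaker": 225,                          # old convention: integer array index
}

converted_entry = {
    **original_entry,
    "audio_filepath": original_entry["audio_filepath"].replace("audio/", ""),  # relative path
    "speaker": f"vctk_{original_entry['speaker']}",                            # unique string ID
}

print(converted_entry)
# {'audio_filepath': 'p225_001.wav', 'text': 'Please call Stella.', 'duration': 2.3, 'speaker': 'vctk_225'}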
+ ], + "metadata": { + "id": "N8WuAGJsMHRn" + } + }, + { + "cell_type": "code", + "source": [ + "def update_metadata(data_type):\n", + " input_filepath = DATA_DIR / f\"{data_type}.json\"\n", + " output_filepath = DATA_DIR / f\"{data_type}_raw.json\"\n", + "\n", + " entries = read_manifest(input_filepath)\n", + " for entry in entries:\n", + " # Provide relative path instead of absolute path\n", + " entry[\"audio_filepath\"] = entry[\"audio_filepath\"].replace(\"audio/\", \"\")\n", + " # Prepend speaker ID with the name of the dataset: 'vctk'\n", + " entry[\"speaker\"] = f\"vctk_{entry['speaker']}\"\n", + "\n", + " write_manifest(output_path=output_filepath, target_manifest=entries, ensure_ascii=False)" + ], + "metadata": { + "id": "zoCRrKQ20VZP" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "update_metadata(\"dev\")\n", + "update_metadata(\"train\")" + ], + "metadata": { + "id": "PaCc3GCG1UbH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Visualize updated 'audio_filepath' and 'speaker' fields\n", + "train_filepath = DATA_DIR / \"train_raw.json\"\n", + "!head $train_filepath" + ], + "metadata": { + "id": "bVLIB3Ip1Aqn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Text Preprocessing" + ], + "metadata": { + "id": "e3jHTOhL1M5_" + } + }, + { + "cell_type": "markdown", + "source": [ + "First we will process the text transcripts using the script [preprocess_text.py](https://github.com/NVIDIA/NeMo/blob/main/scripts/dataset_processing/tts/preprocess_text.py).\n", + "\n", + "This step mainly passes the text through our NeMo *text normalizer* and then stores the output in the `normalized_text` field. It also has a few optional transformations, such as lowercasing the text." + ], + "metadata": { + "id": "H2rYykFLSR5t" + } + }, + { + "cell_type": "code", + "source": [ + "text_preprocessing_script = NEMO_SCRIPT_DIR / \"preprocess_text.py\"\n", + "\n", + "# Number of threads to parallelize text processing across\n", + "num_workers = 4\n", + "# Text normalizer to apply\n", + "normalizer_config_filepath = NEMO_CONFIG_DIR / \"text\" / \"normalizer_en.yaml\"\n", + "# Whether to lowercase output text. 
We can safely do this here because we will train on IPA phonemes.\n", + "# If training on graphemes only, then consider disabling this to leave text with its original capitalization.\n", + "lower_case = True\n", + "# Whether to overwrite output manifest, if it exists\n", + "overwrite_manifest = True\n", + "\n", + "# Python wrapper to invoke the given bash script with the given input args\n", + "def run_script(script, args):\n", + " args = ' \\\\'.join(args)\n", + " cmd = f\"python {script} \\\\{args}\"\n", + "\n", + " print(cmd.replace(\" \\\\\", \"\\n\"))\n", + " print()\n", + " !$cmd\n", + "\n", + "def preprocess_text(data_type):\n", + " input_filepath = DATA_DIR / f\"{data_type}_raw.json\"\n", + " output_filepath = DATA_DIR / f\"{data_type}_text.json\"\n", + "\n", + " args = [\n", + " f\"--input_manifest={input_filepath}\",\n", + " f\"--output_manifest={output_filepath}\",\n", + " f\"--num_workers={num_workers}\",\n", + " f\"--normalizer_config_path={normalizer_config_filepath}\",\n", + " f\"--lower_case={lower_case}\"\n", + " ]\n", + " if overwrite_manifest:\n", + " args.append(\"--overwrite\")\n", + "\n", + " run_script(text_preprocessing_script, args)" + ], + "metadata": { + "id": "6Z1vRsPd0g2s" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "preprocess_text(\"dev\")" + ], + "metadata": { + "id": "qg6iK3NyrZvx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "preprocess_text(\"train\")" + ], + "metadata": { + "id": "DkLhSL_n1QAS" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Visualize the output of the 'normalized_text' field.\n", + "train_text_filepath = DATA_DIR / \"train_text.json\"\n", + "!head $train_text_filepath" + ], + "metadata": { + "id": "6qHbl0Cf5kQn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Audio Preprocessing" + ], + "metadata": { + "id": "alrRDWio41qi" + } + }, + { + "cell_type": "markdown", + "source": [ + "Next we process the audio data using [preprocess_audio.py](https://github.com/NVIDIA/NeMo/blob/main/scripts/dataset_processing/tts/preprocess_audio.py).\n", + "\n", + "During this step we apply the following transformations:\n", + "\n", + "1. Resample the audio from 48khz to 44.1khz so that it is compatible with our default training configuration.\n", + "2. Remove long silence from the beginning and end of each audio file. This can be done using an *energy* based approach which will work on clean audio, or using *voice activity detection (VAD)* which also works on audio with background or static noise (eg. from a microphone).\n", + "3. Scale the audio so that files have approximately the same volume level.\n", + "4. 
Filter out audio files which are too long or too short.\n", + "\n" + ], + "metadata": { + "id": "4WfEaMwpUsFt" + } + }, + { + "cell_type": "code", + "source": [ + "import IPython.display as ipd" + ], + "metadata": { + "id": "WEvIefjnd7AG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "audio_preprocessing_script = NEMO_SCRIPT_DIR / \"preprocess_audio.py\"\n", + "\n", + "# Directory with raw audio data\n", + "input_audio_dir = DATA_DIR / \"audio\"\n", + "# Directory to write preprocessed audio to\n", + "output_audio_dir = DATA_DIR / \"audio_44khz\"\n", + "# Whether to overwrite existing audio, if it exists in the output directory\n", + "overwrite_audio = True\n", + "# Whether to overwrite output manifest, if it exists\n", + "overwrite_manifest = True\n", + "# Number of threads to parallelize audio processing across\n", + "num_workers = 4\n", + "# Downsample data from 48khz to 44.1khz for compatibility\n", + "output_sample_rate = 44100\n", + "# Method for silence trimming. Can use \"energy.yaml\" or \"vad.yaml\".\n", + "# We use VAD for VCTK because the audio has background noise.\n", + "trim_config_path = NEMO_CONFIG_DIR / \"trim\" / \"vad.yaml\"\n", + "# Volume level (0, 1] to normalize audio to\n", + "volume_level = 0.95\n", + "# Filter out audio shorter than min_duration or longer than max_duration seconds.\n", + "# We set these bounds relatively low/high, as we can place stricter limits at training time\n", + "min_duration = 0.25\n", + "max_duration = 30.0\n", + "# Output file with entries that are filtered out based on duration\n", + "filter_file = DATA_DIR / \"filtered.json\"\n", + "\n", + "def preprocess_audio(data_type):\n", + " input_filepath = DATA_DIR / f\"{data_type}_text.json\"\n", + " output_filepath = DATA_DIR / f\"{data_type}_manifest.json\"\n", + "\n", + " args = [\n", + " f\"--input_manifest={input_filepath}\",\n", + " f\"--output_manifest={output_filepath}\",\n", + " f\"--input_audio_dir={input_audio_dir}\",\n", + " f\"--output_audio_dir={output_audio_dir}\",\n", + " f\"--num_workers={num_workers}\",\n", + " f\"--output_sample_rate={output_sample_rate}\",\n", + " f\"--trim_config_path={trim_config_path}\",\n", + " f\"--volume_level={volume_level}\",\n", + " f\"--min_duration={min_duration}\",\n", + " f\"--max_duration={max_duration}\",\n", + " f\"--filter_file={filter_file}\",\n", + " ]\n", + " if overwrite_manifest:\n", + " args.append(\"--overwrite_manifest\")\n", + " if overwrite_audio:\n", + " args.append(\"--overwrite_audio\")\n", + "\n", + " run_script(audio_preprocessing_script, args)" + ], + "metadata": { + "id": "0kQ1UDnGfdX6" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "preprocess_audio(\"dev\")" + ], + "metadata": { + "id": "ai0zbXSOriuY" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "preprocess_audio(\"train\")" + ], + "metadata": { + "id": "NUKnidQYfgDo" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We should listen to a few audio files before and after the processing so be sure we configured it correctly.\n", + "\n", + "Note that the processed audio is louder. It is also shorter because we trimmed the leading and trailing silence." 
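# A rough approximation of what preprocess_audio.py did to the files played back
# below, shown with librosa and NumPy purely for illustration. librosa is an extra
# assumption here (the NeMo script uses its own energy/VAD trimming configured by
# vad.yaml), and the 40 dB trim threshold is only a plausible default rather than
# the script's actual setting.
import librosa
import numpy as np

def preprocess_one(path, target_sr=44100, volume_level=0.95, top_db=40):
    audio, sr = librosa.load(path, sr=None)                         # keep the native rate
    audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
    audio, _ = librosa.effects.trim(audio, top_db=top_db)           # energy-based silence trim
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = volume_level * audio / peak                         # scale to ~constant volume
    return audio, target_sr

# Example: audio, sr = preprocess_one(str(input_audio_dir / "p228_009.wav"))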
+ ], + "metadata": { + "id": "x2yhJtsj2lDR" + } + }, + { + "cell_type": "code", + "source": [ + "audio_file = \"p228_009.wav\"\n", + "audio_filepath = input_audio_dir / audio_file\n", + "processed_audio_filepath = output_audio_dir / audio_file\n", + "\n", + "print(\"Original audio.\")\n", + "ipd.display(ipd.Audio(audio_filepath))\n", + "\n", + "print(\"Processed audio.\")\n", + "ipd.display(ipd.Audio(processed_audio_filepath))" + ], + "metadata": { + "id": "_fM3GwJxkjOA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Speaker Mapping" + ], + "metadata": { + "id": "d129p0nrr3PD" + } + }, + { + "cell_type": "markdown", + "source": [ + "We can use [create_speaker_map.py](https://github.com/NVIDIA/NeMo/blob/main/scripts/dataset_processing/tts/create_speaker_map.py) to easily create a mapping from speaker ID strings to integer indices that will be used at training time.\n", + "\n", + "The script will simply sort the speaker IDs and assign them numbers `[0, num_speakers)` in alphabetical order." + ], + "metadata": { + "id": "ZJ1MWX3F3X9u" + } + }, + { + "cell_type": "code", + "source": [ + "speaker_map_script = NEMO_SCRIPT_DIR / \"create_speaker_map.py\"\n", + "\n", + "train_manifest_filepath = DATA_DIR / \"train_manifest.json\"\n", + "dev_manifest_filepath = DATA_DIR / \"dev_manifest.json\"\n", + "speaker_filepath = DATA_DIR / \"speakers.json\"\n", + "\n", + "args = [\n", + " f\"--manifest_path={train_manifest_filepath}\",\n", + " f\"--manifest_path={dev_manifest_filepath}\",\n", + " f\"--speaker_map_path={speaker_filepath}\"\n", + "]\n", + "\n", + "run_script(speaker_map_script, args)" + ], + "metadata": { + "id": "b5gdccYhr5Gk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Visualize the speaker map file.\n", + "!head $speaker_filepath" + ], + "metadata": { + "id": "CMcC2Nqmt5AR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Feature Computation" + ], + "metadata": { + "id": "jyFxOjy6t8vo" + } + }, + { + "cell_type": "markdown", + "source": [ + "Before training FastPitch, we need to compute some features for every audio file. The default [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/feature/feature_44100.yaml) we will use has parameters for computing the **pitch** and **energy** of every audio frame. Be default it will also compute a **voiced_mask** indicating which audio frames have no pitch (eg. because they contain silence)." 
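# A small sketch of what "pitch, energy, and voiced_mask per audio frame" can mean
# in practice, computed here with librosa purely for illustration. The real
# featurizers, hop size, and window size are defined by feature_44100.yaml, so the
# values below are placeholders rather than the settings compute_features.py uses.
import librosa
import numpy as np

def frame_features(audio, sr, frame_length=1024, hop_length=256):
    # Per-frame energy: RMS over each analysis window
    energy = librosa.feature.rms(y=audio, frame_length=frame_length, hop_length=hop_length)[0]
    # Per-frame fundamental frequency plus a voiced/unvoiced decision
    f0, voiced_mask, _ = librosa.pyin(
        audio,
        fmin=librosa.note_to_hz("C2"),
        fmax=librosa.note_to_hz("C7"),
        sr=sr,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    pitch = np.nan_to_num(f0)  # pyin reports unvoiced frames as NaN
    return pitch, energy, voiced_mask

# Example: pitch, energy, voiced = frame_features(*librosa.load(str(output_audio_dir / "p228_009.wav"), sr=None))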
+ ], + "metadata": { + "id": "QNPpwkM49orB" + } + }, + { + "cell_type": "code", + "source": [ + "feature_script = NEMO_SCRIPT_DIR / \"compute_features.py\"\n", + "\n", + "feature_config_path = NEMO_CONFIG_DIR / \"feature\" / \"feature_44100.yaml\"\n", + "audio_dir = DATA_DIR / \"audio_44khz\"\n", + "feature_dir = DATA_DIR / \"features_44khz\"\n", + "num_workers = 4\n", + "\n", + "def compute_features(data_type):\n", + " input_filepath = DATA_DIR / f\"{data_type}_manifest.json\"\n", + "\n", + " args = [\n", + " f\"--feature_config_path={feature_config_path}\",\n", + " f\"--manifest_path={input_filepath}\",\n", + " f\"--audio_dir={audio_dir}\",\n", + " f\"--feature_dir={feature_dir}\",\n", + " f\"--num_workers={num_workers}\"\n", + " ]\n", + "\n", + " run_script(feature_script, args)" + ], + "metadata": { + "id": "AI4aLRFbt_NQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "compute_features(\"dev\")" + ], + "metadata": { + "id": "kQqPw3uRwEsO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "compute_features(\"train\")" + ], + "metadata": { + "id": "ct1fN_4pwCu9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The features are stored in the specified `feature_dir`." + ], + "metadata": { + "id": "db83_UcOCOIo" + } + }, + { + "cell_type": "code", + "source": [ + "!ls $feature_dir" + ], + "metadata": { + "id": "_8bHP4j56LWG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Feature Statistics" + ], + "metadata": { + "id": "QsuxK1P0x7hZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "For training it is beneficial for us to *normalize* our features. The most standard approach is to apply *mean-variance normalization* so that each feature has a mean of 0 and variance of 1. To do this we need to compute the *dataset statistics* with the mean and variance of each feature.\n", + "\n", + "For TTS it also helps\n", + "* Normalize features using speaker-level statistics.\n", + "* Use the `voiced_mask` to set the feature values of non-voiced audio frames to 0.\n", + "\n", + "Using the [compute_feature_stats.py](https://github.com/NVIDIA/NeMo/blob/main/scripts/dataset_processing/tts/compute_feature_stats.py) script we will compute the mean and variance of each feature for each speaker. The input to the script is the same [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/feature/feature_44100.yaml) we used to compute the features." + ], + "metadata": { + "id": "O8GiAnAMCNeh" + } + }, + { + "cell_type": "code", + "source": [ + "feature_stats_script = NEMO_SCRIPT_DIR / \"compute_feature_stats.py\"\n", + "\n", + "train_manifest_filepath = DATA_DIR / \"train_manifest.json\"\n", + "output_stats_path = DATA_DIR / \"feature_stats_44khz.json\"\n", + "\n", + "args = [\n", + " f\"--feature_config_path={feature_config_path}\",\n", + " f\"--manifest_path={train_manifest_filepath}\",\n", + " f\"--audio_dir={audio_dir}\",\n", + " f\"--feature_dir={feature_dir}\",\n", + " f\"--stats_path={output_stats_path}\"\n", + "]\n", + "\n", + "run_script(feature_stats_script, args)" + ], + "metadata": { + "id": "DC4c1L3CxH-h" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The output feature statistics file contains the mean and variance of the pitch and energy for the entire dataset (under the key `global`), and for each speaker in the dataset." 
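# A NumPy-only sketch of the normalization described above, to make the role of the
# per-speaker statistics concrete. The key names and numbers in example_stats are
# invented for illustration; the real values come from compute_feature_stats.py and
# are applied inside the training data pipeline.
import numpy as np

def normalize_pitch(pitch, voiced_mask, speaker_stats):
    mean = speaker_stats["pitch_mean"]
    std = np.sqrt(speaker_stats["pitch_variance"])
    normalized = (pitch - mean) / std       # mean-variance normalization
    normalized[~voiced_mask] = 0.0          # unvoiced/silent frames carry no pitch
    return normalized

example_stats = {"pitch_mean": 150.0, "pitch_variance": 900.0}
pitch = np.array([0.0, 140.0, 155.0, 170.0, 0.0])
voiced = np.array([False, True, True, True, False])
print(normalize_pitch(pitch, voiced, example_stats))
# [ 0.         -0.33333333  0.16666667  0.66666667  0.        ]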
+ ], + "metadata": { + "id": "zos96yaoFho1" + } + }, + { + "cell_type": "code", + "source": [ + "!head $output_stats_path" + ], + "metadata": { + "id": "fOz1cpIdFcG9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# HiFi-GAN Training" + ], + "metadata": { + "id": "oRO842MUyODC" + } + }, + { + "cell_type": "markdown", + "source": [ + "Our standard FastPitch model is a two-part recipe consisting of the **FastPitch** acoustic model which predicts a mel spectrogram from text, and **HiFi-GAN** vocoder which predicts audio from the mel spectrogram.\n", + "\n", + "We will train HiFi-GAN first so that we can use it to help evaluate the performance of FastPitch as it is being trained.\n", + "\n", + "HiFi-GAN training only requires a manifest with with the `audio_filepath` field. All other fields in the manifest are for FastPitch training.\n", + "\n", + "Here we show how to train these models from scratch. You can also fine-tune them from pretrained checkpoints as mentioned in our [FastPitch fine-tuning tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_Finetuning.ipynb), but pretrained checkpoints compatible with these experimental recipes are not yet available on NGC.\n" + ], + "metadata": { + "id": "E4wUKYOfH8ax" + } + }, + { + "cell_type": "code", + "source": [ + "import torch" + ], + "metadata": { + "id": "pqfl9jAYMJob" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "dataset_name = \"vctk\"\n", + "audio_dir = DATA_DIR / \"audio_44khz\"\n", + "train_manifest_filepath = DATA_DIR / \"train_manifest.json\"\n", + "dev_manifest_filepath = DATA_DIR / \"dev_manifest.json\"" + ], + "metadata": { + "id": "jK2rr-Kr6Qg8" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "hifigan_training_script = NEMO_EXAMPLES_DIR / \"hifigan.py\"\n", + "\n", + "# The total number of training steps will be (epochs * steps_per_epoch)\n", + "epochs = 10\n", + "steps_per_epoch = 10\n", + "\n", + "sample_rate = 44100\n", + "\n", + "# Config files specifying all HiFi-GAN parameters\n", + "hifigan_config_dir = NEMO_CONFIG_DIR / \"hifigan\"\n", + "hifigan_config_filename = \"hifigan_data.yaml\"\n", + "feature_config = f\"feature_{sample_rate}\"\n", + "sample_config = f\"sample_{sample_rate}\"\n", + "\n", + "# Name of the experiment that will determine where it is saved locally and in TensorBoard and WandB\n", + "run_id = \"test_run\"\n", + "exp_dir = root_dir / \"exps\"\n", + "hifigan_exp_output_dir = exp_dir / \"HifiGan\" / run_id\n", + "# Directory where predicted audio will be stored periodically throughout training\n", + "hifigan_log_dir = hifigan_exp_output_dir / \"logs\"\n", + "\n", + "if sample_rate == 22050:\n", + " generator_config = \"v1\"\n", + "elif sample_rate == 44100:\n", + " generator_config = \"v1_44100\"\n", + "else:\n", + " raise ValueError(f\"Unsupported sampling rate {sample_rate}\")\n", + "\n", + "if torch.cuda.is_available():\n", + " accelerator=\"gpu\"\n", + " batch_size = 16\n", + "else:\n", + " accelerator=\"cpu\"\n", + " batch_size = 2\n", + "\n", + "args = [\n", + " f\"--config-path={hifigan_config_dir}\",\n", + " f\"--config-dir={NEMO_CONFIG_DIR}\",\n", + " f\"--config-name={hifigan_config_filename}\",\n", + " f\"feature={feature_config}\",\n", + " f\"sample={sample_config}\",\n", + " f'model/generator={generator_config}',\n", + " f\"max_epochs={epochs}\",\n", + " f\"weighted_sampling_steps_per_epoch={steps_per_epoch}\",\n", + " 
f\"batch_size={batch_size}\",\n", + " f\"log_dir={hifigan_log_dir}\",\n", + " f\"exp_manager.exp_dir={exp_dir}\",\n", + " f\"+exp_manager.version={run_id}\",\n", + " f\"trainer.accelerator={accelerator}\",\n", + " f\"+train_ds_meta.{dataset_name}.manifest_path={train_manifest_filepath}\",\n", + " f\"+train_ds_meta.{dataset_name}.audio_dir={audio_dir}\",\n", + " f\"+val_ds_meta.{dataset_name}.manifest_path={dev_manifest_filepath}\",\n", + " f\"+val_ds_meta.{dataset_name}.audio_dir={audio_dir}\",\n", + " f\"+log_ds_meta.{dataset_name}.manifest_path={dev_manifest_filepath}\",\n", + " f\"+log_ds_meta.{dataset_name}.audio_dir={audio_dir}\"\n", + "]" + ], + "metadata": { + "id": "Vr4D-NB-yQx8" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# If an error occurs, log the entire stacktrace.\n", + "os.environ[\"HYDRA_FULL_ERROR\"] = \"1\"" + ], + "metadata": { + "id": "Bn8lQG0PxWGi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "run_script(hifigan_training_script, args)" + ], + "metadata": { + "id": "yUxFCNrE3Ywi" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "During training, the model will automatically save predictions for all files specified in the `log_ds_meta` manifest." + ], + "metadata": { + "id": "BBPIpS-lL6z9" + } + }, + { + "cell_type": "code", + "source": [ + "hifigan_log_epoch_dir = hifigan_log_dir / \"epoch_10\"\n", + "!ls $hifigan_log_epoch_dir" + ], + "metadata": { + "id": "rSFOm1Sg46Lh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "This makes it easy to listen to the audio to determine how well the model is performing. We can decide to stop training when either:\n", + "\n", + "* The predicted audio sounds almost exactly the same as the original audio\n", + "* The predicted audio stops improving in between epochs.\n", + "\n", + "**Note that the dataset in this tutorial is too small to get good quality audio output.**" + ], + "metadata": { + "id": "oCJs7oCLMIjD" + } + }, + { + "cell_type": "code", + "source": [ + "audio_filepath = hifigan_log_epoch_dir / \"p225_143.wav\"\n", + "ipd.display(ipd.Audio(audio_filepath))" + ], + "metadata": { + "id": "G6k4ymzfJ5Y6" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# FastPitch Training" + ], + "metadata": { + "id": "lV--2Wph7NPG" + } + }, + { + "cell_type": "markdown", + "source": [ + "Finally we can train the FastPitch model itself. The FastPitch training recipe requires:\n", + "\n", + "1. Training manifest(s) with `audio_filepath` and `text` or `normalized_text` fields.\n", + "2. Precomputed features such as *pitch* and *energy* specified in the feature [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/feature/feature_44100.yaml).\n", + "3. (Optional) Statistics file for normalizing features.\n", + "4. (Optional) For a multi-speaker model, the manifest needs a `speaker` field amd JSON file mapping speaker IDs to speaker indices.\n", + "5. (Optional) To train with IPA phonemes, a [phoneme dictionary](https://github.com/NVIDIA/NeMo/blob/main/scripts/tts_dataset_files/ipa_cmudict-0.7b_nv23.01.txt) and optional [heteronyms file](https://github.com/NVIDIA/NeMo/blob/main/scripts/tts_dataset_files/heteronyms-052722)\n", + "6. 
(Optional) HiFi-GAN checkpoint or [NGC model name](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py#L413) for generating audio predictions during training.\n", + "\n" + ], + "metadata": { + "id": "aOuoPXDhOVD7" + } + }, + { + "cell_type": "code", + "source": [ + "fastpitch_training_script = NEMO_EXAMPLES_DIR / \"fastpitch.py\"\n", + "\n", + "# The total number of training steps will be (epochs * steps_per_epoch)\n", + "epochs = 10\n", + "steps_per_epoch = 10\n", + "\n", + "num_speakers = 5\n", + "sample_rate = 44100\n", + "\n", + "# Config files specifying all FastPitch parameters\n", + "fastpitch_config_dir = NEMO_CONFIG_DIR / \"fastpitch\"\n", + "fastpitch_config_filename = \"fastpitch.yaml\"\n", + "feature_config = f\"feature_{sample_rate}\"\n", + "\n", + "# Metadata files and directories\n", + "dataset_file_dir = NEMO_DIR / \"scripts\" / \"tts_dataset_files\"\n", + "phoneme_dict_path = dataset_file_dir / \"ipa_cmudict-0.7b_nv23.01.txt\"\n", + "heteronyms_path = dataset_file_dir / \"heteronyms-052722\"\n", + "\n", + "speaker_path = DATA_DIR / \"speakers.json\"\n", + "feature_dir = DATA_DIR / \"features_44khz\"\n", + "stats_path = DATA_DIR / \"feature_stats_44khz.json\"\n", + "\n", + "def get_latest_checkpoint(checkpoint_dir):\n", + " output_path = None\n", + " for checkpoint_path in checkpoint_dir.iterdir():\n", + " checkpoint_name = str(checkpoint_path.name)\n", + " if checkpoint_name.endswith(\".nemo\"):\n", + " output_path = checkpoint_path\n", + " break\n", + " if checkpoint_name.endswith(\"last.ckpt\"):\n", + " output_path = checkpoint_path\n", + "\n", + " if not output_path:\n", + " raise ValueError(f\"Could not find latest checkpoint in {checkpoint_dir}\")\n", + "\n", + " return output_path\n", + "\n", + "# HiFi-GAN model for generating audio predictions from FastPitch output\n", + "vocoder_type = \"hifigan\"\n", + "vocoder_checkpoint_path = get_latest_checkpoint(hifigan_exp_output_dir / \"checkpoints\")\n", + "\n", + "run_id = \"test_run\"\n", + "exp_dir = root_dir / \"exps\"\n", + "fastpitch_exp_output_dir = exp_dir / \"FastPitch\" / run_id\n", + "fastpitch_log_dir = fastpitch_exp_output_dir / \"logs\"\n", + "\n", + "if torch.cuda.is_available():\n", + " accelerator=\"gpu\"\n", + " batch_size = 32\n", + "else:\n", + " accelerator=\"cpu\"\n", + " batch_size = 4\n", + "\n", + "args = [\n", + " f\"--config-path={fastpitch_config_dir}\",\n", + " f\"--config-dir={NEMO_CONFIG_DIR}\",\n", + " f\"--config-name={fastpitch_config_filename}\",\n", + " f\"feature={feature_config}\",\n", + " f\"n_speakers={num_speakers}\",\n", + " f\"speaker_path={speaker_path}\",\n", + " f\"max_epochs={epochs}\",\n", + " f\"weighted_sampling_steps_per_epoch={steps_per_epoch}\",\n", + " f\"phoneme_dict_path={phoneme_dict_path}\",\n", + " f\"heteronyms_path={heteronyms_path}\",\n", + " f\"feature_stats_path={stats_path}\",\n", + " f\"log_dir={fastpitch_log_dir}\",\n", + " f\"vocoder_type={vocoder_type}\",\n", + " f\"vocoder_checkpoint_path=\\\\'{vocoder_checkpoint_path}\\\\'\",\n", + " f\"trainer.accelerator={accelerator}\",\n", + " f\"exp_manager.exp_dir={exp_dir}\",\n", + " f\"+exp_manager.version={run_id}\",\n", + " f\"+train_ds_meta.{dataset_name}.manifest_path={train_manifest_filepath}\",\n", + " f\"+train_ds_meta.{dataset_name}.audio_dir={audio_dir}\",\n", + " f\"+train_ds_meta.{dataset_name}.feature_dir={feature_dir}\",\n", + " f\"+val_ds_meta.{dataset_name}.manifest_path={dev_manifest_filepath}\",\n", + " f\"+val_ds_meta.{dataset_name}.audio_dir={audio_dir}\",\n", + " 
f\"+val_ds_meta.{dataset_name}.feature_dir={feature_dir}\",\n", + " f\"+log_ds_meta.{dataset_name}.manifest_path={dev_manifest_filepath}\",\n", + " f\"+log_ds_meta.{dataset_name}.audio_dir={audio_dir}\",\n", + " f\"+log_ds_meta.{dataset_name}.feature_dir={feature_dir}\"\n", + "]" + ], + "metadata": { + "id": "8MdMXnOAIFvj" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "run_script(fastpitch_training_script, args)" + ], + "metadata": { + "id": "apl7TvW0TaEG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "During training, the model will automatically save spectrogram and audio predictions for all files specified in the `log_ds_meta` manifest." + ], + "metadata": { + "id": "Z01Fq7WRl7Di" + } + }, + { + "cell_type": "code", + "source": [ + "faspitch_log_epoch_dir = fastpitch_log_dir / \"epoch_10\"\n", + "!ls $faspitch_log_epoch_dir" + ], + "metadata": { + "id": "E8rVKnKN5HDa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "This makes it easy to listen to the audio to determine how well the model is performing. We can decide to stop training when either:\n", + "\n", + "* The predicted audio stops improving in between epochs.\n", + "* The predicted spectrogram stops changing in between epochs.\n", + "\n", + "**Note that the dataset in this tutorial is too small to get good quality audio output.**" + ], + "metadata": { + "id": "PeNaxoCzN7Ii" + } + }, + { + "cell_type": "code", + "source": [ + "audio_filepath = faspitch_log_epoch_dir / \"p225_143.wav\"\n", + "spectrogram_filepath = faspitch_log_epoch_dir / \"p225_143_spec.png\"\n", + "\n", + "ipd.display(ipd.Audio(audio_filepath))\n", + "ipd.display(ipd.Image(spectrogram_filepath))" + ], + "metadata": { + "id": "ynZdcnKc3CRF" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 36adc7e0943a9654c21a89a8b5b8b33ed6c2efcc Mon Sep 17 00:00:00 2001 From: Sandeep Subramanian Date: Wed, 12 Jul 2023 23:21:43 -0700 Subject: [PATCH 102/123] Fix missing import (#7026) Signed-off-by: MaximumEntropy --- .../nlp/models/language_modeling/megatron_gpt_sft_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index 946df3da2aa5..c80d2272613e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -13,6 +13,7 @@ # limitations under the License. 
import json +from functools import partial from typing import Any, Optional import torch From 5e02346506851b086abcf68976e9bbf91a6a8514 Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Thu, 13 Jul 2023 20:05:11 +0400 Subject: [PATCH 103/123] fix install_beamsearch_decoders (#7011) * aliases Signed-off-by: Nikolay Karpov * add NEMO_PATH Signed-off-by: Nikolay Karpov * expand_aliases Signed-off-by: Nikolay Karpov --------- Signed-off-by: Nikolay Karpov --- .../ngram_lm/install_beamsearch_decoders.sh | 13 ++++++++---- tutorials/asr/Offline_ASR.ipynb | 20 ++++++++++--------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh index 0760dd02319d..558a84698f49 100755 --- a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh +++ b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh @@ -14,6 +14,8 @@ # limitations under the License. # Use this script to install KenLM, OpenSeq2Seq decoder, Flashlight decoder +shopt -s expand_aliases + NEMO_PATH=/workspace/nemo # Path to NeMo folder: /workspace/nemo if you use NeMo/Dockerfile if [ "$#" -eq 1 ] then @@ -24,11 +26,14 @@ KENLM_MAX_ORDER=10 # Maximum order of KenLM model, also specified in the setup_o cd $NEMO_PATH if [ $(id -u) -eq 0 ]; then - alias sudo=eval + alias aptupdate='apt-get update' + alias b2install='./b2' +else + alias aptupdate='sudo apt-get update' + alias b2install='sudo ./b2' fi -sudo apt-get update && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder - +aptupdate && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder' git clone https://github.com/NVIDIA/OpenSeq2Seq cd OpenSeq2Seq @@ -42,7 +47,7 @@ cp $NEMO_PATH/scripts/installers/setup_os2s_decoders.py ./setup.py ./setup.sh # install Boost package for KenLM -wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && sudo ./b2 --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE +wget https://boostorg.jfrog.io/artifactory/main/release/1.80.0/source/boost_1_80_0.tar.bz2 --no-check-certificate && tar --bzip2 -xf $NEMO_PATH/decoders/boost_1_80_0.tar.bz2 && cd boost_1_80_0 && ./bootstrap.sh && b2install --layout=tagged link=static,shared threading=multi,single install -j4 || echo FAILURE export BOOST_ROOT=$NEMO_PATH/decoders/boost_1_80_0 # install KenLM diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index fc8af2e76416..82fb5401a3fe 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -3,9 +3,7 @@ "nbformat_minor": 0, "metadata": { "colab": { - "name": "Offline_ASR.ipynb", "provenance": [], - "collapsed_sections": [], "toc_visible": true }, "kernelspec": { @@ -31,7 +29,9 @@ "\n", "You may find more info on how to train and use language models for ASR models here:\n", "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", - "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" + "\n", + "\n", + "NOTE: User is responsible for checking the content of datasets and the 
applicable licenses and determining if suitable for the intended use.\n" ] }, { @@ -41,7 +41,7 @@ }, "source": [ "## Installation\n", - "NeMo can be installed via simple pip command. \n", + "NeMo can be installed via simple pip command.\n", "\n", "Optional CTC beam search decoder might require restart of Colab runtime after installation." ] @@ -77,12 +77,14 @@ " import ctc_decoders\n", "except ModuleNotFoundError:\n", " # install beam search decoder\n", + " import os\n", " !apt-get update && apt-get install -y swig\n", " !git clone https://github.com/NVIDIA/NeMo -b \"$BRANCH\"\n", - " !cd NeMo && bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh\n", + " pwd = !pwd\n", + " NEMO_PATH = os.path.join(pwd[0], \"NeMo\")\n", + " !cd NeMo && bash scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh $NEMO_PATH\n", " print('Restarting Colab runtime to successfully import built module.')\n", " print('Please re-run the notebook.')\n", - " import os\n", " os.kill(os.getpid(), 9)" ], "execution_count": null, @@ -110,7 +112,7 @@ }, "source": [ "## Instantiate pre-trained NeMo model\n", - "``from_pretrained(...)`` API downloads and initializes model directly from the cloud. \n", + "``from_pretrained(...)`` API downloads and initializes model directly from the cloud.\n", "\n", "Alternatively, ``restore_from(...)`` allows loading a model from a disk.\n", "\n", @@ -403,7 +405,7 @@ "source": [ "## Offline inference with beam search decoder and N-gram language model re-scoring\n", "\n", - "It is possible to use an external [KenLM](https://kheafield.com/code/kenlm/)-based N-gram language model to rescore multiple transcription candidates. \n", + "It is possible to use an external [KenLM](https://kheafield.com/code/kenlm/)-based N-gram language model to rescore multiple transcription candidates.\n", "\n", "Let's download and preprocess LibriSpeech 3-gram language model." ] @@ -653,4 +655,4 @@ "outputs": [] } ] -} +} \ No newline at end of file From caddb8dc4cdd5ed991697566ecca7f44b8751693 Mon Sep 17 00:00:00 2001 From: Igor Gitman Date: Thu, 13 Jul 2023 10:42:14 -0700 Subject: [PATCH 104/123] Update SDP docs page with a new documentation link (#7029) Signed-off-by: Igor Gitman --- docs/source/tools/speech_data_processor.rst | 162 +------------------- 1 file changed, 3 insertions(+), 159 deletions(-) diff --git a/docs/source/tools/speech_data_processor.rst b/docs/source/tools/speech_data_processor.rst index 29bc4abb82bd..262b214c6355 100644 --- a/docs/source/tools/speech_data_processor.rst +++ b/docs/source/tools/speech_data_processor.rst @@ -1,166 +1,10 @@ Speech Data Processor -======================== +===================== Speech Data Processor (SDP) is a toolkit to make it easy to: 1. write code to process a new dataset, minimizing the amount of boilerplate code required. 2. share the steps for processing a speech dataset. -SDP is hosted here: https://github.com/NVIDIA/NeMo-speech-data-processor. +SDP is hosted here: https://github.com/NVIDIA/NeMo-speech-data-processor. -SDP's philosophy is to represent processing operations as 'processor' classes, which take in a path to a NeMo-style data manifest as input (or a path to the raw data directory if you do not have a NeMo-style manifest to start with), apply some processing to it, and then save the output manifest file. - -You specifiy which processors you want to run using a YAML config file. Many common processing operations are provided, and it is easy to add your own. 
If you do not need to add your own processors, then all that is needed to process a new dataset is to write a single YAML file containing the parameters needed to process your dataset. - -.. image:: https://github.com/NVIDIA/NeMo/releases/download/v1.17.0/sdp_overview_diagram.png - :alt: Overview diagram of Speech Data Processor - -Overview of how SDP processes a dataset ---------------------------------------- - -1. You call the ``main.py`` script, passing in a YAML config file, possibly with some overrides. -2. ``main.py`` script calls ``run_processors.py``, passing in your config. -3. ``run_processors.py`` does the following: - - a. picks out the processors that you specified to be run (you can specify a subset of the processors in the config override, e.g. to avoid re-running time-consuming steps). - b. if some of the processors have not had "output_manifest_file" or "input_manfiest_file" entries specified, SDP will automatically create temporary files for those. - c. instantiates the processor classes using ``hydra.utils.instantiate`` - d. runs the run-time processor tests by calling the ``processor.test()`` method (more details about testing :ref:`here`). - e. runs the processing method (``processor.process()``) of each processor in order. - - -Layout of config YAML files -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The YAML config file for processing a dataset must contain a key ``processors``, the value of which is a list. Each item in that list is expected to be a dictionary specifying a processor class, i.e. it must have a key ``_target_``, the value of which is a path to a "processor" class, and the remaining keys must be the kwargs necessary to instantiate that class with ``hydra.utils.instantiate()`` (c.f. https://hydra.cc/docs/advanced/instantiate_objects/overview/). - -SDP will run the processors specified in the ``processors`` list in the config file. It will also check for a ``processors_to_run`` key in the config file, which can be either the string ``"all"``, or any Python "slice" object like ``3:4``, ``2:`` etc. (if there is no ``processors_to_run`` key, then all of the processors will be run). - -.. note:: - SDP will run the processors in the order in which they are listed in the config YAML file. Make sure to list the processors in an order which makes sense, e.g. create an initial manifest first; make sure to run asr inference before doing any processing which looks at ``pred_text`` fields in the manifest. - -Processor classes ------------------ - -**BaseProcessor** -~~~~~~~~~~~~~~~~~ - -All processor classes inherit from the ``BaseProcessor`` class. This is a simple abstract class which has 2 empty methods: ``process()`` and ``test()``. -These serve to remind us that SDP essentially just runs ``test()`` on all processors, and then ``process()`` on all processors (more details about testing :ref:`here`). - -``ASRInference`` is a child class of ``BaseProcessor``. It has a simple ``process()`` method which runs transcription on every utterance in the input_manifest. - -``WriteManifest`` is also a child class of ``BaseProcessor``. It has a simple ``process()`` method which saves a copy of the input manifest containing only the fields specified in ``fields_to_save``. - -**BaseParallelProcessor** -~~~~~~~~~~~~~~~~~~~~~~~~~ -``BaseParallelProcessor`` inherits from the ``BaseProcessor`` class. Within the ``BaseParallelProcessor.process()`` method, it calls other methods and functions, which allow it to do more complex processing. 
-Most importantly, it calls its ``BaseParallelProcessor.process_dataset_entry(data_entry)`` method on every utterance in the manifest, and it does this in parallel, allowing for more efficient processing. - -What is a **DataEntry**? -~~~~~~~~~~~~~~~~~~~~~~~~ -As mentioned above, ``BaseParallelProcessor.process_dataset_entry(data_entry)`` is called on a variable called ``data_entry`` which represents an utterance in our dataset. -Most often, ``data_entry`` will be a dictionary containing items which represent the JSON manifest entry. -Sometimes, such as in ``CreateInitialManifestMLS``, it will be a string containing a line for that utterance from the original raw MLS transcript. - -``BaseParallelProcessor.process_dataset_entry`` will process ``data_entry`` and output a ``DataEntry`` object. - -The ``DataEntry`` class is a dataclass which contains 2 attributes: - -1. ``data`` is an Optional dictionary containing items which represent the JSON manifest entry. ``data`` can also be ``None``. If a ``.process_dataset_entry(data_entry)`` method returns a ``DataEntry`` class where ``data is None``, then that utterance will be dropped from the output manifest. -2. ``metrics``, which can be of any type, and are ``None`` by default. This variable is used by some variables to record summary statistics about the changes made to the dataset, these metrics are aggregated and can be displayed once every utterance has been processed by the processor. - -What happens in **BaseParallelProcessor.process()**? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -We outline the ``BaseParallelProcessor.process()`` method below: - -.. raw:: html - -
- -
- - -**ModifyManifestTextProcessor** -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``ModifyManifestTextProcessor`` inherits from the ``BaseParallelProcessor`` class. - -The ``ModifyManifestTextProcessor`` constructor takes in the following arguments: -* ``text_key`` (string) and ``pred_text_key`` (string): these parameters specify which keys in ``data_entry.data`` will be used for processing. (default: ``text_key="text"``, ``pred_text_key="pred_text"``, ie. by default the processor will refer to and modify the ``"text"`` and/or ``"pred_text"`` attributes of the input manifest). -* ``test_cases`` (optional, list of dicts) - test cases for checking that the processor makes the changes that we are expecting. - -``ModifyManifestTextProcessor`` has the following methods: -* ``ModifyManifestTextProcessor.test()``: this method makes sure that the output from the processor matches the expected output specified in the ``test_cases`` parameter. -* ``ModifyManifestTextProcessor.process_dataset_entry(data_entry)``: this method applies processing to a ``data_entry``. First, spaces are added to the start and end of the 'text' and 'pred_text' entries (if they exist), then the abstract method ``ModifyManifestTextProcessor._process_dataset_entry(data_entry)`` is called. Then, any extra spaces (e.g. two spaces next to each other ' ') are removed from 'text' and 'pred_text' entries. -* ``ModifyManifestTextProcessor._process_dataset_entry(data_entry)``: this is an abstract method which will be over-written by children of ``ModifyManifestTextProcessor``. - -How to make your own processor classes --------------------------------------- - -We will describe how to make your own processor classes by referring to SDP's existing classes. - -Creating an initial manifest -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -One of the child classes of ``BaseParallelProcessor`` provided in SDP is ``CreateInitialManifestMLS``. It downloads raw MLS data for a specified language, and creates an initial manifest (in the format expected by NeMo) which can be cleaned by subsequent processors. - -The ``CreateInitialManifestMLS.prepare()`` method downloads and extracts the raw data. - -The ``CreateInitialManifestMLS.read_manifest()`` method reads the lines in the raw MLS transcript file. - -The ``CreateInitialManifestMLS.process_dataset_entry()`` method takes in the lines from the raw MLS transcript file, and outputs ``DataEntry`` objects containing entries that will be saved into the manifest (i.e. ``"audio_filepath"``, ``"duration"``, ``"text"``) for each utterance. - - -A **ModifyManifestTextProcessor** subclass that cleans the reference text -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -One of the classes provided in SDP is ``SubRegex``. At initialization, it takes in ``regex_params_list``, a list of dictionaries which must contain the keys ``"pattern"``, ``"repl"``, and, optionally, ``"count"``. These keys will be used to apply regex substitutions using these parameters fed into ``re.sub``. The substitutions will be applied to the data at ``text_key`` (i.e. ``data_entry.data[self.text_key]``). By default, ``text_key="text"``, i.e. the substitutions will be applied to the ``"text"`` attribute of the manifest. - -In its ``_process_dataset_entry(data_entry)`` method, the ``SubRegex`` processor does the string to string conversion upon the ``data_entry`` that is input. Its output is a ``data_entry`` with the changes applied to ``data``, and the the metrics of which regex patterns caused a substitution to be made. 
These metrics will be aggregated over all utterances by the ``BaseParallelProcessor`` class. ``SubRegex`` also has a ``finalize(metrics)`` method which will log information about the aggregated metrics after all of the utterances in the manifest have been processed. - -A **ModifyManifestTextProcessor** subclass that drops incorrectly transcribed utterances -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -One of the classes provided in SDP is ``DropHighLowCharrate``. At initialization, it takes in ``high_charrate_threshold`` and ``low_charrate_threshold``, for which the utterance will be dropped if it is above or below each value respectively. This is helpful for automatically filtering out incorrectly transcribed utterances. - -In its ``_process_dataset_entry(data_entry)`` method it evaluates the character rate of the utterance(by dividing the length of ``data_entry.data[self.text_key]`` by the value of ``data_entry.data["duration"]``). If the character rate is within bounds, it will return the same ``data_entry`` that was input. If the character rate is out of bounds, it will return a ``data_entry`` with ``data=None`` and ``metrics`` which reflect the applied changes. -Similar to the ``SubSubstringToSpace`` class, it has a ``finalize(metrics)`` method which will log information about the aggregated metrics after all of the utterances in the manifest have been processed. - -Class diagram -------------- -A diagram of the classes mentioned above is included here. Arrows represent inheritance. - -We omit the details of the ``CreateInitialManifestMLS`` class in the diagram in order to save space. - - -.. raw:: html - -
- -
- -SDP Tests ---------- -It is important to make sure that your data processing code has the effect you intend, so SDP has a few different types of tests: - -1. Runtime tests - -* Before running the specified processors, SDP runs ``processor.test()`` on all specified processors. -* Currently, the only provided processor classes with a test method are subclasses of ``ModifyManifestTextProcessor``. - - * ``ModifyManifestTextProcessor.test()`` runs any ``test_cases`` that were provided in the object constructor. - * This means you can provided test cases in the YAML config file, and the dataset will only be processed if the test cases pass. - * This is helpful to (a) make sure that the rules you wrote have the effect you desired, and (b) demonstrate why you wrote those rules. - * An example of test cases we could include in the YAML config file:: - - - _target_: sdp.processors.DropIfRegexMatch - regex_patterns: - - "(\\D ){5,20}" # looks for between 4 and 19 characters surrounded by spaces - test_cases: - - {input: {text: "some s p a c e d out letters"}, output: null} - - {input: {text: "normal words only"}, output: {text: "normal words only"}} - -2. ``pytest`` tests which can be run locally with ``python -m pytest tests/`` and will be run during the GitHub CI process. There are 2 sub-types: - - a. "End to end" tests (link) which run SDP on a mini version of the raw initial dataset, and make sure the final manifest matches the reference final manifest. - b. "Unit tests" for processors and utils (link). +To learn more about SDP, please check the [documentation](https://nvidia.github.io/NeMo-speech-data-processor/). From 7ccc2cf2c81f48bf3dd352180498a483292bcc7e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 13 Jul 2023 22:32:39 +0400 Subject: [PATCH 105/123] [TTS] Append pretrained FastPitch & SpectrogamEnhancer pair to available models (#7012) (#7013) * [TTS] fastpitch: add english libritts model with asr stft parameters (25 ms 10 ms) * [TTS] enhancer: add pretrained model intended for asr finetuning --------- Signed-off-by: Roman Korostik --- nemo/collections/tts/models/fastpitch.py | 14 +++++++++++++ .../tts/models/spectrogram_enhancer.py | 20 +++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/nemo/collections/tts/models/fastpitch.py b/nemo/collections/tts/models/fastpitch.py index dc598a9a76d1..8f0e06ea304d 100644 --- a/nemo/collections/tts/models/fastpitch.py +++ b/nemo/collections/tts/models/fastpitch.py @@ -772,6 +772,20 @@ def list_available_models(cls) -> 'List[PretrainedModelInfo]': ) list_of_models.append(model) + # en, multi speaker, LibriTTS, 16000 Hz + # stft 25ms 10ms matching ASR params + # for use during Enhlish ASR training/adaptation + model = PretrainedModelInfo( + pretrained_model_name="tts_en_fastpitch_for_asr_finetuning", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_fastpitch_spectrogram_enhancer_for_asr_finetuning/versions/1.20.0/files/tts_en_fastpitch_for_asr_finetuning.nemo", + description="This model is trained on LibriSpeech, train-960 subset." + " STFT parameters follow those commonly used in ASR: 25 ms window, 10 ms hop." + " This model is supposed to be used with its companion SpetrogramEnhancer for " + " ASR fine-tuning. 
Usage for regular TTS tasks is not advised.", + class_=cls, + ) + list_of_models.append(model) + return list_of_models # Methods for model exportability diff --git a/nemo/collections/tts/models/spectrogram_enhancer.py b/nemo/collections/tts/models/spectrogram_enhancer.py index bcc7e69a10bf..ca2fe6122230 100644 --- a/nemo/collections/tts/models/spectrogram_enhancer.py +++ b/nemo/collections/tts/models/spectrogram_enhancer.py @@ -56,7 +56,7 @@ HingeLoss, ) from nemo.collections.tts.parts.utils.helpers import mask_sequence_tensor, to_device_recursive -from nemo.core import Exportable, ModelPT, typecheck +from nemo.core import Exportable, ModelPT, PretrainedModelInfo, typecheck from nemo.core.neural_types import LengthsType, MelSpectrogramType, NeuralType from nemo.core.neural_types.elements import BoolType from nemo.utils import logging @@ -277,7 +277,23 @@ def setup_validation_data(self, val_data_config): @classmethod def list_available_models(cls): - return [] + list_of_models = [] + + # en, multi speaker, LibriTTS, 16000 Hz + # stft 25ms 10ms matching ASR params + # for use during Enhlish ASR training/adaptation + model = PretrainedModelInfo( + pretrained_model_name="tts_en_spectrogram_enhancer_for_asr_finetuning", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_en_fastpitch_spectrogram_enhancer_for_asr_finetuning/versions/1.20.0/files/tts_en_spectrogram_enhancer_for_asr_finetuning.nemo", + description="This model is trained to add details to synthetic spectrograms." + " It was trained on pairs of real-synthesized spectrograms generated by FastPitch." + " STFT parameters follow ASR with 25 ms window and 10 ms hop." + " It is supposed to be used in conjunction with that model for ASR training/adaptation.", + class_=cls, + ) + list_of_models.append(model) + + return list_of_models def log_illustration(self, target_spectrograms, input_spectrograms, enhanced_spectrograms, lengths): if self.global_rank != 0: From d44127eebc975647d47009948ef65302b802fb32 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 13 Jul 2023 22:34:01 +0400 Subject: [PATCH 106/123] Add ASR with TTS Tutorial. Fix enhancer usage. 
(#6955) (#7023) * Add ASR with TTS Tutorial * Fix enhancer usage Signed-off-by: Vladimir Bataev Co-authored-by: Vladimir Bataev --- docs/source/starthere/tutorials.rst | 4 +- .../asr/models/hybrid_asr_tts_models.py | 2 + tutorials/asr/ASR_TTS_Tutorial.ipynb | 846 ++++++++++++++++++ 3 files changed, 850 insertions(+), 2 deletions(-) create mode 100644 tutorials/asr/ASR_TTS_Tutorial.ipynb diff --git a/docs/source/starthere/tutorials.rst b/docs/source/starthere/tutorials.rst index e24637718690..2f4ea59cff5c 100644 --- a/docs/source/starthere/tutorials.rst +++ b/docs/source/starthere/tutorials.rst @@ -107,8 +107,8 @@ To run a tutorial: - Multi-lingual ASR - `Multi-lingual ASR `_ * - ASR - - Confidence-based Ensembles - - `Confidence-based Ensembles `_ + - Hybrid ASR-TTS Models Tutorial + - `Multi-lingual ASR `_ * - NLP - Using Pretrained Language Models for Downstream Tasks - `Pretrained Language Models for Downstream Tasks `_ diff --git a/nemo/collections/asr/models/hybrid_asr_tts_models.py b/nemo/collections/asr/models/hybrid_asr_tts_models.py index 8486f956c3b7..8494a093b29d 100644 --- a/nemo/collections/asr/models/hybrid_asr_tts_models.py +++ b/nemo/collections/asr/models/hybrid_asr_tts_models.py @@ -311,8 +311,10 @@ def from_pretrained_models( ) ) else: + cfg = copy.deepcopy(cfg) # copy to avoid modifying original config cfg.tts_model_path = f"{tts_model_path}" cfg.asr_model_path = f"{asr_model_path}" + cfg.enhancer_model_path = f"{enhancer_model_path}" if enhancer_model_path is not None else None return ASRWithTTSModel(cfg, trainer=trainer) def __setattr__(self, name, value): diff --git a/tutorials/asr/ASR_TTS_Tutorial.ipynb b/tutorials/asr/ASR_TTS_Tutorial.ipynb new file mode 100644 index 000000000000..939ef8a28d29 --- /dev/null +++ b/tutorials/asr/ASR_TTS_Tutorial.ipynb @@ -0,0 +1,846 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a3570803-9bfa-4e97-9891-5ae0759eb8ca", + "metadata": {}, + "source": [ + "# Hybrid ASR-TTS Models Tutorial" + ] + }, + { + "cell_type": "markdown", + "id": "50fc294f-f319-4465-8f90-a28b49843e60", + "metadata": {}, + "source": [ + "This tutorial is intended to introduce you to using ASR-TTS Hybrid Models, also known as `ASRWithTTSModel`, to finetune existing ASR models using an integrated text-to-mel-spectrogram generator. " + ] + }, + { + "cell_type": "markdown", + "id": "d2a01ca5-bd48-4d82-a97d-5b07a7b27ca0", + "metadata": {}, + "source": [ + "## ASR-TTS Models: Description" + ] + }, + { + "cell_type": "markdown", + "id": "b32467a9-c458-4590-aff7-e8d1e91b0870", + "metadata": {}, + "source": [ + "### Problem\n", + "\n", + "Adapting ASR models to a new text domain is a challenging task. Modern end-to-end systems can require several hundreds and thousands of hours to perform recognition with high accuracy. Acquiring audio-text paired data for a specific domain can be prohibitively expensive. Text-only data, on the other side, is widely available. \n", + "\n", + "One of the approaches for efficient adaptation is synthesizing audio data from text and using such data for training the ASR model conventionally. We modify this approach, incorporating TTS and ASR systems into a single model. 
We use only a lightweight multi-speaker text-to-mel-spectrogram generator (without vocoder) with an optional enhancer that mitigates the mismatch between natural and synthetic spectrograms.\n", + "\n", + "### Architecture\n", + "\n", + "(Figure: ASR-TTS model architecture)\n", + "\n", + "`ASRWithTTSModel` is a transparent wrapper for three models:\n", + "- ASR model (`EncDecCTCModelBPE`, `EncDecRNNTBPEModel` or `EncDecHybridRNNTCTCBPEModel` are supported)\n", + "- frozen text-to-mel-spectrogram model (currently, only `FastPitch` model is supported)\n", + "- optional frozen enhancer model\n", + "\n", + "The architecture is shown in the figure. \n", + "\n", + "The model can take text or audio as input during training. In the case of audio input, a mel spectrogram is extracted as usual and passed to the ASR neural network. In the case of textual input, the mel spectrogram generator produces a spectrogram on the fly from the text. The spectrogram is improved by the enhancer (if present) and fed into the ASR model. \n", + "\n", + "### Capabilities and Limitations\n", + "\n", + "This approach can be used to finetune the pretrained ASR model using text-only data. Training new models from scratch is also possible. The text should contain phrases and sentences and be split into sentences (~45 words maximum, corresponding to ~16.7 seconds of synthesized audio). Using only separate words is not recommended, since this doesn't allow the ASR model to adapt to recognizing new words in context. \n", + "\n", + "Mixing audio-text pairs with text-only data from the original domain is recommended to preserve performance on the original data. \n", + "Also, fusing BatchNorm (see parameters below) is recommended for the best performance when using a large proportion of text compared to the amount of audio-text pairs in the finetuning process.\n", + "\n", + "\n", + "### Implementation Details and Experiments\n", + "\n", + "Further details about implementation and experiments can be found in the paper [Text-only domain adaptation for end-to-end ASR using integrated text-to-mel-spectrogram generator](https://arxiv.org/abs/2302.14036)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2702d081-c675-4a96-8263-6059e310d048", + "metadata": {}, + "source": [ + "## Example: Finetuning ASR Model Using Text-Only Data" + ] + }, + { + "cell_type": "markdown", + "id": "30fe41a3-f36c-4803-a7f0-4260fb111478", + "metadata": {}, + "source": [ + "In this example, we will finetune a pretrained small Conformer-CTC model using text-only data from the AN4 dataset. [AN4 dataset](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/datasets.html#an4-dataset) is a small dataset that consists of sentences of people spelling out addresses, names, and other entities.\n", + "\n", + "The model is pretrained on LibriSpeech data and performs poorly on AN4 data (`~17.7%` WER on test data).\n", + "We will use only text from the train part to construct text-only training data for our model and will achieve good performance on the test part of the AN4 dataset (`~2%` WER)." + ] + }, + { + "cell_type": "markdown", + "id": "923819bb-7822-412a-8f9b-98c76c70e0bb", + "metadata": {}, + "source": [ + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. 
Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run the following cell to set up dependencies.\n", + "\n", + "NOTE: The user is responsible for checking the content of datasets and the applicable licenses and determining if they are suitable for the intended use." + ] + }, + { + "cell_type": "markdown", + "id": "4685a9da-b3f8-4b95-ba74-64a114223233", + "metadata": {}, + "source": [ + "### Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d22d241-6c46-492c-99db-3bd69777243c", + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " import google.colab\n", + "\n", + " IN_COLAB = True\n", + "except (ImportError, ModuleNotFoundError):\n", + " IN_COLAB = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc38a961-8822-4685-89ae-ab6f591f9c28", + "metadata": {}, + "outputs": [], + "source": [ + "BRANCH = 'main'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd60b1c4-7b1d-421d-9d63-95d7458bbcbd", + "metadata": {}, + "outputs": [], + "source": [ + "# If you're using Google Colab and not running locally, run this cell.\n", + "\n", + "if IN_COLAB:\n", + " ## Install dependencies\n", + " !pip install wget\n", + " !apt-get install sox libsndfile1 ffmpeg\n", + " !pip install text-unidecode\n", + "\n", + " ## Install NeMo\n", + " !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" + ] + }, + { + "cell_type": "markdown", + "id": "08f99618-6f83-44b3-bc8e-f7df04fc471c", + "metadata": {}, + "source": [ + "### Import necessary libraries and utils" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74f780b1-9b72-4acf-bcf0-64e1ce84e76d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "import string\n", + "import tempfile\n", + "\n", + "from omegaconf import OmegaConf\n", + "import pytorch_lightning as pl\n", + "import torch\n", + "from tqdm.auto import tqdm\n", + "import wget\n", + "\n", + "from nemo.collections.asr.models import EncDecCTCModelBPE\n", + "from nemo.collections.asr.models.hybrid_asr_tts_models import ASRWithTTSModel\n", + "from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest\n", + "from nemo.collections.tts.models import FastPitchModel, SpectrogramEnhancerModel\n", + "from nemo.utils.notebook_utils import download_an4\n", + "\n", + "from nemo_text_processing.text_normalization.normalize import Normalizer" + ] + }, + { + "cell_type": "markdown", + "id": "ca928d36-fb0d-439b-bac0-299e98a72d02", + "metadata": {}, + "source": [ + "### Prepare Data" + ] + }, + { + "cell_type": "markdown", + "id": "702e8e92-17b2-4f34-a2d9-c72b94501bf5", + "metadata": {}, + "source": [ + "Download and preprocess AN4 data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62c7cfec-aa98-4fc5-8b31-23ee1d59f311", + "metadata": {}, + "outputs": [], + "source": [ + "DATASETS_DIR = Path(\"./datasets\") # directory for data\n", + "CHECKPOINTS_DIR = Path(\"./checkpoints/\") # directory for checkpoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "659db73e-dcd7-455c-8140-20e104d6ac00", + "metadata": {}, + "outputs": [], + "source": [ + "# create directories if necessary\n", + "DATASETS_DIR.mkdir(parents=True, exist_ok=True)\n", + "CHECKPOINTS_DIR.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36830e7f-5293-4401-8c56-780127b47385", + "metadata": {}, + "outputs": [], + "source": [ + "download_an4(data_dir=f\"{DATASETS_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e77f5062-9acb-4f39-b811-a5b11dd6f76f", + "metadata": {}, + "outputs": [], + "source": [ + "AN4_DATASET = DATASETS_DIR / \"an4\"" + ] + }, + { + "cell_type": "markdown", + "id": "403b63b0-8aab-43aa-a455-31f588d1772f", + "metadata": {}, + "source": [ + "### Construct text-only training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35654ee1-3869-4289-bd52-15818c0ccf69", + "metadata": {}, + "outputs": [], + "source": [ + "# read original training data\n", + "an4_train_data = read_manifest(AN4_DATASET / \"train_manifest.json\")" + ] + }, + { + "cell_type": "markdown", + "id": "a17f583c-2a5c-4faf-84bd-eb04c2921e01", + "metadata": {}, + "source": [ + "Text-only manifest should contain three fields:\n", + "- `text`: target text for the ASR model\n", + "- `tts_text`: text to use as a source for the TTS model (unnormalized)\n", + "- `tts_text_normalized`: text to use as a source for TTS model (normalized)\n", + "\n", + "If `tts_text_normalized` is not present, `tts_text` will be used, and normalization will be done when loading the dataset.\n", + "It is highly recommended to normalize the text and manually create the `tts_text_normalized` field since current normalizers are unsuitable for processing a large amount of text on the fly." 
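For illustration, here is a minimal sketch of what one finished line of the text-only manifest could look like once all three fields are filled in; the sentence and values below are made up rather than taken from AN4, and the field names follow the description above:

```python
import json

# Hypothetical example of a single text-only manifest entry (values are illustrative only).
example_entry = {
    "text": "october twenty fourth nineteen ninety",                  # target text for the ASR model
    "tts_text": "October 24th, 1990",                                 # unnormalized source text for the TTS model
    "tts_text_normalized": "October twenty fourth, nineteen ninety",  # normalized source text for the TTS model
}

# Each manifest line is a standalone JSON object, one entry per line.
print(json.dumps(example_entry))
```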
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5938a8c2-e239-4a45-a716-dc11a981aec7", + "metadata": {}, + "outputs": [], + "source": [ + "# fill `text` and `tts_text` fields with the source data\n", + "textonly_data = []\n", + "for record in an4_train_data:\n", + " text = record[\"text\"]\n", + " textonly_data.append({\"text\": text, \"tts_text\": text})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f6a5735-a5c2-4a8b-8116-bfc535a2c299", + "metadata": {}, + "outputs": [], + "source": [ + "WHITELIST_URL = (\n", + " \"https://raw.githubusercontent.com/NVIDIA/NeMo-text-processing/main/\"\n", + " \"nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv\"\n", + ")\n", + "\n", + "\n", + "def get_normalizer() -> Normalizer:\n", + " with tempfile.TemporaryDirectory() as data_dir:\n", + " whitelist_path = Path(data_dir) / \"lj_speech.tsv\"\n", + " if not whitelist_path.exists():\n", + " wget.download(WHITELIST_URL, out=str(data_dir))\n", + "\n", + " normalizer = Normalizer(\n", + " lang=\"en\",\n", + " input_case=\"cased\",\n", + " whitelist=str(whitelist_path),\n", + " overwrite_cache=True,\n", + " cache_dir=None,\n", + " )\n", + " return normalizer" + ] + }, + { + "cell_type": "markdown", + "id": "dd0253aa-d7f1-47ee-a142-099b71241270", + "metadata": {}, + "source": [ + "Construct the `tts_text_normalized` field by applying an English normalizer to the text.\n", + "\n", + "AN4 data doesn't contain numbers, currency, and other entities, so the normalizer is used here only for demonstration purposes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27bb29d5-d44d-4026-98f8-5f0b1241b39a", + "metadata": {}, + "outputs": [], + "source": [ + "normalizer = get_normalizer()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9400e6d3-ba92-442a-8dd4-117e95dce2ea", + "metadata": {}, + "outputs": [], + "source": [ + "for record in tqdm(textonly_data):\n", + " record[\"tts_text_normalized\"] = normalizer.normalize(\n", + " record[\"tts_text\"], verbose=False, punct_pre_process=True, punct_post_process=True\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "30a934b0-9b58-4bad-bb9a-ab78d81c3859", + "metadata": {}, + "source": [ + "Save manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1833ac15-1750-4468-88bc-2343fbabe4d8", + "metadata": {}, + "outputs": [], + "source": [ + "write_manifest(AN4_DATASET / \"train_text_manifest.json\", textonly_data)" + ] + }, + { + "cell_type": "markdown", + "id": "fa3a2371-8c78-4dd1-9605-a668adf52b4a", + "metadata": {}, + "source": [ + "### Save pretrained checkpoints" + ] + }, + { + "cell_type": "markdown", + "id": "7eb14117-8b8b-4170-ab8c-ce496522a361", + "metadata": {}, + "source": [ + "First, we will load pretrained models from NGC and save them as `nemo` checkpoints. \n", + "Our hybrid model will be constructed from these checkpoints.\n", + "We will use:\n", + "- small Conformer-CTC ASR model trained on LibriSpeech data (for finetuning)\n", + "- multi-speaker TTS FastPitch model trained on LibriTTS data. 
Spectrogram parameters for this model are the same as those used in the ASR model\n", + "- enhancer, which is trained adversarially on the output of the TTS model and natural spectrograms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43c5c75a-b6e0-4b3c-ad26-a07b483d84e6", + "metadata": {}, + "outputs": [], + "source": [ + "ASR_MODEL_PATH = CHECKPOINTS_DIR / \"stt_en_conformer_ctc_small_ls.nemo\"\n", + "TTS_MODEL_PATH = CHECKPOINTS_DIR / \"fastpitch.nemo\"\n", + "ENHANCER_MODEL_PATH = CHECKPOINTS_DIR / \"enhancer.nemo\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40976e22-7a7b-42b2-86a1-9eaaef4c1c22", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# asr model: stt_en_conformer_ctc_small_ls\n", + "asr_model = EncDecCTCModelBPE.from_pretrained(model_name=\"stt_en_conformer_ctc_small_ls\")\n", + "asr_model.save_to(f\"{ASR_MODEL_PATH}\")\n", + "\n", + "# tts model: tts_en_fastpitch_for_asr_finetuning\n", + "tts_model = FastPitchModel.from_pretrained(model_name=\"tts_en_fastpitch_for_asr_finetuning\")\n", + "tts_model.save_to(f\"{TTS_MODEL_PATH}\")\n", + "\n", + "# enhancer model: tts_en_spectrogram_enhancer_for_asr_finetuning\n", + "enhancer_model = SpectrogramEnhancerModel.from_pretrained(model_name=\"tts_en_spectrogram_enhancer_for_asr_finetuning\")\n", + "enhancer_model.save_to(f\"{ENHANCER_MODEL_PATH}\")" + ] + }, + { + "cell_type": "markdown", + "id": "32d1e242-0ab0-43bf-aaa0-997d284c2c1b", + "metadata": {}, + "source": [ + "### Construct hybrid ASR-TTS model " + ] + }, + { + "cell_type": "markdown", + "id": "2210eb07-6d44-44e0-a0ad-866f1e89873a", + "metadata": {}, + "source": [ + "#### Config Parameters\n", + "\n", + "`Hybrid ASR-TTS model` consists of three parts:\n", + "\n", + "* ASR model (``EncDecCTCModelBPE``, ``EncDecRNNTBPEModel`` or ``EncDecHybridRNNTCTCBPEModel``)\n", + "* TTS Mel Spectrogram Generator (currently, only `FastPitch` model is supported)\n", + "* Enhancer model (optional)\n", + "\n", + "Also, the config allows to specify a text-only dataset.\n", + "\n", + "Main parts of the config:\n", + "\n", + "* ASR model\n", + " * ``asr_model_path``: path to the ASR model checkpoint (`.nemo`) file, loaded only once, then the config of the ASR model is stored in the ``asr_model`` field\n", + " * ``asr_model_type``: needed only when training from scratch. ``rnnt_bpe`` corresponds to ``EncDecRNNTBPEModel``, ``ctc_bpe`` to ``EncDecCTCModelBPE``, ``hybrid_rnnt_ctc_bpe`` to ``EncDecHybridRNNTCTCBPEModel``\n", + " * ``asr_model_fuse_bn``: fusing BatchNorm in the pretrained ASR model, can improve quality in finetuning scenario\n", + "* TTS model\n", + " * ``tts_model_path``: path to the pretrained TTS model checkpoint (`.nemo`) file, loaded only once, then the config of the model is stored in the ``tts_model`` field\n", + "* Enhancer model\n", + " * ``enhancer_model_path``: optional path to the enhancer model. Loaded only once, the config is stored in the ``enhancer_model`` field\n", + "* ``train_ds``\n", + " * ``text_data``: properties related to text-only data\n", + " * ``manifest_filepath``: path (or paths) to text-only dataset manifests\n", + " * ``speakers_filepath``: path (or paths) to the text file containing speaker ids for the multi-speaker TTS model (speakers are sampled randomly during training)\n", + " * ``min_words`` and ``max_words``: parameters to filter text-only manifests by the number of words\n", + " * ``tokenizer_workers``: number of workers for initial tokenization (when loading the data). 
``num_CPUs / num_GPUs`` is a recommended value.\n", + " * ``asr_tts_sampling_technique``, ``asr_tts_sampling_temperature``, ``asr_tts_sampling_probabilities``: sampling parameters for text-only and audio-text data (if both specified). Correspond to ``sampling_technique``, ``sampling_temperature``, and ``sampling_probabilities`` parameters of the `nemo.collections.common.data.dataset.ConcatDataset`.\n", + " * all other components are similar to conventional ASR models\n", + "* ``validation_ds`` and ``test_ds`` correspond to the underlying ASR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d6dd499-d388-4ee3-9a01-d739b16e6ad7", + "metadata": {}, + "outputs": [], + "source": [ + "# load config\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6701dc8-cb3b-44cc-aab5-fb6e2c1dadb5", + "metadata": {}, + "outputs": [], + "source": [ + "config = OmegaConf.load(\"./configs/hybrid_asr_tts.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c13b3c96-4074-415f-95d2-17569886bfcd", + "metadata": {}, + "outputs": [], + "source": [ + "NUM_EPOCHS = 10" + ] + }, + { + "cell_type": "markdown", + "id": "4d090c5d-44a7-401a-a753-b8779b1c1e0b", + "metadata": {}, + "source": [ + "We will use all available speakers (sampled uniformly)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c41e5e8-d926-4b83-8725-bae5a82121cf", + "metadata": {}, + "outputs": [], + "source": [ + "TTS_SPEAKERS_PATH = Path(\"./checkpoints/speakers.txt\")\n", + "\n", + "with open(TTS_SPEAKERS_PATH, \"w\", encoding=\"utf-8\") as f:\n", + " for speaker_id in range(tts_model.cfg.n_speakers):\n", + " print(speaker_id, file=f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c07c07c-cb15-4a1c-80bf-20eaffaa65d9", + "metadata": {}, + "outputs": [], + "source": [ + "config.model.asr_model_path = ASR_MODEL_PATH\n", + "config.model.tts_model_path = TTS_MODEL_PATH\n", + "config.model.enhancer_model_path = ENHANCER_MODEL_PATH\n", + "\n", + "# fuse BathNorm automatically in Conformer for better performance\n", + "config.model.asr_model_fuse_bn = True\n", + "\n", + "# training data\n", + "# constructed dataset\n", + "config.model.train_ds.text_data.manifest_filepath = str(AN4_DATASET / \"train_text_manifest.json\")\n", + "# speakers for TTS model\n", + "config.model.train_ds.text_data.speakers_filepath = f\"{TTS_SPEAKERS_PATH}\"\n", + "config.model.train_ds.manifest_filepath = None # audio-text pairs - we don't use them here\n", + "config.model.train_ds.batch_size = 8\n", + "\n", + "# validation data\n", + "config.model.validation_ds.manifest_filepath = str(AN4_DATASET / \"test_manifest.json\")\n", + "config.model.validation_ds.batch_size = 8\n", + "\n", + "config.trainer.max_epochs = NUM_EPOCHS\n", + "\n", + "config.trainer.devices = 1\n", + "config.trainer.strategy = None # use 1 device, no need for ddp strategy\n", + "\n", + "OmegaConf.resolve(config)" + ] + }, + { + "cell_type": "markdown", + "id": "8ae6cb2e-f571-4b53-8897-bb8ba0fc1146", + "metadata": {}, + "source": [ + "#### Construct trainer and ASRWithTTSModel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac4ae885-dec4-4ce9-8f69-a1f35d04b08c", + "metadata": {}, + "outputs": [], + "source": [ + "trainer = pl.Trainer(**config.trainer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"8f815762-b08d-4d3c-8fd3-61afa511eab4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "hybrid_model = ASRWithTTSModel(config.model)" + ] + }, + { + "cell_type": "markdown", + "id": "ca2c1bf2-28a9-4902-9c73-d96e04b21a46", + "metadata": {}, + "source": [ + "#### Validate the model\n", + "\n", + "Expect `~17.7%` WER on the AN4 test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffa5f5c6-0609-4f46-aa0c-747319035417", + "metadata": {}, + "outputs": [], + "source": [ + "trainer.validate(hybrid_model)" + ] + }, + { + "cell_type": "markdown", + "id": "701ee9c7-91a1-4917-bf7d-ab26b625c7bf", + "metadata": {}, + "source": [ + "#### Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f79761c9-b882-4f14-911f-4a960ff81554", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trainer.fit(hybrid_model)" + ] + }, + { + "cell_type": "markdown", + "id": "eac18c7c-bdcb-40ad-9c50-37f89fb4aa2a", + "metadata": {}, + "source": [ + "#### Validate the model after training\n", + "\n", + "Expect `~2%` WER on the AN4 test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd927e87-13fb-4b61-8b4a-a6850780f605", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trainer.validate(hybrid_model)" + ] + }, + { + "cell_type": "markdown", + "id": "6d25a77d-35ed-44b5-9ef5-318afa321acf", + "metadata": {}, + "source": [ + "### Save final model. Extract pure ASR model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f53ebd3-b89a-47e4-a0a5-ed3a3572f7c1", + "metadata": {}, + "outputs": [], + "source": [ + "# save full model: the model can be further used for finetuning\n", + "hybrid_model.save_to(\"checkpoints/finetuned_hybrid_model.nemo\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0560c2c-af28-4d8f-b36d-c18ec6a482a8", + "metadata": {}, + "outputs": [], + "source": [ + "# extract the resulting ASR model from the hybrid model\n", + "hybrid_model.save_asr_model_to(\"checkpoints/finetuned_asr_model.nemo\")" + ] + }, + { + "cell_type": "markdown", + "id": "2de58fbb-50be-42cd-9095-01cacfdb6931", + "metadata": {}, + "source": [ + "## Using Scritps (examples)" + ] + }, + { + "cell_type": "markdown", + "id": "86655198-b1fc-4615-958c-7c01f3cbd024", + "metadata": {}, + "source": [ + "`/examples/asr/asr_with_tts/` contains scripts for finetuning existing models and training new models from scratch." + ] + }, + { + "cell_type": "markdown", + "id": "b5837536-8280-475c-a581-caaee00edfca", + "metadata": {}, + "source": [ + "### Finetuning Existing Model" + ] + }, + { + "cell_type": "markdown", + "id": "84df9aeb-3b5e-41fc-a8d0-dfc660e71375", + "metadata": {}, + "source": [ + "To finetune existing ASR model using text-only data use `/examples/asr/asr_with_tts/speech_to_text_bpe_with_text_finetune.py` script with the corresponding config `/examples/asr/conf/asr_tts/hybrid_asr_tts.yaml`.\n", + "\n", + "Please specify paths to all the required models (ASR, TTS, and Enhancer checkpoints), along with `train_ds.text_data.manifest_filepath` and `train_ds.text_data.speakers_filepath`." 
+ ] + }, + { + "cell_type": "markdown", + "id": "78b9028c-02ce-4af4-b510-a431f4a2f62b", + "metadata": {}, + "source": [ + "```shell\n", + "python speech_to_text_bpe_with_text_finetune.py \\\n", + " model.asr_model_path= \\\n", + " model.tts_model_path= \\\n", + " model.enhancer_model_path= \\\n", + " model.asr_model_fuse_bn= \\\n", + " model.train_ds.manifest_filepath= \\\n", + " model.train_ds.text_data.manifest_filepath= \\\n", + " model.train_ds.text_data.speakers_filepath= \\\n", + " model.train_ds.text_data.tokenizer_workers=4 \\\n", + " model.validation_ds.manifest_filepath= \\\n", + " model.train_ds.batch_size=\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0b17c097-a3b1-49a3-8f54-f07b94218d0b", + "metadata": {}, + "source": [ + "### Training a New Model from Scratch" + ] + }, + { + "cell_type": "markdown", + "id": "6d75b928-57b3-4180-bd09-37e018eef7ef", + "metadata": {}, + "source": [ + "```shell\n", + "python speech_to_text_bpe_with_text.py \\\n", + " # (Optional: --config-path= --config-name=) \\\n", + " ++asr_model_type= \\\n", + " ++tts_model_path= \\\n", + " ++enhancer_model_path= \\\n", + " model.tokenizer.dir= \\\n", + " model.tokenizer.type=\"bpe\" \\\n", + " model.train_ds.manifest_filepath= \\\n", + " ++model.train_ds.text_data.manifest_filepath= \\\n", + " ++model.train_ds.text_data.speakers_filepath= \\\n", + " ++model.train_ds.text_data.min_words=1 \\\n", + " ++model.train_ds.text_data.max_words=45 \\\n", + " ++model.train_ds.text_data.tokenizer_workers=4 \\\n", + " model.validation_ds.manifest_filepath= \\\n", + " model.train_ds.batch_size= \\\n", + " trainer.max_epochs= \\\n", + " trainer.num_nodes= \\\n", + " trainer.accumulate_grad_batches= \\\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "01c17712-ae8d-49cb-ade1-ded168676e27", + "metadata": {}, + "source": [ + "## Training TTS Models for ASR Finetuning" + ] + }, + { + "cell_type": "markdown", + "id": "422dc3b2-d29f-4ed0-b4d2-6d32b35dfb7b", + "metadata": {}, + "source": [ + "### TTS Model (FastPitch)\n", + "\n", + "TTS model for the purpose of ASR model finetuning should be trained with the same mel spectrogram parameters as used in the ASR model. The typical parameters are `10ms` hop length, `25ms` window length, and the highest band of 8kHz (for 16kHz data). Other parameters are the same as for common multi-speaker TTS models.\n", + "\n", + "Mainly we observed two differences specific to TTS models for ASR:\n", + "- adding more speakers and more data improves the final ASR model quality (but not the perceptual quality of the TTS model)\n", + "- training for more epochs can also improve the quality of the ASR system (but MSE loss used for the TTS model can be higher than optimal on validation data)\n", + "\n", + "Use script `/examples/tts/fastpitch.py` to train a FastPitch model.\n", + "More details about the FastPitch model can be found in the [documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/tts/models.html#fastpitch). \n", + "\n", + "### Enhancer\n", + "Use script `/examples/tts/spectrogram_enhancer.py` to train an Enhancer model. 
More details can be found in the \n", + "[documentation](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/tts/models.html).\n", + "\n", + "### Models Used in This Tutorial\n", + "\n", + "Some details about the models used in this tutorial can be found on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/tts_en_fastpitch_spectrogram_enhancer_for_asr_finetuning).\n", + "\n", + "The system is also described in detail in the paper [Text-only domain adaptation for end-to-end ASR using integrated text-to-mel-spectrogram generator](https://arxiv.org/abs/2302.14036)." + ] + }, + { + "cell_type": "markdown", + "id": "9a9a6cd3-4bdc-4b6e-b4b1-3bfd50fd01b3", + "metadata": {}, + "source": [ + "## Summary" + ] + }, + { + "cell_type": "markdown", + "id": "e2890c61-e4b7-47aa-a086-bc483ae7141f", + "metadata": {}, + "source": [ + "The tutorial demonstrated the main concepts related to hybrid ASR-TTS models, which can be used to finetune ASR models and to train new ones from scratch. \n", + "The ability to achieve good text-only adaptation results is demonstrated by finetuning a small Conformer model on text-only data from the AN4 dataset." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml38", + "language": "python", + "name": "ml38" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From f7e33fc1a0dad23109ac81d824006350f6ad2b0b Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Thu, 13 Jul 2023 12:15:44 -0700 Subject: [PATCH 107/123] Add end_strings to SamplingParams (#6986) * Add end_strings to SamplingParams Signed-off-by: Gerald Shen * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Gerald Shen * Add end_strings to megatron_gpt_inference.yaml Signed-off-by: Gerald Shen * Add end_strings to sampling params Signed-off-by: Gerald Shen * Remove extra_id_1 from default end_strings Signed-off-by: Gerald Shen * Fix require_grad typos (#6930) Signed-off-by: Sergii Dymchenko Signed-off-by: Gerald Shen * fix syntax error Signed-off-by: Gerald Shen * fix the mpt chatbot (#6957) (#6968) Signed-off-by: Yi Dong Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> Signed-off-by: Gerald Shen * add support for max_total_length=4096 for 43b (#6763) * add support for max_total_length=4096 for 43b Signed-off-by: Zhilin Wang * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Zhilin Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Gerald Shen * rnnt_greedy_decoding.py: typos? 
auto-repressively -> auto-regressively (#6989) Signed-off-by: Vadim Kantorov Signed-off-by: Gerald Shen * Cache handling without input tensors mutation (#6980) (#6996) * Cache handling without input tensors mutation * Cleanup * Cleanup#2 * Cleanup#3 --------- Signed-off-by: Boris Fomitchev Co-authored-by: Boris Fomitchev Co-authored-by: Somshubra Majumdar Signed-off-by: Gerald Shen * Hybrid conformer export (#6983) (#6995) * Implemented generic kv-pair setting of export_config from args * Hybrid conformer export * Hybrid decoder export * Cleanup * Changed from **kwargs * Docstring * Docs added * Stringify args * Added docs for ASR export configs * lowercase ctc --------- Signed-off-by: Boris Fomitchev Co-authored-by: Boris Fomitchev Signed-off-by: Gerald Shen * Fixing an issue with confidence ensembles (#6987) (#7004) * Bug fix for the confidence ensembles * Relax constraints for the test --------- Signed-off-by: Igor Gitman Co-authored-by: Igor Gitman Signed-off-by: Gerald Shen * [TTS] Add cosine distance option to TTS aligner (#6806) * [TTS] Add cosine distance option to TTS aligner Signed-off-by: Ryan * [TTS] Update aligner comments Signed-off-by: Ryan --------- Signed-off-by: Ryan Signed-off-by: Gerald Shen * Minor MPT-7B fixes and creation script update (#6982) * Initial commit of minor MPT-7B fixes Signed-off-by: Daniel Egert * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Daniel Egert Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Gerald Shen * Change Jenkins timeout (#6997) * change timeout Signed-off-by: ericharper * change to 8 hours Signed-off-by: ericharper --------- Signed-off-by: ericharper Signed-off-by: Gerald Shen * remove hard coded input and output fields (#7008) * remove hard coded input and output fields Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Signed-off-by: Gerald Shen * RoPE length extrapolation with interpolation (#7005) * Push changes Signed-off-by: MaximumEntropy * Fixes Signed-off-by: MaximumEntropy * add continue training script Signed-off-by: MaximumEntropy * [WIP] nonlinear interp Signed-off-by: MaximumEntropy * Fix Signed-off-by: MaximumEntropy * override encoder_seq_len Signed-off-by: MaximumEntropy * Remove nonlinear Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * sft with pi (#7006) * sft with pi Signed-off-by: Evelina * update values only if not None" Signed-off-by: Evelina --------- Signed-off-by: Evelina * Address comments Signed-off-by: MaximumEntropy * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add info Signed-off-by: MaximumEntropy * Empty Signed-off-by: MaximumEntropy --------- Signed-off-by: MaximumEntropy Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> Signed-off-by: Gerald Shen * use proper config Signed-off-by: Gerald Shen * Add end_strings to SamplingParams Signed-off-by: Gerald Shen * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Signed-off-by: Gerald Shen * Add end_strings to 
megatron_gpt_inference.yaml Signed-off-by: Gerald Shen * Add end_strings to sampling params Signed-off-by: Gerald Shen * Remove extra_id_1 from default end_strings Signed-off-by: Gerald Shen * fix syntax error Signed-off-by: Gerald Shen * use proper config Signed-off-by: Gerald Shen --------- Signed-off-by: Gerald Shen Signed-off-by: Sergii Dymchenko Signed-off-by: Yi Dong Signed-off-by: Zhilin Wang Signed-off-by: Vadim Kantorov Signed-off-by: Boris Fomitchev Signed-off-by: Igor Gitman Signed-off-by: Ryan Signed-off-by: Daniel Egert Signed-off-by: ericharper Signed-off-by: arendu Signed-off-by: MaximumEntropy Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sergii Dymchenko Co-authored-by: Gerald Shen Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> Co-authored-by: Zhilin Wang Co-authored-by: Vadim Kantorov Co-authored-by: Boris Fomitchev Co-authored-by: Somshubra Majumdar Co-authored-by: Igor Gitman Co-authored-by: Ryan Langman Co-authored-by: trias702 <25867060+trias702@users.noreply.github.com> Co-authored-by: Eric Harper Co-authored-by: Adi Renduchintala Co-authored-by: Sandeep Subramanian Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> --- .../conf/megatron_gpt_inference.yaml | 2 +- .../nlp/language_modeling/megatron_gpt_eval.py | 1 + .../megatron_gpt_prompt_learning_model.py | 2 ++ .../megatron_gpt_sft_model.py | 1 + .../modules/common/text_generation_server.py | 18 +++++++++--------- .../modules/common/text_generation_utils.py | 9 ++++++--- .../common/transformer/text_generation.py | 2 ++ tests/collections/nlp/test_gpt_eval.py | 1 + 8 files changed, 23 insertions(+), 13 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml index 53d4e9b7e82b..b5b053fc1549 100644 --- a/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_inference.yaml @@ -9,7 +9,7 @@ inference: repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty. min_tokens_to_generate: 0 # The minimum length of the sequence to be generated. 
compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False - + end_strings: ["<|endoftext|>"] # generation will stop when one of these tokens is generated trainer: devices: 1 diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 2a6890e1a9b4..76e68d24bae8 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -267,6 +267,7 @@ def main(cfg) -> None: "add_BOS": cfg.inference.add_BOS, "all_probs": cfg.inference.all_probs, "compute_logprob": cfg.inference.compute_logprob, + "end_strings": cfg.inference.end_strings, } fp8_enabled = hasattr(model.cfg, "fp8") and (model.cfg.fp8 == True) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py index 81ca1c283ad0..d14466dd18ee 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py @@ -217,6 +217,7 @@ def init_model(self, cfg: DictConfig, trainer: Trainer): "add_BOS": True, "all_probs": False, "compute_logprob": False, + "end_strings": self.cfg.inference.get('end_strings', ["<|endoftext|>"]), } elif self.cfg.get("report_validation_metric", False) and not hasattr(self.cfg, 'inference'): raise ValueError("Must provide inference parameters for reporting validation metric!") @@ -754,6 +755,7 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int] "all_probs": inference_config["all_probs"], "compute_logprob": inference_config["compute_logprob"], "compute_attention_mask": inference_config.get("compute_attention_mask", True), + "end_strings": inference_config.get('end_strings', ["<|endoftext|>"]), } task_ids, processed_inputs = batch diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index c80d2272613e..c390a8c440bf 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -390,6 +390,7 @@ def inference_step(self, dataloader_iter, batch_idx, mode, dataloader_idx=0): "add_BOS": False, "all_probs": False, "compute_logprob": False, + "end_strings": ["<|endoftext|>"], } result = megatron_gpt_generate( model=self, diff --git a/nemo/collections/nlp/modules/common/text_generation_server.py b/nemo/collections/nlp/modules/common/text_generation_server.py index 5eb69eefcc3e..a9d3b2097af7 100644 --- a/nemo/collections/nlp/modules/common/text_generation_server.py +++ b/nemo/collections/nlp/modules/common/text_generation_server.py @@ -141,6 +141,14 @@ def put(self): if not (1.0 <= repetition_penalty): return "repetition_penalty must be a positive number no less than 1.0" + end_strings = ['<|endoftext|>'] + if 'end_strings' in request.get_json(): + end_strings = request.get_json()['end_strings'] + if not isinstance(end_strings, list): + return "expect end_strings to be a list of strings" + if not all([isinstance(s, str) for s in end_strings]): + return "expect end_strings to be a list of strings" + min_tokens_to_generate = 0 if "min_tokens_to_generate" in request.get_json(): min_tokens_to_generate = request.get_json()["min_tokens_to_generate"] @@ -157,14 +165,6 @@ 
def put(self): if neighbors < 0: return "num of neighbors must be an integer no less than 0" - end_strings = ['<|endoftext|>'] - if 'end_strings' in request.get_json(): - end_strings = request.get_json()['end_strings'] - if not isinstance(end_strings, list): - return "expect end_strings to be a list of strings" - if not all([isinstance(s, str) for s in end_strings]): - return "expect end_strings to be a list of strings" - with lock: # Need to get lock to keep multiple threads from hitting code MegatronGenerate.send_do_generate() # Tell other ranks we're doing generate extra = {} @@ -190,8 +190,8 @@ def put(self): top_p, greedy, repetition_penalty, - min_tokens_to_generate, end_strings=end_strings, + min_tokens_to_generate=min_tokens_to_generate, **extra, ) for k in output: diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index d84d16efb5ba..545ea5cb346c 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -69,6 +69,7 @@ def get_default_sampling_params(): "add_BOS": True, "all_probs": False, "compute_logprob": False, + "end_strings": ["<|endoftext|>", ""], } return sampling_params @@ -104,6 +105,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para top_p=sampling_params['top_p'], greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], + end_strings=sampling_params['end_strings'], min_tokens_to_generate=length_params['min_length'], compute_attention_mask=sampling_params.get("compute_attention_mask", True), **strategy_args, @@ -125,6 +127,7 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para top_p=sampling_params['top_p'], greedy=sampling_params['use_greedy'], repetition_penalty=sampling_params['repetition_penalty'], + end_strings=sampling_params['end_strings'], min_tokens_to_generate=length_params['min_length'], **strategy_args, ) @@ -380,8 +383,8 @@ def synced_generate( compute_attention_mask=True, compute_logprob=False, repetition_penalty=1.2, - min_tokens_to_generate=0, end_strings=[], + min_tokens_to_generate=0, ): context_length = context_length_tensor.min().item() tokenizer = model.tokenizer @@ -475,8 +478,8 @@ def generate( compute_attention_mask=True, compute_logprob=False, repetition_penalty=1.0, - min_tokens_to_generate=0, end_strings=['<|endoftext|>'], + min_tokens_to_generate=0, **strategy_args, ) -> OutputType: """ @@ -560,8 +563,8 @@ def generate( top_p=top_p, greedy=greedy, repetition_penalty=repetition_penalty, - min_tokens_to_generate=min_tokens_to_generate, end_strings=end_strings, + min_tokens_to_generate=min_tokens_to_generate, ) special_tokens = set() if hasattr(tokenizer, 'pad_token') and tokenizer.pad_token is not None: diff --git a/nemo/collections/nlp/modules/common/transformer/text_generation.py b/nemo/collections/nlp/modules/common/transformer/text_generation.py index a261e925691f..28db41b8a27a 100644 --- a/nemo/collections/nlp/modules/common/transformer/text_generation.py +++ b/nemo/collections/nlp/modules/common/transformer/text_generation.py @@ -37,6 +37,7 @@ class SamplingParam(TypedDict): add_BOS: bool # add the bos token at the begining of the prompt all_probs: bool # whether return the log prob for all the tokens in vocab compute_logprob: bool # a flag used to compute logprob of all the input text, a very special case of running inference, default False + end_strings: List[str] # 
generation will stop when one of these tokens is generated class OutputType(TypedDict): @@ -88,6 +89,7 @@ def generate( add_BOS: bool, Whether add the bos token at the begining of the prompt all_probs: bool # whether return the log prob for all the tokens in vocab compute_logprob: bool # a flag used to compute logprob of all the input text, a very special case of running inference, default False + end_strings: List[str] # generation will stop when one of these tokens is generated Default None, If it is None, use_greedy will be "True". Returns: OutputType: It generates the output in a dictionary type. It has the following keys: diff --git a/tests/collections/nlp/test_gpt_eval.py b/tests/collections/nlp/test_gpt_eval.py index 0e64b989176f..fb3f9fda5ac3 100644 --- a/tests/collections/nlp/test_gpt_eval.py +++ b/tests/collections/nlp/test_gpt_eval.py @@ -78,6 +78,7 @@ def test_gpt_eval(self): "add_BOS": True, "all_probs": False, "compute_logprob": False, + "end_strings": ["<|endoftext|>"], } # test logprob From 573397549a3755649ab2476b50684ea7f415205e Mon Sep 17 00:00:00 2001 From: Kim Ngo <6362111+findkim@users.noreply.github.com> Date: Thu, 13 Jul 2023 15:25:03 -0500 Subject: [PATCH 108/123] Fix race condition when executing with multi-node where some ranks does not wait for setup (#7016) Signed-off-by: Kim Ngo <6362111+findkim@users.noreply.github.com> --- .../modules/common/megatron/megatron_utils.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_utils.py b/nemo/collections/nlp/modules/common/megatron/megatron_utils.py index d901a00a343b..68437921f930 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_utils.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_utils.py @@ -14,13 +14,14 @@ # limitations under the License. 
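For context on the end_strings plumbing in PATCH 107 above, the sketch below shows how a caller might pass the new sampling parameter once a MegatronGPTModel is available; it mirrors the keys used in megatron_gpt_eval.py and test_gpt_eval.py, while the prompt text, the max_length value, and the assumption that `model` is already restored (as megatron_gpt_eval.py does) are illustrative only and not part of this patch.

# `model` is assumed to be an already-loaded MegatronGPTModel, set up as in megatron_gpt_eval.py
length_params = {"max_length": 50, "min_length": 0}
sampling_params = {
    "use_greedy": False,
    "temperature": 1.0,
    "top_k": 0,
    "top_p": 0.9,
    "repetition_penalty": 1.2,
    "add_BOS": True,
    "all_probs": False,
    "compute_logprob": False,
    "end_strings": ["<|endoftext|>"],  # generation stops as soon as any of these strings is produced
}
response = model.generate(
    inputs=["Deep learning is"], length_params=length_params, sampling_params=sampling_params
)

The same list can also be supplied per request to the text generation server, which now validates that end_strings is a list of strings before generating.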
import os +import shutil from typing import Dict, List import torch import wget from torch.hub import _get_torch_home -from nemo.utils import logging +from nemo.utils import get_rank, logging __all__ = [ "get_megatron_lm_model", @@ -202,16 +203,14 @@ def _download(path: str, url: str): if url is None: return None - if not os.path.exists(path): - master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 - if not os.path.exists(path): - if master_device: - os.makedirs(MEGATRON_CACHE, exist_ok=True) - logging.info(f"Downloading from {url}") - wget.download(url, path) - # wait until the master process downloads the file and writes it to the cache dir - if torch.distributed.is_initialized(): - torch.distributed.barrier() + if get_rank.is_global_rank_zero() and not os.path.exists(path): + os.makedirs(MEGATRON_CACHE, exist_ok=True) + logging.info(f"Downloading from {url} to {path}") + downloaded_path = wget.download(url) + shutil.move(downloaded_path, path) + # wait until the master process downloads the file and writes it to the cache dir + if torch.distributed.is_initialized(): + torch.distributed.barrier() return path From 470f178f7f6a7f166761d14aa647168cc0dd157c Mon Sep 17 00:00:00 2001 From: tbartley94 <90423858+tbartley94@users.noreply.github.com> Date: Fri, 14 Jul 2023 00:57:09 -0400 Subject: [PATCH 109/123] Added bool types to neural_types export (#7032) Signed-off-by: tbartley94 --- nemo/core/neural_types/elements.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 10638a9c461a..f2de48da26d0 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -21,6 +21,7 @@ __all__ = [ 'ElementType', 'VoidType', + 'BoolType', 'ChannelType', 'AcousticEncodedRepresentation', 'AudioSignal', From e859e43ef85cc6bcdde697f634bb3b16ee16bc6b Mon Sep 17 00:00:00 2001 From: Nikolay Karpov Date: Fri, 14 Jul 2023 13:24:14 +0400 Subject: [PATCH 110/123] rnnt and char utils (#6971) * rnnt_ngram_merge Signed-off-by: Nikolay Karpov * char level bug Signed-off-by: Nikolay Karpov * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Nikolay Karpov Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Somshubra Majumdar --- .../ngram_lm/kenlm_utils.py | 18 ++-- .../ngram_lm/ngram_merge.py | 83 +++++++++---------- 2 files changed, 49 insertions(+), 52 deletions(-) diff --git a/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py b/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py index 9e255ddc50ca..d9b48afab292 100644 --- a/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py +++ b/scripts/asr_language_modeling/ngram_lm/kenlm_utils.py @@ -79,11 +79,8 @@ def setup_tokenizer(nemo_model_file): ) model = nemo_asr.models.ASRModel.from_pretrained(nemo_model_file, map_location=torch.device('cpu')) - if type(model.tokenizer).__name__ == 'AggregateTokenizer': - is_aggregate_tokenizer = True - else: - is_aggregate_tokenizer = False - + is_aggregate_tokenizer = False + tokenizer_nemo = None encoding_level = SUPPORTED_MODELS.get(type(model).__name__, None) if not encoding_level: logging.warning( @@ -91,7 +88,12 @@ def setup_tokenizer(nemo_model_file): ) encoding_level = 'char' - tokenizer_nemo = model.tokenizer + if encoding_level == 'subword': + if type(model.tokenizer).__name__ == 'AggregateTokenizer': + is_aggregate_tokenizer = True + + tokenizer_nemo = 
model.tokenizer + del model return tokenizer_nemo, encoding_level, is_aggregate_tokenizer @@ -117,10 +119,10 @@ def iter_files(source_path, dest_path, tokenizer, encoding_level, is_aggregate_t if isinstance(dest_path, str): with open(dest_path, 'w', encoding='utf-8') as f: for line in dataset: - f.write(line + "\n") + f.write(line[0] + "\n") else: # write to stdin of KenLM for line in dataset: - dest_path.write((line + '\n').encode()) + dest_path.write((line[0] + '\n').encode()) def read_train_file( diff --git a/scripts/asr_language_modeling/ngram_lm/ngram_merge.py b/scripts/asr_language_modeling/ngram_lm/ngram_merge.py index abffc6372518..b6606286ae5b 100644 --- a/scripts/asr_language_modeling/ngram_lm/ngram_merge.py +++ b/scripts/asr_language_modeling/ngram_lm/ngram_merge.py @@ -51,6 +51,7 @@ import torch import nemo.collections.asr as nemo_asr +from nemo.collections.asr.modules.rnnt import RNNTDecoder from nemo.collections.asr.parts.submodules.ctc_beam_decoding import DEFAULT_TOKEN_OFFSET from nemo.utils import logging @@ -207,9 +208,7 @@ def make_arpa(self, ngram_mod: str, ngram_arpa: str, force: bool): ] return subprocess.run(sh_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr,) - def test_perplexity( - self, mod_c: str, symbols: str, test_txt: str, nemo_model_file: str, tmp_path: str, force: bool - ) -> str: + def test_perplexity(self, mod_c: str, symbols: str, test_txt: str, nemo_model_file: str, tmp_path: str) -> str: """ Tests the perplexity of a given ngram model on a test file. @@ -229,12 +228,12 @@ def test_perplexity( 'Perplexity: 123.45' """ - test_far = farcompile(symbols, test_txt, tmp_path, nemo_model_file, force) + test_far = farcompile(symbols, test_txt, tmp_path, nemo_model_file) res_p = self.perplexity(mod_c, test_far) return res_p -def farcompile(symbols: str, text_file: str, tmp_path: str, nemo_model_file: str, force: bool,) -> str: +def farcompile(symbols: str, text_file: str, tmp_path: str, nemo_model_file: str) -> str: """ Compiles a text file into a FAR file using the given symbol table or tokenizer. @@ -253,43 +252,35 @@ def farcompile(symbols: str, text_file: str, tmp_path: str, nemo_model_file: str """ test_far = os.path.join(tmp_path, os.path.split(text_file)[1] + ".far") - if os.path.isfile(test_far) and not force: - logging.info("File " + test_far + " exists. 
Skipping.") - return None - else: - sh_args = [ - "farcompilestrings", - "--generate_keys=10", - "--fst_type=compact", - "--symbols=" + symbols, - "--keep_symbols", - ">", - test_far, - ] - - tokenizer, encoding_level, is_aggregate_tokenizer = kenlm_utils.setup_tokenizer(nemo_model_file) - - ps = subprocess.Popen( - " ".join(sh_args), shell=True, stdin=subprocess.PIPE, stdout=sys.stdout, stderr=sys.stderr, - ) - - kenlm_utils.iter_files( - source_path=[text_file], - dest_path=ps.stdin, - tokenizer=tokenizer, - encoding_level=encoding_level, - is_aggregate_tokenizer=is_aggregate_tokenizer, - verbose=1, - ) - stdout, stderr = ps.communicate() + sh_args = [ + "farcompilestrings", + "--generate_keys=10", + "--fst_type=compact", + "--symbols=" + symbols, + "--keep_symbols", + ">", + test_far, + ] + + tokenizer, encoding_level, is_aggregate_tokenizer = kenlm_utils.setup_tokenizer(nemo_model_file) + + ps = subprocess.Popen(" ".join(sh_args), shell=True, stdin=subprocess.PIPE, stdout=sys.stdout, stderr=sys.stderr,) + + kenlm_utils.iter_files( + source_path=[text_file], + dest_path=ps.stdin, + tokenizer=tokenizer, + encoding_level=encoding_level, + is_aggregate_tokenizer=is_aggregate_tokenizer, + verbose=1, + ) + stdout, stderr = ps.communicate() - exit_code = ps.returncode + exit_code = ps.returncode - command = " ".join(sh_args) - assert ( - exit_code == 0 - ), f"Exit_code must be 0.\n bash command: {command} \n stdout: {stdout} \n stderr: {stderr}" - return test_far + command = " ".join(sh_args) + assert exit_code == 0, f"Exit_code must be 0.\n bash command: {command} \n stdout: {stdout} \n stderr: {stderr}" + return test_far def make_kenlm(kenlm_bin_path: str, ngram_arpa: str, force: bool): @@ -310,7 +301,7 @@ def make_kenlm(kenlm_bin_path: str, ngram_arpa: str, force: bool): logging.info("File " + ngram_kenlm + " exists. Skipping.") return None else: - sh_args = [kenlm_bin_path, "trie", "-i", ngram_arpa, ngram_kenlm] + sh_args = [os.path.join(kenlm_bin_path, "build_binary"), "trie", "-i", ngram_arpa, ngram_kenlm] return subprocess.run(sh_args, capture_output=False, text=True, stdout=sys.stdout, stderr=sys.stderr,) @@ -336,12 +327,15 @@ def make_symbol_list(nemo_model_file, symbols, force): else: if nemo_model_file.endswith('.nemo'): asr_model = nemo_asr.models.ASRModel.restore_from(nemo_model_file, map_location=torch.device('cpu')) - vocab_size = len(asr_model.decoder.vocabulary) else: logging.warning( "nemo_model_file does not end with .nemo, therefore trying to load a pretrained model with this name." 
) asr_model = nemo_asr.models.ASRModel.from_pretrained(nemo_model_file, map_location=torch.device('cpu')) + + if isinstance(asr_model.decoder, RNNTDecoder): + vocab_size = asr_model.decoder.blank_idx + else: vocab_size = len(asr_model.decoder.vocabulary) vocab = [chr(idx + DEFAULT_TOKEN_OFFSET) for idx in range(vocab_size)] @@ -389,8 +383,9 @@ def main( if not symbols: symbols = os.path.join(out_path, os.path.split(nemo_model_file)[1] + ".syms") make_symbol_list(nemo_model_file, symbols, force) - test_p = nm.test_perplexity(mod_c, symbols, test_file, nemo_model_file, out_path, force) - logging.info("Perplexity summary:" + test_p) + for test_f in test_file.split(","): + test_p = nm.test_perplexity(mod_c, symbols, test_f, nemo_model_file, out_path) + logging.info("Perplexity summary " + test_f + " : " + test_p) logging.info("Making ARPA and Kenlm model " + arpa_c) out = nm.make_arpa(mod_c, arpa_c, force) From 18f283efcb855cb8797fc2c3c9fafd4567136f8e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 14 Jul 2023 08:40:05 -0400 Subject: [PATCH 111/123] fix tab text gen (#7022) (#7031) Signed-off-by: Yi Dong Co-authored-by: Yi Dong <43824965+yidong72@users.noreply.github.com> --- .../collections/nlp/modules/common/text_generation_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index 545ea5cb346c..36b30aae47b9 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -396,6 +396,7 @@ def synced_generate( context_length_tensor, tokens_to_generate, all_probs, + compute_attention_mask=compute_attention_mask, temperature=temperature, ) else: @@ -825,6 +826,7 @@ def tab_sample_sequence_batch( context_lengths, tokens_to_generate, all_probs=True, + compute_attention_mask=True, type_ids=None, temperature=None, ): @@ -848,7 +850,7 @@ def tab_sample_sequence_batch( # initialize the batch with torch.no_grad(): context_length = context_lengths.min().item() - inference_strategy.init_batch(context_tokens, context_length) + inference_strategy.init_batch(context_tokens, context_length, compute_attention_mask) context = context_tokens[:, :context_length] # the context may start in the middle of the row, # calculate the offset according to the position of '\n' or '<|endoftext|>' @@ -882,7 +884,7 @@ def tab_sample_sequence_batch( while context_length < maxlen: batch, tensor_shape = inference_strategy.prepare_batch_at_step( - tokens, maxlen, micro_batch_size, counter, context_length + tokens, maxlen, micro_batch_size, counter, context_length, compute_attention_mask ) output = inference_strategy.forward_step(batch, tensor_shape) From 33100e0ad3c504eb0b402bc7e3ba2aa0479913c5 Mon Sep 17 00:00:00 2001 From: Aleksandr Laptev Date: Sun, 16 Jul 2023 02:03:45 +0700 Subject: [PATCH 112/123] ASR Confidence update and tutorial (#6810) * small fixes and tests Signed-off-by: Aleksandr Laptev * various fixes for the tutorial Signed-off-by: Aleksandr Laptev * tutorial added Signed-off-by: Aleksandr Laptev * for for a little oops after rebasement Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix tests Signed-off-by: Aleksandr Laptev * unused import removed Signed-off-by: Aleksandr Laptev * fix review comments Signed-off-by: Aleksandr 
Laptev * deprecated parameters for greedy configs Signed-off-by: Aleksandr Laptev * move re-assigning to configs Signed-off-by: Aleksandr Laptev * fix comments 2 Signed-off-by: Aleksandr Laptev * fix config tests Signed-off-by: Aleksandr Laptev * fix ece test (my env was bugged apparently) Signed-off-by: Aleksandr Laptev * renamings for confidence ensemble Signed-off-by: Aleksandr Laptev * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fox comments 3 Signed-off-by: Aleksandr Laptev * return dropped tutorial Signed-off-by: Aleksandr Laptev * CI flips back and forth, increasing tolerance Signed-off-by: Aleksandr Laptev --------- Signed-off-by: Aleksandr Laptev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/source/starthere/tutorials.rst | 6 + nemo/collections/asr/metrics/rnnt_wer.py | 70 +- nemo/collections/asr/metrics/rnnt_wer_bpe.py | 21 +- nemo/collections/asr/metrics/wer.py | 146 +- nemo/collections/asr/metrics/wer_bpe.py | 20 +- .../asr/models/confidence_ensemble.py | 12 +- .../parts/submodules/ctc_greedy_decoding.py | 55 +- .../parts/submodules/rnnt_greedy_decoding.py | 252 +-- .../asr_confidence_benchmarking_utils.py | 183 +++ .../asr/parts/utils/asr_confidence_utils.py | 251 ++- .../asr/parts/utils/confidence_metrics.py | 202 ++- .../confidence_ensembles/build_ensemble.py | 17 +- .../confidence_ensembles/ensemble_config.yaml | 6 +- .../confidence/benchmark_asr_confidence.py | 173 +- .../asr/confidence/test_asr_confidence.py | 144 ++ .../confidence/test_asr_confidence_metrics.py | 115 ++ .../test_asr_confidence_primitives.py | 142 ++ .../test_asr_hybrid_rnnt_ctc_model_char.py | 6 +- tests/collections/asr/test_asr_metrics.py | 10 + .../asr/test_asr_rnnt_encdec_model.py | 6 +- .../asr/test_confidence_ensembles.py | 16 +- tutorials/asr/ASR_Confidence_Estimation.ipynb | 1432 +++++++++++++++++ tutorials/asr/Confidence_Ensembles.ipynb | 2 +- 23 files changed, 2836 insertions(+), 451 deletions(-) create mode 100644 nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py create mode 100644 tests/collections/asr/confidence/test_asr_confidence.py create mode 100644 tests/collections/asr/confidence/test_asr_confidence_metrics.py create mode 100644 tests/collections/asr/confidence/test_asr_confidence_primitives.py create mode 100644 tutorials/asr/ASR_Confidence_Estimation.ipynb diff --git a/docs/source/starthere/tutorials.rst b/docs/source/starthere/tutorials.rst index 2f4ea59cff5c..586ce46c0c38 100644 --- a/docs/source/starthere/tutorials.rst +++ b/docs/source/starthere/tutorials.rst @@ -109,6 +109,12 @@ To run a tutorial: * - ASR - Hybrid ASR-TTS Models Tutorial - `Multi-lingual ASR `_ + * - ASR + - ASR Confidence Estimation + - `ASR Confidence Estimation `_ + * - ASR + - Confidence-based Ensembles + - `Confidence-based Ensembles `_ * - NLP - Using Pretrained Language Models for Downstream Tasks - `Pretrained Language Models for Downstream Tasks `_ diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index 7e5636191a1d..87a48e50d58a 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -100,32 +100,33 @@ class AbstractRNNTDecoding(ConfidenceMixin): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. 
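Since the aggregation option above decides how per-token confidences collapse into a per-word score, a tiny numeric illustration of the four choices may help; the token scores are made up.

import numpy as np

token_conf = [0.9, 0.8, 0.5]  # hypothetical confidences for the tokens of one word
print(np.mean(token_conf))  # mean -> 0.733..., average token quality
print(np.min(token_conf))   # min  -> 0.5, a word is only as confident as its worst token
print(np.max(token_conf))   # max  -> 0.9, the most optimistic view
print(np.prod(token_conf))  # prod -> 0.36, penalizes every uncertain token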
- method_cfg: A dict-like object which contains the method name and settings to compute per-frame + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_method_cfg.name is set to `entropy`. + Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -139,7 +140,7 @@ class AbstractRNNTDecoding(ConfidenceMixin): timestep during greedy decoding. Setting to larger values allows longer sentences to be decoded, at the cost of increased execution time. preserve_frame_confidence: Same as above, overrides above value. - confidence_method: Same as above, overrides confidence_cfg.method. + confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. 
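To make the entropy variants listed above concrete, the sketch below evaluates each formula for one frame's log-probability vector with torch; the vocabulary size and alpha are arbitrary, and the final mapping of the entropy value into [0, 1] (the 'lin'/'exp' entropy_norm step) is deliberately left out.

import torch

log_probs = torch.log_softmax(torch.randn(128), dim=-1)  # one decoding frame, V = 128
alpha = 0.5  # power scale; must be > 0, and != 1 for the Tsallis/Renyi forms below
p_alpha = log_probs.exp().pow(alpha)

gibbs = -(p_alpha * p_alpha.log()).sum()            # H_a = -sum_i p_i^a * log(p_i^a)
tsallis = (1.0 - p_alpha.sum()) / (alpha - 1.0)     # H_a = 1/(a-1) * (1 - sum_i p_i^a)
renyi = torch.log2(p_alpha.sum()) / (1.0 - alpha)   # H_a = 1/(1-a) * log2(sum_i p_i^a)
# at alpha == 1 both Tsallis and Renyi behave like the Gibbs/Shannon entropy, as the docstring notes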
@@ -255,15 +256,13 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): # initialize confidence-related fields self._init_confidence(self.cfg.get('confidence_cfg', None)) - # Update preserve frame confidence - if self.preserve_frame_confidence is False: - if self.cfg.strategy in ['greedy', 'greedy_batch']: - self.preserve_frame_confidence = self.cfg.greedy.get('preserve_frame_confidence', False) - self.confidence_method_cfg = self.cfg.greedy.get('confidence_method_cfg', None) - - elif self.cfg.strategy in ['beam', 'tsd', 'alsd', 'maes']: - # Not implemented - pass + # Confidence estimation is not implemented for these strategies + if ( + not self.preserve_frame_confidence + and self.cfg.strategy in ['beam', 'tsd', 'alsd', 'maes'] + and self.cfg.beam.get('preserve_frame_confidence', False) + ): + raise NotImplementedError(f"Confidence calculation is not supported for strategy `{self.cfg.strategy}`") if self.cfg.strategy == 'greedy': if self.big_blank_durations is None: @@ -278,7 +277,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) else: self.decoding = greedy_decode.GreedyTDTInfer( @@ -292,7 +291,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) else: self.decoding = greedy_decode.GreedyMultiblankRNNTInfer( @@ -305,7 +304,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) elif self.cfg.strategy == 'greedy_batch': @@ -321,7 +320,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) else: self.decoding = greedy_decode.GreedyBatchedTDTInfer( @@ -335,7 +334,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) else: @@ -349,7 +348,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) elif self.cfg.strategy == 'beam': @@ -1006,32 +1005,33 @@ class RNNTDecoding(AbstractRNNTDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - method_cfg: A dict-like object which contains the method name and settings to compute per-frame + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). 
+ name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_method_cfg.name is set to `entropy`. + Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -1047,7 +1047,7 @@ class RNNTDecoding(AbstractRNNTDecoding): preserve_frame_confidence: Same as above, overrides above value. - confidence_method: Same as above, overrides confidence_cfg.method. + confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index d2e2c3cc5923..3fb50d2a1ee2 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -100,32 +100,33 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - method_cfg: A dict-like object which contains the method name and settings to compute per-frame + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_method_cfg.name is set to `entropy`. + Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. 
If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -141,7 +142,7 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): preserve_frame_confidence: Same as above, overrides above value. - confidence_method: Same as above, overrides confidence_cfg.method. + confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 4d90810cc3df..a88895763edc 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -35,14 +35,17 @@ def word_error_rate(hypotheses: List[str], references: List[str], use_cer=False) -> float: """ Computes Average Word Error rate between two texts represented as - corresponding lists of string. Hypotheses and references must have same - length. + corresponding lists of string. + + Hypotheses and references must have same length. + Args: - hypotheses: list of hypotheses - references: list of references - use_cer: bool, set True to enable cer + hypotheses (list): list of hypotheses + references(list) : list of references + use_cer (bool): set True to enable cer + Returns: - (float) average word error rate + wer (float): average word error rate """ scores = 0 words = 0 @@ -78,17 +81,18 @@ def word_error_rate_detail( between two texts represented as corresponding lists of string. Hypotheses and references must have same length. 
+ Args: - hypotheses (list): list of hypotheses - references(list) : list of references - use_cer (bool): set True to enable cer - Returns: - wer (float): average word error rate - words (int): Total number of words/charactors of given reference texts - ins_rate (float): average insertion error rate - del_rate (float): average deletion error rate - sub_rate (float): average substitution error rate + hypotheses (list): list of hypotheses + references(list) : list of references + use_cer (bool): set True to enable cer + Returns: + wer (float): average word error rate + words (int): Total number of words/charactors of given reference texts + ins_rate (float): average insertion error rate + del_rate (float): average deletion error rate + sub_rate (float): average substitution error rate """ scores = 0 words = 0 @@ -141,6 +145,68 @@ def word_error_rate_detail( return wer, words, ins_rate, del_rate, sub_rate +def word_error_rate_per_utt(hypotheses: List[str], references: List[str], use_cer=False) -> Tuple[List[float], float]: + """ + Computes Word Error Rate per utterance and the average WER + between two texts represented as corresponding lists of string. + + Hypotheses and references must have same length. + + Args: + hypotheses (list): list of hypotheses + references(list) : list of references + use_cer (bool): set True to enable cer + + Returns: + wer_per_utt (List[float]): word error rate per utterance + avg_wer (float): average word error rate + """ + scores = 0 + words = 0 + wer_per_utt = [] + + if len(hypotheses) != len(references): + raise ValueError( + "In word error rate calculation, hypotheses and reference" + " lists must have the same number of elements. But I got:" + "{0} and {1} correspondingly".format(len(hypotheses), len(references)) + ) + + for h, r in zip(hypotheses, references): + if use_cer: + h_list = list(h) + r_list = list(r) + else: + h_list = h.split() + r_list = r.split() + + # To get rid of the issue that jiwer does not allow empty string + if len(r_list) == 0: + if len(h_list) != 0: + errors = len(h_list) + wer_per_utt.append(float('inf')) + else: + if use_cer: + measures = jiwer.cer(r, h, return_dict=True) + er = measures['cer'] + else: + measures = jiwer.compute_measures(r, h) + er = measures['wer'] + + errors = measures['insertions'] + measures['deletions'] + measures['substitutions'] + wer_per_utt.append(er) + + scores += errors + words += len(r_list) + + if words != 0: + avg_wer = 1.0 * scores / words + else: + avg_wer = float('inf') + + return wer_per_utt, avg_wer + + def move_dimension_to_the_front(tensor, dim_index): all_dims = list(range(tensor.ndim)) return tensor.permute(*([dim_index] + all_dims[:dim_index] + all_dims[dim_index + 1 :])) @@ -192,32 +258,33 @@ class AbstractCTCDecoding(ConfidenceMixin): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - method_cfg: A dict-like object which contains the method name and settings to compute per-frame + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_method_cfg.name is set to `entropy`. 
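A short usage sketch for the WER helpers touched above, including the newly added word_error_rate_per_utt; the hypothesis and reference strings are made up.

from nemo.collections.asr.metrics.wer import (
    word_error_rate,
    word_error_rate_detail,
    word_error_rate_per_utt,
)

hyps = ["the cat sat on mat", "hello word"]
refs = ["the cat sat on the mat", "hello world"]

wer = word_error_rate(hyps, refs)  # corpus-level WER: 2 errors over 8 reference words -> 0.25
wer, words, ins_rate, del_rate, sub_rate = word_error_rate_detail(hyps, refs)
wer_per_utt, avg_wer = word_error_rate_per_utt(hyps, refs)  # roughly [0.167, 0.5] per utterance, 0.25 on average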
+ Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -233,6 +300,7 @@ class AbstractCTCDecoding(ConfidenceMixin): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. + confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. @@ -302,6 +370,14 @@ def __init__(self, decoding_cfg, blank_id: int): # initialize confidence-related fields self._init_confidence(self.cfg.get('confidence_cfg', None)) + # Confidence estimation is not implemented for strategies other than `greedy` + if ( + not self.preserve_frame_confidence + and self.cfg.strategy != 'greedy' + and self.cfg.beam.get('preserve_frame_confidence', False) + ): + raise NotImplementedError(f"Confidence calculation is not supported for strategy `{self.cfg.strategy}`") + # we need timestamps to extract non-blank per-frame confidence if self.compute_timestamps is not None: self.compute_timestamps |= self.preserve_frame_confidence @@ -313,7 +389,7 @@ def __init__(self, decoding_cfg, blank_id: int): preserve_alignments=self.preserve_alignments, compute_timestamps=self.compute_timestamps, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_method_cfg=self.confidence_method_cfg, + confidence_measure_cfg=self.confidence_measure_cfg, ) elif self.cfg.strategy == 'beam': @@ -961,32 +1037,33 @@ class CTCDecoding(AbstractCTCDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - method_cfg: A dict-like object which contains the method name and settings to compute per-frame + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. 
- name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_method_cfg.name is set to `entropy`. + Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -1002,6 +1079,7 @@ class CTCDecoding(AbstractCTCDecoding): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. + confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/wer_bpe.py b/nemo/collections/asr/metrics/wer_bpe.py index 8a92e4745a1b..524294d61c50 100644 --- a/nemo/collections/asr/metrics/wer_bpe.py +++ b/nemo/collections/asr/metrics/wer_bpe.py @@ -74,32 +74,33 @@ class CTCBPEDecoding(AbstractCTCDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - method_cfg: A dict-like object which contains the method name and settings to compute per-frame + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_method_cfg.name is set to `entropy`. + Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. 
If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -115,6 +116,7 @@ class CTCBPEDecoding(AbstractCTCDecoding): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. + confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. 
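The admissible alpha range quoted above for the Gibbs entropy is easier to read with numbers plugged in; the vocabulary size below is hypothetical.

import math

V = 1024  # hypothetical vocabulary size
lo = (math.log(V) + 2 - math.sqrt(math.log(V) ** 2 + 4)) / (2 * math.log(V))
hi = (1 + math.log(V - 1)) / math.log(V - 1)
print(f"valid alpha range for V={V}: [{lo:.3f}, {hi:.3f}]")  # roughly [0.124, 1.144]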
diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index 9b3191c8874d..bf65ff96ef5c 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -25,7 +25,7 @@ from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, - ConfidenceMethodConfig, + ConfidenceMeasureConfig, get_confidence_aggregation_bank, get_confidence_measure_bank, ) @@ -61,8 +61,8 @@ def to_confidence_config(self) -> ConfidenceConfig: return ConfidenceConfig( exclude_blank=self.exclude_blank, aggregation=self.aggregation, - method_cfg=ConfidenceMethodConfig( - name=name, entropy_type=entropy_type, temperature=self.alpha, entropy_norm=entropy_norm, + measure_cfg=ConfidenceMeasureConfig( + name=name, entropy_type=entropy_type, alpha=self.alpha, entropy_norm=entropy_norm, ), ) @@ -135,12 +135,12 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) filtered_logprobs = get_filtered_logprobs(hypothesis, confidence_cfg.exclude_blank) vocab_size = filtered_logprobs.shape[1] aggr_func = get_confidence_aggregation_bank()[confidence_cfg.aggregation] - if confidence_cfg.method_cfg.name == "max_prob": + if confidence_cfg.measure_cfg.name == "max_prob": conf_type = "max_prob" alpha = 1.0 else: - conf_type = f"entropy_{confidence_cfg.method_cfg.entropy_type}_{confidence_cfg.method_cfg.entropy_norm}" - alpha = confidence_cfg.method_cfg.temperature + conf_type = f"entropy_{confidence_cfg.measure_cfg.entropy_type}_{confidence_cfg.measure_cfg.entropy_norm}" + alpha = confidence_cfg.measure_cfg.alpha conf_func = get_confidence_measure_bank()[conf_type] conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() diff --git a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py index a64eded97208..1f29a511fc9c 100644 --- a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py @@ -16,12 +16,13 @@ from typing import List, Optional import torch -from omegaconf import DictConfig +from omegaconf import DictConfig, OmegaConf from nemo.collections.asr.parts.utils import rnnt_utils -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureMixin, ConfidenceMethodConfig +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig, ConfidenceMeasureMixin from nemo.core.classes import Typing, typecheck from nemo.core.neural_types import HypothesisType, LengthsType, LogprobsType, NeuralType +from nemo.utils import logging def pack_hypotheses(hypotheses: List[rnnt_utils.Hypothesis], logitlen: torch.Tensor,) -> List[rnnt_utils.Hypothesis]: @@ -70,31 +71,32 @@ class GreedyCTCInfer(Typing, ConfidenceMeasureMixin): preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated during decoding. When set to true, the Hypothesis will contain the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of floats. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). 
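Tying the renamed classes together, here is a sketch of building the updated ConfidenceConfig with a nested ConfidenceMeasureConfig, matching the fields used in confidence_ensemble.py above; the concrete values are illustrative only.

from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMeasureConfig

confidence_cfg = ConfidenceConfig(
    exclude_blank=True,   # drop blank frames before aggregating
    aggregation="prod",   # mean / min / max / prod
    measure_cfg=ConfidenceMeasureConfig(
        name="entropy",           # or "max_prob"
        entropy_type="tsallis",   # gibbs / tsallis / renyi
        alpha=0.5,                # previously called `temperature`
        entropy_norm="exp",       # lin / exp
    ),
)
# compute_confidence(hypothesis, confidence_cfg) then resolves this to the
# "entropy_tsallis_exp" measure from get_confidence_measure_bank().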
+ name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -128,7 +130,7 @@ def __init__( preserve_alignments: bool = False, compute_timestamps: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__() @@ -138,8 +140,8 @@ def __init__( self.compute_timestamps = compute_timestamps | preserve_frame_confidence self.preserve_frame_confidence = preserve_frame_confidence - # set confidence calculation method - self._init_confidence_measure(confidence_method_cfg) + # set confidence calculation measure + self._init_confidence_measure(confidence_measure_cfg) @typecheck() def forward( @@ -251,4 +253,27 @@ class GreedyCTCInferConfig: preserve_alignments: bool = False compute_timestamps: bool = False preserve_frame_confidence: bool = False - confidence_method_cfg: Optional[ConfidenceMethodConfig] = None + confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() + confidence_method_cfg: str = "DEPRECATED" + + def __post_init__(self): + # OmegaConf.structured ensures that post_init check is always executed + self.confidence_measure_cfg = OmegaConf.structured( + self.confidence_measure_cfg + if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + ) + if self.confidence_method_cfg != "DEPRECATED": + logging.warning( + "`confidence_method_cfg` is deprecated and will be removed in the future. " + "Please use `confidence_measure_cfg` instead." 
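Given the deprecation shim above, a greedy CTC decoding config written against the new field name would look like the following sketch; the measure settings are illustrative, and the old confidence_method_cfg spelling is still accepted but only triggers the warning and rewrite done in __post_init__.

from omegaconf import OmegaConf
from nemo.collections.asr.parts.submodules.ctc_greedy_decoding import GreedyCTCInferConfig
from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig

greedy_cfg = OmegaConf.structured(
    GreedyCTCInferConfig(
        preserve_frame_confidence=True,
        confidence_measure_cfg=ConfidenceMeasureConfig(
            name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin"
        ),
    )
)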
+ ) + + # TODO (alaptev): delete the following two lines sometime in the future + logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") + # OmegaConf.structured ensures that post_init check is always executed + self.confidence_measure_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.confidence_method_cfg) + ) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index ac10e54bb249..dfa3ac27854b 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -31,11 +31,11 @@ import numpy as np import torch -from omegaconf import DictConfig +from omegaconf import DictConfig, OmegaConf from nemo.collections.asr.modules import rnnt_abstract from nemo.collections.asr.parts.utils import rnnt_utils -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureMixin, ConfidenceMethodConfig +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig, ConfidenceMeasureMixin from nemo.collections.common.parts.rnn import label_collate from nemo.core.classes import Typing, typecheck from nemo.core.neural_types import AcousticEncodedRepresentation, ElementType, HypothesisType, LengthsType, NeuralType @@ -96,34 +96,32 @@ class _GreedyRNNTInfer(Typing, ConfidenceMeasureMixin): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. + - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. If the temperature α is provided, - the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. 
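The admissible-alpha inequality quoted in these docstrings is easy to evaluate numerically; a small self-contained sketch with a hypothetical vocabulary size:

    import math

    V = 1024  # hypothetical vocabulary size
    log_v = math.log(V)
    lower = (log_v + 2 - math.sqrt(log_v ** 2 + 4)) / (2 * log_v)
    upper = (1 + math.log(V - 1)) / math.log(V - 1)
    # prints approximately: 0.124 <= alpha <= 1.144 for V = 1024
    print(f"{lower:.3f} <= alpha <= {upper:.3f}")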
When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -156,7 +154,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__() self.decoder = decoder_model @@ -168,8 +166,8 @@ def __init__( self.preserve_alignments = preserve_alignments self.preserve_frame_confidence = preserve_frame_confidence - # set confidence calculation method - self._init_confidence_measure(confidence_method_cfg) + # set confidence calculation measure + self._init_confidence_measure(confidence_measure_cfg) def __call__(self, *args, **kwargs): return self.forward(*args, **kwargs) @@ -265,31 +263,32 @@ class GreedyRNNTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. + - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. 
Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -306,7 +305,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -315,7 +314,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_method_cfg=confidence_method_cfg, + confidence_measure_cfg=confidence_measure_cfg, ) @typecheck() @@ -503,31 +502,32 @@ class GreedyBatchedRNNTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. + - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. 
- When the temperature equals one, scaling is not applied to 'max_prob', + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) entropy_norm: A mapping of the entropy value to the interval [0,1]. @@ -544,7 +544,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -553,7 +553,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_method_cfg=confidence_method_cfg, + confidence_measure_cfg=confidence_measure_cfg, ) # Depending on availability of `blank_as_pad` support @@ -1478,29 +1478,34 @@ class GreedyMultiblankRNNTInfer(GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + - 'entropy' for using a normalized entropy of a log-likelihood vector. + + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. 
+ When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. Supported values: - 'lin' for using the linear mapping. @@ -1516,7 +1521,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -1525,7 +1530,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_method_cfg=confidence_method_cfg, + confidence_measure_cfg=confidence_measure_cfg, ) self.big_blank_durations = big_blank_durations self._SOS = blank_index - len(big_blank_durations) @@ -1677,29 +1682,34 @@ class GreedyBatchedMultiblankRNNTInfer(GreedyBatchedRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + - 'entropy' for using a normalized entropy of a log-likelihood vector. + + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. 
+ When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. Supported values: - 'lin' for using the linear mapping. @@ -1715,7 +1725,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -1724,7 +1734,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_method_cfg=confidence_method_cfg, + confidence_measure_cfg=confidence_measure_cfg, ) self.big_blank_durations = big_blank_durations @@ -2193,7 +2203,31 @@ class GreedyRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_method_cfg: Optional[ConfidenceMethodConfig] = None + confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() + confidence_method_cfg: str = "DEPRECATED" + + def __post_init__(self): + # OmegaConf.structured ensures that post_init check is always executed + self.confidence_measure_cfg = OmegaConf.structured( + self.confidence_measure_cfg + if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + ) + if self.confidence_method_cfg != "DEPRECATED": + logging.warning( + "`confidence_method_cfg` is deprecated and will be removed in the future. " + "Please use `confidence_measure_cfg` instead." + ) + + # TODO (alaptev): delete the following two lines sometime in the future + logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") + # OmegaConf.structured ensures that post_init check is always executed + self.confidence_measure_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.confidence_method_cfg) + ) + self.confidence_method_cfg = "DEPRECATED" @dataclass @@ -2201,7 +2235,31 @@ class GreedyBatchedRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_method_cfg: Optional[ConfidenceMethodConfig] = None + confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() + confidence_method_cfg: str = "DEPRECATED" + + def __post_init__(self): + # OmegaConf.structured ensures that post_init check is always executed + self.confidence_measure_cfg = OmegaConf.structured( + self.confidence_measure_cfg + if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + ) + if self.confidence_method_cfg != "DEPRECATED": + logging.warning( + "`confidence_method_cfg` is deprecated and will be removed in the future. " + "Please use `confidence_measure_cfg` instead." 
+ ) + + # TODO (alaptev): delete the following two lines sometime in the future + logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") + # OmegaConf.structured ensures that post_init check is always executed + self.confidence_measure_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.confidence_method_cfg) + ) + self.confidence_method_cfg = "DEPRECATED" class GreedyTDTInfer(_GreedyRNNTInfer): @@ -2230,29 +2288,34 @@ class GreedyTDTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + - 'entropy' for using a normalized entropy of a log-likelihood vector. + + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. Supported values: - 'lin' for using the linear mapping. 
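The back-compatibility pattern repeated in these config dataclasses is easier to see without the OmegaConf plumbing; a self-contained sketch with simplified stand-in names (not the NeMo classes):

    from dataclasses import dataclass, field
    import warnings

    @dataclass
    class MeasureCfg:
        name: str = "entropy"
        alpha: float = 0.33

    @dataclass
    class GreedyCfg:
        measure_cfg: MeasureCfg = field(default_factory=MeasureCfg)
        method_cfg: object = "DEPRECATED"  # old field kept only as a sentinel

        def __post_init__(self):
            if self.method_cfg != "DEPRECATED":
                warnings.warn("`method_cfg` is deprecated, use `measure_cfg`", DeprecationWarning)
                # Fold the legacy value into the new field, then reset the sentinel.
                self.measure_cfg = (
                    self.method_cfg
                    if isinstance(self.method_cfg, MeasureCfg)
                    else MeasureCfg(**self.method_cfg)
                )
                self.method_cfg = "DEPRECATED"

    old_style = GreedyCfg(method_cfg={"name": "max_prob", "alpha": 1.0})
    assert old_style.measure_cfg.name == "max_prob"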
@@ -2268,7 +2331,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -2277,7 +2340,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_method_cfg=confidence_method_cfg, + confidence_measure_cfg=confidence_measure_cfg, ) self.durations = durations @@ -2481,29 +2544,34 @@ class GreedyBatchedTDTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame + confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame confidence scores. - name: The method name (str). + + name: The measure name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - - 'entropy' for using normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. + - 'entropy' for using a normalized entropy of a log-likelihood vector. + + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. Supported values: - - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided, + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). - Note that for this entropy, the temperature should comply the following inequality: - 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size. + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/Tsallis_entropy - - 'renui' for the Rényi entropy. + - 'renyi' for the Rényi entropy. Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), where α is a parameter. When α == 1, it works like the Gibbs entropy. More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy - temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0. - When the temperature equals one, scaling is not applied to 'max_prob', + + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + entropy_norm: A mapping of the entropy value to the interval [0,1]. Supported values: - 'lin' for using the linear mapping. 
@@ -2519,7 +2587,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_method_cfg: Optional[DictConfig] = None, + confidence_measure_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -2528,7 +2596,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_method_cfg=confidence_method_cfg, + confidence_measure_cfg=confidence_measure_cfg, ) self.durations = durations diff --git a/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py new file mode 100644 index 000000000000..958195a4bb11 --- /dev/null +++ b/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py @@ -0,0 +1,183 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import copy +import os +from pathlib import Path +from typing import List, Optional, Tuple, Union + +import numpy as np +import texterrors +import torch +from omegaconf import open_dict + +from nemo.collections.asr.models import ASRModel, EncDecRNNTModel +from nemo.collections.asr.parts.utils.confidence_metrics import ( + auc_nt, + auc_pr, + auc_roc, + auc_yc, + ece, + nce, + save_confidence_hist, + save_custom_confidence_curve, + save_nt_curve, + save_pr_curve, + save_roc_curve, +) +from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis + + +def get_correct_marks(r: Union[List[int], List[str]], h: Union[List[int], List[str]]) -> List[bool]: + """Get correct marks by aligning the reference text with a hypothesis. + + This method considers only insertions and substitutions as incorrect marks. + """ + return [ + a == b + for a, b in zip(*(texterrors.align_texts([str(rr) for rr in r], [str(hh) for hh in h], False)[:-1])) + if b != "" + ] + + +def get_token_targets_with_confidence(hyp: Hypothesis) -> List[Tuple[str, float]]: + return [(y, c) for y, c in zip(hyp.y_sequence, hyp.token_confidence)] + + +def get_word_targets_with_confidence(hyp: Hypothesis) -> List[Tuple[str, float]]: + return [(y, c) for y, c in zip(hyp.words, hyp.word_confidence)] + + +def run_confidence_benchmark( + model: ASRModel, + target_level: str, + filepaths: List[str], + reference_texts: List[str], + batch_size: int = 8, + num_workers: int = 4, + plot_dir: Optional[Union[str, Path]] = None, + autocast: Optional = None, +): + """Run benchmark and plot histograms and curves, if plot_dir is provided. + + Returns: + Dictionary with benchmark results of the following scheme: + `level: (auc_roc, auc_pr, auc_nt, nce, ece, auc_yc, std_yc, max_yc)` with `level` being 'token' or 'word'. 
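A toy illustration of get_correct_marks (assuming texterrors is installed; the words are invented): one boolean is produced per hypothesis word, substitutions and insertions come out False, and reference words missing from the hypothesis produce no mark at all.

    ref = "the cat sat".split()
    hyp = "the bat sat down".split()
    marks = get_correct_marks(ref, hyp)
    print(marks)  # here only "the" and "sat" should be flagged as correct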
+ """ + draw_plot = plot_dir is not None + if isinstance(plot_dir, str): + plot_dir = Path(plot_dir) + is_rnnt = isinstance(model, EncDecRNNTModel) + + # setup autocast if necessary + if autocast is None: + + @contextlib.contextmanager + def autocast(): + yield + + # transcribe audio + with autocast(): + with torch.no_grad(): + transcriptions = model.transcribe( + paths2audio_files=filepaths, batch_size=batch_size, return_hypotheses=True, num_workers=num_workers + ) + if is_rnnt: + transcriptions = transcriptions[0] + + levels = [] + if target_level != "word": + levels.append("token") + if target_level != "token": + levels.append("word") + results = {} + for level in levels: + if level == "token": + targets_with_confidence = [get_token_targets_with_confidence(tran) for tran in transcriptions] + correct_marks = [ + get_correct_marks(model.tokenizer.text_to_ids(r), model.tokenizer.text_to_ids(h.text)) + for r, h in zip(reference_texts, transcriptions) + ] + else: # "word" + targets_with_confidence = [get_word_targets_with_confidence(tran) for tran in transcriptions] + correct_marks = [get_correct_marks(r.split(), h.words) for r, h in zip(reference_texts, transcriptions)] + + y_true, y_score = np.array( + [[f, p[1]] for cm, twc in zip(correct_marks, targets_with_confidence) for f, p in zip(cm, twc)] + ).T + # output scheme: yc.mean(), yc.max(), yc.std() or yc.mean(), yc.max(), yc.std(), (thresholds, yc) + result_yc = auc_yc(y_true, y_score, return_std_maximum=True, return_curve=draw_plot) + # output scheme: ece or ece, (thresholds, ece_curve) + results_ece = ece(y_true, y_score, return_curve=draw_plot) + results[level] = [ + auc_roc(y_true, y_score), + auc_pr(y_true, y_score), + auc_nt(y_true, y_score), + nce(y_true, y_score), + results_ece if isinstance(results_ece, float) else results_ece[0], + ] + list(result_yc[:3]) + + if draw_plot: + os.makedirs(plot_dir, exist_ok=True) + + mask_correct = y_true == 1 + y_score_correct = y_score[mask_correct] + y_score_incorrect = y_score[~mask_correct] + # histogram of the correct distribution + save_confidence_hist(y_score_correct, plot_dir, level + "_" + "hist_correct") + # histogram of the incorrect distribution + save_confidence_hist(y_score_incorrect, plot_dir, level + "_" + "hist_incorrect") + # AUC-ROC curve + save_roc_curve(y_true, y_score, plot_dir, level + "_" + "roc") + # AUC-PR curve + save_pr_curve(y_true, y_score, plot_dir, level + "_" + "pr") + # AUC-NT curve + save_nt_curve(y_true, y_score, plot_dir, level + "_" + "nt") + # AUC-YC curve + yc_thresholds, yc_values = result_yc[-1] + save_custom_confidence_curve( + yc_thresholds, + yc_values, + plot_dir, + level + "_" + "yc", + "Threshold", + "True positive rate − False Positive Rate", + ) + # ECE curve + ece_thresholds, ece_values = results_ece[-1] + ece_values /= max(ece_values) + save_custom_confidence_curve( + ece_thresholds, ece_values, plot_dir, level + "_" + "ece", "Threshold", "|Accuracy − Confidence score|" + ) + + return results + + +def apply_confidence_parameters(decoding_cfg, hp): + """Apply parameters from a parameter grid to a decoding config. + + Returns: + Updated decoding config. 
+ """ + new_decoding_cfg = copy.deepcopy(decoding_cfg) + confidence_cfg_fields = ("aggregation", "exclude_blank") + confidence_measure_cfg_fields = ("name", "alpha", "entropy_type", "entropy_norm") + with open_dict(new_decoding_cfg): + for p, v in hp.items(): + if p in confidence_cfg_fields: + new_decoding_cfg.confidence_cfg[p] = v + elif p in confidence_measure_cfg_fields: + new_decoding_cfg.confidence_cfg.measure_cfg[p] = v + return new_decoding_cfg diff --git a/nemo/collections/asr/parts/utils/asr_confidence_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_utils.py index 1387f6940b38..29c49529a509 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_utils.py @@ -18,46 +18,197 @@ from functools import partial from typing import List, Optional +import torch from omegaconf import DictConfig, OmegaConf from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis +from nemo.utils import logging + + +class ConfidenceMeasureConstants: + NAMES = ("max_prob", "entropy") + ENTROPY_TYPES = ("gibbs", "tsallis", "renyi") + ENTROPY_NORMS = ("lin", "exp") + + @classmethod + def print(cls): + return ( + cls.__name__ + + ": " + + str({"NAMES": cls.NAMES, "ENTROPY_TYPES": cls.ENTROPY_TYPES, "ENTROPY_NORMS": cls.ENTROPY_NORMS}) + ) + + +class ConfidenceConstants: + AGGREGATIONS = ("mean", "min", "max", "prod") + + @classmethod + def print(cls): + return cls.__name__ + ": " + str({"AGGREGATIONS": cls.AGGREGATIONS}) @dataclass -class ConfidenceMethodConfig: +class ConfidenceMeasureConfig: + """A Config which contains the measure name and settings to compute per-frame confidence scores. + + Args: + name: The measure name (str). + Supported values: + - 'max_prob' for using the maximum token probability as a confidence. + - 'entropy' for using a normalized entropy of a log-likelihood vector. + + entropy_type: Which type of entropy to use (str). + Used if confidence_measure_cfg.name is set to `entropy`. + Supported values: + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, + the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. + - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. + Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/Tsallis_entropy + - 'renyi' for the Rényi entropy. + Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy + + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', + and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + + entropy_norm: A mapping of the entropy value to the interval [0,1]. + Supported values: + - 'lin' for using the linear mapping. + - 'exp' for using exponential mapping with linear shift. 
+ """ + name: str = "entropy" entropy_type: str = "tsallis" - temperature: float = 0.33 + alpha: float = 0.33 entropy_norm: str = "exp" + temperature: str = "DEPRECATED" def __post_init__(self): - if self.name not in ("max_prob", "entropy"): - raise ValueError(f"`name` has to be one of the following: `max_prob`, `entropy`. Provided: {self.name}") - if self.entropy_type not in ("gibbs", "tsallis", "renui"): + if self.temperature != "DEPRECATED": + logging.warning( + "`temperature` is deprecated and will be removed in the future. Please use `alpha` instead." + ) + + # TODO (alaptev): delete the following two lines sometime in the future + logging.warning("Re-writing `alpha` with the value of `temperature`.") + # self.temperature has type str + self.alpha = float(self.temperature) + self.temperature = "DEPRECATED" + if self.name not in ConfidenceMeasureConstants.NAMES: raise ValueError( - f"`entropy_type` has to be one of the following: `gibbs`, `tsallis`, `renui`. Provided: {self.entropy_type}" + f"`name` must be one of the following: " + f"{'`' + '`, `'.join(ConfidenceMeasureConstants.NAMES) + '`'}. Provided: `{self.name}`" ) - if self.temperature <= 0.0: - raise ValueError(f"`temperature` has to be > 0. Provided: {self.temperature}") - if self.entropy_norm not in ("lin", "exp"): + if self.entropy_type not in ConfidenceMeasureConstants.ENTROPY_TYPES: raise ValueError( - f"`entropy_norm` has to be one of the following: `lin`, `exp`. Provided: {self.entropy_norm}" + f"`entropy_type` must be one of the following: " + f"{'`' + '`, `'.join(ConfidenceMeasureConstants.ENTROPY_TYPES) + '`'}. Provided: `{self.entropy_type}`" + ) + if self.alpha <= 0.0: + raise ValueError(f"`alpha` must be > 0. Provided: {self.alpha}") + if self.entropy_norm not in ConfidenceMeasureConstants.ENTROPY_NORMS: + raise ValueError( + f"`entropy_norm` must be one of the following: " + f"{'`' + '`, `'.join(ConfidenceMeasureConstants.ENTROPY_NORMS) + '`'}. Provided: `{self.entropy_norm}`" ) @dataclass class ConfidenceConfig: + """A config which contains the following key-value pairs related to confidence scores. + + Args: + preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores + generated during decoding. When set to true, the Hypothesis will contain + the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of floats. + preserve_token_confidence: Bool flag which preserves the history of per-token confidence scores + generated during greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `token_confidence` in it. Here, `token_confidence` is a List of floats. + + The length of the list corresponds to the number of recognized tokens. + preserve_word_confidence: Bool flag which preserves the history of per-word confidence scores + generated during greedy decoding (sample / batched). When set to true, the Hypothesis will contain + the non-null value for `word_confidence` in it. Here, `word_confidence` is a List of floats. + + The length of the list corresponds to the number of recognized words. + exclude_blank: Bool flag indicating that blank token confidence scores are to be excluded + from the `token_confidence`. + aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. + Valid options are `mean`, `min`, `max`, `prod`. + measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence scores. 
+ + name: The measure name (str). + Supported values: + - 'max_prob' for using the maximum token probability as a confidence. + - 'entropy' for using a normalized entropy of a log-likelihood vector. + + entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + Supported values: + - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, + the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). + Note that for this entropy, the alpha should comply the following inequality: + (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1) + where V is the model vocabulary size. + - 'tsallis' for the Tsallis entropy with the Boltzmann constant one. + Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/Tsallis_entropy + - 'renyi' for the Rényi entropy. + Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)), + where α is a parameter. When α == 1, it works like the Gibbs entropy. + More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy + + alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0. + When the alpha equals one, scaling is not applied to 'max_prob', + and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i)) + + entropy_norm: A mapping of the entropy value to the interval [0,1]. + Supported values: + - 'lin' for using the linear mapping. + - 'exp' for using exponential mapping with linear shift. + """ + preserve_frame_confidence: bool = False preserve_token_confidence: bool = False preserve_word_confidence: bool = False exclude_blank: bool = True aggregation: str = "min" - method_cfg: ConfidenceMethodConfig = ConfidenceMethodConfig() + measure_cfg: ConfidenceMeasureConfig = ConfidenceMeasureConfig() + method_cfg: str = "DEPRECATED" def __post_init__(self): - if self.aggregation not in ("mean", "min", "max", "prod"): + # OmegaConf.structured ensures that post_init check is always executed + self.measure_cfg = OmegaConf.structured( + self.measure_cfg + if isinstance(self.measure_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.measure_cfg) + ) + if self.method_cfg != "DEPRECATED": + logging.warning( + "`method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead." + ) + + # TODO (alaptev): delete the following two lines sometime in the future + logging.warning("Re-writing `measure_cfg` with the value of `method_cfg`.") + # OmegaConf.structured ensures that post_init check is always executed + self.measure_cfg = OmegaConf.structured( + self.method_cfg + if isinstance(self.method_cfg, ConfidenceMeasureConfig) + else ConfidenceMeasureConfig(**self.method_cfg) + ) + self.method_cfg = "DEPRECATED" + if self.aggregation not in ConfidenceConstants.AGGREGATIONS: raise ValueError( - f"`aggregation` has to be one of the following: `mean`, `min`, `max`, `prod`. Provided: {self.aggregation}" + f"`aggregation` has to be one of the following: " + f"{'`' + '`, `'.join(ConfidenceMeasureConstants.AGGREGATIONS) + '`'}. 
Provided: `{self.aggregation}`" ) @@ -70,32 +221,32 @@ def get_confidence_measure_bank(): entropy_gibbs_exp: Gibbs entropy with exponential normalization entropy_tsallis_lin: Tsallis entropy with linear normalization entropy_tsallis_exp: Tsallis entropy with exponential normalization - entropy_renui_lin: Rényi entropy with linear normalization - entropy_renui_exp: Rényi entropy with exponential normalization + entropy_renyi_lin: Rényi entropy with linear normalization + entropy_renyi_exp: Rényi entropy with exponential normalization Returns: dictionary with lambda functions. """ # helper functions - # Gibbs entropy is implemented without temperature + # Gibbs entropy is implemented without alpha neg_entropy_gibbs = lambda x: (x.exp() * x).sum(-1) - neg_entropy_temperature = lambda x, t: (x * t).exp().sum(-1) - neg_entropy_temperature_gibbs = lambda x, t: ((x * t).exp() * x).sum(-1) + neg_entropy_alpha = lambda x, t: (x * t).exp().sum(-1) + neg_entropy_alpha_gibbs = lambda x, t: ((x * t).exp() * x).sum(-1) # too big for a lambda def entropy_tsallis_exp(x, v, t): exp_neg_max_ent = math.exp((1 - math.pow(v, 1 - t)) / (1 - t)) - return (((1 - neg_entropy_temperature(x, t)) / (1 - t)).exp() - exp_neg_max_ent) / (1 - exp_neg_max_ent) + return (((1 - neg_entropy_alpha(x, t)) / (1 - t)).exp() - exp_neg_max_ent) / (1 - exp_neg_max_ent) def entropy_gibbs_exp(x, v, t): exp_neg_max_ent = math.pow(v, -t * math.pow(v, 1 - t)) - return ((neg_entropy_temperature_gibbs(x, t) * t).exp() - exp_neg_max_ent) / (1 - exp_neg_max_ent) + return ((neg_entropy_alpha_gibbs(x, t) * t).exp() - exp_neg_max_ent) / (1 - exp_neg_max_ent) # use Gibbs entropies for Tsallis and Rényi with t == 1.0 entropy_gibbs_lin_baseline = lambda x, v: 1 + neg_entropy_gibbs(x) / math.log(v) entropy_gibbs_exp_baseline = lambda x, v: (neg_entropy_gibbs(x).exp() * v - 1) / (v - 1) # fill the measure bank confidence_measure_bank = {} - # Maximum probability measure is implemented without temperature + # Maximum probability measure is implemented without alpha confidence_measure_bank["max_prob"] = ( lambda x, v, t: (x.max(dim=-1)[0].exp() * v - 1) / (v - 1) if t == 1.0 @@ -104,7 +255,7 @@ def entropy_gibbs_exp(x, v, t): confidence_measure_bank["entropy_gibbs_lin"] = ( lambda x, v, t: entropy_gibbs_lin_baseline(x, v) if t == 1.0 - else 1 + neg_entropy_temperature_gibbs(x, t) / math.log(v) / math.pow(v, 1 - t) + else 1 + neg_entropy_alpha_gibbs(x, t) / math.log(v) / math.pow(v, 1 - t) ) confidence_measure_bank["entropy_gibbs_exp"] = ( lambda x, v, t: entropy_gibbs_exp_baseline(x, v) if t == 1.0 else entropy_gibbs_exp(x, v, t) @@ -112,20 +263,20 @@ def entropy_gibbs_exp(x, v, t): confidence_measure_bank["entropy_tsallis_lin"] = ( lambda x, v, t: entropy_gibbs_lin_baseline(x, v) if t == 1.0 - else 1 + (1 - neg_entropy_temperature(x, t)) / (math.pow(v, 1 - t) - 1) + else 1 + (1 - neg_entropy_alpha(x, t)) / (math.pow(v, 1 - t) - 1) ) confidence_measure_bank["entropy_tsallis_exp"] = ( lambda x, v, t: entropy_gibbs_exp_baseline(x, v) if t == 1.0 else entropy_tsallis_exp(x, v, t) ) - confidence_measure_bank["entropy_renui_lin"] = ( + confidence_measure_bank["entropy_renyi_lin"] = ( lambda x, v, t: entropy_gibbs_lin_baseline(x, v) if t == 1.0 - else 1 + neg_entropy_temperature(x, t).log2() / (t - 1) / math.log(v, 2) + else 1 + neg_entropy_alpha(x, t).log2() / (t - 1) / math.log(v, 2) ) - confidence_measure_bank["entropy_renui_exp"] = ( + confidence_measure_bank["entropy_renyi_exp"] = ( lambda x, v, t: entropy_gibbs_exp_baseline(x, v) if t == 1.0 - else 
(neg_entropy_temperature(x, t).pow(1 / (t - 1)) * v - 1) / (v - 1) + else (neg_entropy_alpha(x, t).pow(1 / (t - 1)) * v - 1) / (v - 1) ) return confidence_measure_bank @@ -160,48 +311,55 @@ class ConfidenceMeasureMixin(ABC): It initializes per-frame confidence measure. """ - def _init_confidence_measure(self, confidence_method_cfg: Optional[DictConfig] = None): + def _init_confidence_measure(self, confidence_measure_cfg: Optional[DictConfig] = None): """Initialize per-frame confidence measure from config. """ - if confidence_method_cfg is None: - confidence_method_cfg = OmegaConf.structured(ConfidenceMethodConfig()) + # OmegaConf.structured ensures that post_init check is always executed + confidence_measure_cfg = OmegaConf.structured( + ConfidenceMeasureConfig() + if confidence_measure_cfg is None + else ConfidenceMeasureConfig(**confidence_measure_cfg) + ) - # set confidence calculation method + # set confidence calculation measure # we suppose that self.blank_id == len(vocabulary) self.num_tokens = (self.blank_id if hasattr(self, "blank_id") else self._blank_index) + 1 - self.temperature = confidence_method_cfg.temperature + self.alpha = confidence_measure_cfg.alpha # init confidence measure bank self.confidence_measure_bank = get_confidence_measure_bank() - method = None + measure = None # construct measure_name measure_name = "" - if confidence_method_cfg.name == "max_prob": + if confidence_measure_cfg.name == "max_prob": measure_name = "max_prob" - elif confidence_method_cfg.name == "entropy": + elif confidence_measure_cfg.name == "entropy": measure_name = '_'.join( - [confidence_method_cfg.name, confidence_method_cfg.entropy_type, confidence_method_cfg.entropy_norm] + [confidence_measure_cfg.name, confidence_measure_cfg.entropy_type, confidence_measure_cfg.entropy_norm] ) else: - raise ValueError(f"Unsupported `confidence_method_cfg.name`: `{confidence_method_cfg.name}`") + raise ValueError(f"Unsupported `confidence_measure_cfg.name`: `{confidence_measure_cfg.name}`") if measure_name not in self.confidence_measure_bank: raise ValueError(f"Unsupported measure setup: `{measure_name}`") - method = partial(self.confidence_measure_bank[measure_name], v=self.num_tokens, t=self.temperature) - self._get_confidence = lambda x: method(x).tolist() + measure = partial(self.confidence_measure_bank[measure_name], v=self.num_tokens, t=self.alpha) + self._get_confidence = lambda x: measure(torch.nan_to_num(x)).tolist() class ConfidenceMixin(ABC): """Confidence Mixin class. - It initializes per-frame confidence measure. + It is responsible for confidence estimation method initialization and high-level confidence score calculation. """ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): """Initialize confidence-related fields and confidence aggregation function from config. 
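To make the renamed measure bank concrete, a toy application of one entry to a random log-probability tensor; shapes and the chosen entry are arbitrary:

    import torch

    from nemo.collections.asr.parts.utils.asr_confidence_utils import get_confidence_measure_bank

    bank = get_confidence_measure_bank()
    logprobs = torch.log_softmax(torch.randn(4, 128), dim=-1)  # 4 frames, vocabulary of 128
    frame_conf = bank["entropy_renyi_exp"](logprobs, v=128, t=0.33)
    print(frame_conf.shape)  # torch.Size([4]); one score in [0, 1] per frame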
""" - if confidence_cfg is None: - confidence_cfg = OmegaConf.structured(ConfidenceConfig()) + # OmegaConf.structured ensures that post_init check is always executed + confidence_cfg = OmegaConf.structured( + ConfidenceConfig() if confidence_cfg is None else ConfidenceConfig(**confidence_cfg) + ) + self.confidence_measure_cfg = confidence_cfg.measure_cfg # extract the config self.preserve_word_confidence = confidence_cfg.get('preserve_word_confidence', False) @@ -216,7 +374,6 @@ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): ) self.exclude_blank_from_confidence = confidence_cfg.get('exclude_blank', True) self.word_confidence_aggregation = confidence_cfg.get('aggregation', "min") - self.confidence_method_cfg = confidence_cfg.get('method_cfg', None) # define aggregation functions self.confidence_aggregation_bank = get_confidence_aggregation_bank() @@ -226,7 +383,13 @@ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): if self.preserve_frame_confidence is False: if self.cfg.strategy in ['greedy', 'greedy_batch']: self.preserve_frame_confidence = self.cfg.greedy.get('preserve_frame_confidence', False) - self.confidence_method_cfg = self.cfg.greedy.get('confidence_method_cfg', None) + # OmegaConf.structured ensures that post_init check is always executed + confidence_measure_cfg = OmegaConf.structured(self.cfg.greedy).get('confidence_measure_cfg', None) + self.confidence_measure_cfg = ( + OmegaConf.structured(ConfidenceMeasureConfig()) + if confidence_measure_cfg is None + else OmegaConf.structured(ConfidenceMeasureConfig(**confidence_measure_cfg)) + ) @abstractmethod def compute_confidence(self, hypotheses_list: List[Hypothesis]) -> List[Hypothesis]: diff --git a/nemo/collections/asr/parts/utils/confidence_metrics.py b/nemo/collections/asr/parts/utils/confidence_metrics.py index 28aa49959041..7d793c9df607 100644 --- a/nemo/collections/asr/parts/utils/confidence_metrics.py +++ b/nemo/collections/asr/parts/utils/confidence_metrics.py @@ -13,47 +13,94 @@ # limitations under the License. import math +import os +from pathlib import Path +from typing import List, Optional, Tuple, Union +import matplotlib.pyplot as plt import numpy as np -from sklearn.metrics import average_precision_score, log_loss, roc_auc_score +from sklearn.metrics import ( + PrecisionRecallDisplay, + RocCurveDisplay, + average_precision_score, + log_loss, + precision_recall_curve, + roc_auc_score, + roc_curve, +) -def auc_roc(y_true, y_score): +def auc_roc(y_true: Union[List[int], np.ndarray], y_score: Union[List[float], np.ndarray]) -> float: """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores. + + Note: If only one class is present in y_true, 0.5 is returned. """ + y_true = np.array(y_true) + y_score = np.array(y_score) + assert len(y_true) == len(y_score) + assert np.all(y_true >= 0) and np.all(y_true <= 1) + if np.all(y_true == 0) or np.all(y_true == 1): + return 0.5 return roc_auc_score(y_true, y_score) -def auc_pr(y_true, y_score): +def auc_pr(y_true: Union[List[int], np.ndarray], y_score: Union[List[float], np.ndarray]) -> float: """Compute Area Under the Precision-Recall Curve (PR AUC) from prediction scores. + + Note: If only regatives are present in y_true, 0.0 is returned. 
""" + y_true = np.array(y_true) + y_score = np.array(y_score) + assert len(y_true) == len(y_score) + assert np.all(y_true >= 0) and np.all(y_true <= 1) + if np.all(y_true == 0): + return 0.0 return average_precision_score(y_true, y_score) -def auc_nt(y_true, y_score): +def auc_nt(y_true: Union[List[int], np.ndarray], y_score: Union[List[float], np.ndarray]) -> float: """Compute Area Under the Negative Predictive Value vs. True Negative Rate Curve (NT AUC) from prediction scores. This metric can be thought of as a PR AUC in which errors are treated as positives. + + Note: If only positives are present in y_true, 0.0 is returned. """ y_true = np.array(y_true) y_score = np.array(y_score) + assert len(y_true) == len(y_score) + assert np.all(y_true >= 0) and np.all(y_true <= 1) + if np.all(y_true == 1): + return 0.0 return average_precision_score(1 - y_true, 1 - y_score) -def nce(y_true, y_score): +def nce(y_true: Union[List[int], np.ndarray], y_score: Union[List[float], np.ndarray]) -> float: """Compute Normalized Cross Entropy (NCE) from prediction scores. Also known as the Normalized Mutual Information. NCE measures how close the correct prediction scores are to one and the incorrect prediction scores are to zero. Negative NCE values indicate that the classifier performs worse than the setting all prediction scores as the proportion of correct predictions. + + Note: If only one class is present in y_true, 0.5 is returned. """ - p = sum(y_true) / len(y_true) + y_true = np.array(y_true) + y_score = np.array(y_score) + assert len(y_true) == len(y_score) + assert np.all(y_true >= 0) and np.all(y_true <= 1) + if np.all(y_true == 0) or np.all(y_true == 1): + return -math.inf + p = y_true.mean() eps = 1e-15 Hp = -(math.log(p + eps) * p + math.log(1 - p + eps) * (1 - p)) return (Hp - log_loss(y_true, y_score)) / Hp -def ece(y_true, y_score, n_bins=100): +def ece( + y_true: Union[List[int], np.ndarray], + y_score: Union[List[float], np.ndarray], + n_bins: int = 100, + return_curve: bool = False, +) -> Union[float, Tuple[float, Tuple[List[int], List[float]]]]: """Compute Expected Calibration Error (ECE) from prediction scores. ECE measures how close the correct prediction scores are to one and the incorrect prediction scores are to zero. 
@@ -61,54 +108,159 @@ def ece(y_true, y_score, n_bins=100): """ y_true = np.array(y_true) y_score = np.array(y_score) + assert len(y_true) == len(y_score) + assert np.all(y_true >= 0) and np.all(y_true <= 1) py = np.array([1 - y_score, y_score]).T acc, conf = np.zeros(n_bins), np.zeros(n_bins) Bm = np.zeros(n_bins) + ece_curve = [] + thresholds = [] for m in range(n_bins): a, b = m / n_bins, (m + 1) / n_bins threshold = (a + b) / 2 + thresholds.append(threshold) py_index = (py.T[1] >= threshold).astype(int) py_value = py[np.arange(len(py_index)), py_index] bin_range = ((py_value > a) & (py_value <= b)).nonzero()[0] Bm[m] = len(bin_range) if Bm[m] > 0: - acc[m] = (py_index[bin_range] == y_true[bin_range]).sum() - conf[m] = py_value[bin_range].sum() - if Bm[m] != 0: - acc[m] /= Bm[m] - conf[m] /= Bm[m] - ece = 0 - for m in range(n_bins): - ece += Bm[m] * np.abs((acc[m] - conf[m])) - return ece / sum(Bm) + acc[m] = (py_index[bin_range] == y_true[bin_range]).sum() / Bm[m] + conf[m] = py_value[bin_range].sum() / Bm[m] + ece_curve.append(Bm[m] * np.abs(acc[m] - conf[m])) + ece = sum(ece_curve) / sum(Bm) + if return_curve: + return ece, (thresholds, ece_curve) + else: + return ece -def auc_yc(y_true, y_score, return_std_maximum=False, return_curve=False, n_bins=100): +def auc_yc( + y_true: Union[List[int], np.ndarray], + y_score: Union[List[float], np.ndarray], + n_bins: int = 100, + return_std_maximum: bool = False, + return_curve: bool = False, +) -> Union[ + float, + Tuple[float, Tuple[List[int], List[float]]], + Tuple[float, float, float], + Tuple[float, float, float, Tuple[List[int], List[float]]], +]: """Compute Area Under the Youden's Curve (YC AUC) from prediction scores. YC AUC represents the rate of the effective threshold range. If return_std_maximum is set to True, std and maximum values of the Youden's Curve are returned with the AUC. + + Note: If only one class is present in y_true, zeroes are returned for every entity. 
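A toy sanity check of the updated metric helpers; labels and scores below are invented, and the tuple order of auc_yc follows the new (mean, std, max) convention:

    from nemo.collections.asr.parts.utils.confidence_metrics import auc_roc, auc_yc, ece

    y_true = [1, 1, 0, 1, 0]
    y_score = [0.9, 0.8, 0.3, 0.6, 0.4]

    print(auc_roc(y_true, y_score))             # 1.0: the toy scores separate the classes perfectly
    print(auc_roc([1, 1, 1], [0.2, 0.9, 0.5]))  # 0.5: degenerate single-class input
    print(ece(y_true, y_score, n_bins=10))      # scalar ECE
    mean_yc, std_yc, max_yc = auc_yc(y_true, y_score, return_std_maximum=True)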
""" y_true = np.array(y_true) y_score = np.array(y_score) + thresholds = np.linspace(0, 1, n_bins + 1) + assert len(y_true) == len(y_score) + assert np.all(y_true >= 0) and np.all(y_true <= 1) + if np.all(y_true == 0) or np.all(y_true == 1): + if return_std_maximum and return_curve: + return 0.0, 0.0, 0.0, (thresholds, np.zeros(len(thresholds))) + elif return_std_maximum: + return 0.0, 0.0, 0.0 + elif return_curve: + return 0.0, (thresholds, np.zeros(len(thresholds))) + else: + return 0.0 mask_correct = y_true == 1 - count_correct = len(mask_correct.nonzero()[0]) - count_incorrect = len(y_true) - count_correct + count_correct = max(len(mask_correct.nonzero()[0]), 1) + count_incorrect = max(len(y_true) - count_correct, 1) y_score_correct = y_score[mask_correct] y_score_incorrect = y_score[~mask_correct] yc = [] - thresholds = [i / n_bins for i in range(0, n_bins + 1)] for threshold in thresholds: - tnr = len((np.array(y_score_incorrect) < threshold).nonzero()[0]) / count_incorrect - fnr = len((np.array(y_score_correct) < threshold).nonzero()[0]) / count_correct - yc.append(tnr - fnr) + tnr = len((y_score_incorrect < threshold).nonzero()[0]) / count_incorrect + fnr = len((y_score_correct < threshold).nonzero()[0]) / count_correct + yc.append(abs(tnr - fnr)) yc = np.array(yc) if return_std_maximum and return_curve: - return yc.mean(), yc.max(), yc.std(), (thresholds, yc) + return yc.mean(), yc.std(), yc.max(), (thresholds, yc) elif return_std_maximum: - return yc.mean(), yc.max(), yc.std() + return yc.mean(), yc.std(), yc.max() elif return_curve: return yc.mean(), (thresholds, yc) else: return yc.mean() + + +def save_confidence_hist(y_score: Union[List[float], np.ndarray], plot_dir: Union[str, Path], name: str = "hist"): + os.makedirs(plot_dir, exist_ok=True) + plt.hist(np.array(y_score), 50, range=(0, 1)) + plt.title(name) + plt.xlabel("Confidence score") + plt.ylabel("Count") + plt.savefig(Path(plot_dir) / Path(name + ".png"), dpi=300) + plt.clf() + + +def save_roc_curve( + y_true: Union[List[int], np.ndarray], + y_score: Union[List[float], np.ndarray], + plot_dir: Union[str, Path], + name: str = "roc", +): + assert len(y_true) == len(y_score) + os.makedirs(plot_dir, exist_ok=True) + fpr, tpr, _ = roc_curve(1 - np.array(y_true), 1 - np.array(y_score)) + RocCurveDisplay(fpr=fpr, tpr=tpr).plot() + plt.title(name) + plt.savefig(Path(plot_dir) / Path(name + ".png"), dpi=300) + plt.clf() + + +def save_pr_curve( + y_true: Union[List[int], np.ndarray], + y_score: Union[List[float], np.ndarray], + plot_dir: Union[str, Path], + name: str = "pr", +): + assert len(y_true) == len(y_score) + os.makedirs(plot_dir, exist_ok=True) + precision, recall, _ = precision_recall_curve(np.array(y_true), np.array(y_score)) + PrecisionRecallDisplay(precision=precision, recall=recall).plot() + plt.title(name) + plt.savefig(Path(plot_dir) / Path(name + ".png"), dpi=300) + plt.clf() + + +def save_nt_curve( + y_true: Union[List[int], np.ndarray], + y_score: Union[List[float], np.ndarray], + plot_dir: Union[str, Path], + name: str = "nt", +): + assert len(y_true) == len(y_score) + os.makedirs(plot_dir, exist_ok=True) + precision, recall, _ = precision_recall_curve(1 - np.array(y_true), 1 - np.array(y_score)) + PrecisionRecallDisplay(precision=precision, recall=recall).plot() + plt.title(name) + plt.savefig(Path(plot_dir) / Path(name + ".png"), dpi=300) + plt.clf() + + +def save_custom_confidence_curve( + thresholds: Union[List[float], np.ndarray], + values: Union[List[float], np.ndarray], + plot_dir: Union[str, Path], + 
name: str = "my_awesome_curve", + xlabel: Optional[str] = None, + ylabel: Optional[str] = None, +): + assert len(thresholds) == len(values) + os.makedirs(plot_dir, exist_ok=True) + plt.plot(thresholds, values) + plt.xlim([0, 1]) + plt.ylim([0, 1]) + plt.title(name) + if xlabel is not None: + plt.xlabel(xlabel) + if ylabel is not None: + plt.ylabel(ylabel) + plt.savefig(Path(plot_dir) / Path(name + ".png"), dpi=300) + plt.clf() diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index b5685c63aa25..bc32a4f99840 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -59,7 +59,7 @@ python build_ensemble.py - tune_confidence_config.confidence_type='[entropy_renui_exp,entropy_tsallis_exp]' # only tune over this set + tune_confidence_config.confidence_type='[entropy_renyi_exp,entropy_tsallis_exp]' # only tune over this set tune_confidence_config.alpha='[0.1,0.5,1.0]' # only tune over this set You can check the dataclasses in this file for the full list of supported @@ -97,7 +97,7 @@ ) from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, - ConfidenceMethodConfig, + ConfidenceMeasureConfig, get_confidence_aggregation_bank, get_confidence_measure_bank, ) @@ -143,8 +143,8 @@ class TuneConfidenceConfig: # not including max prob, as there is always an entropy-based metric # that's better but otherwise including everything confidence_type: Tuple[str] = ( - "entropy_renui_exp", - "entropy_renui_lin", + "entropy_renyi_exp", + "entropy_renyi_lin", "entropy_tsallis_exp", "entropy_tsallis_lin", "entropy_gibbs_lin", @@ -214,14 +214,9 @@ class BuildEnsembleConfig: preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - method_cfg=ConfidenceMethodConfig( - name="entropy", - entropy_type="renui", - temperature=0.25, # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 - entropy_norm="lin", - ), + measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) - temperature: float = 1.0 # this is a real temperature that will be applied to logits + temperature: float = 1.0 # this is optional, but can be used to change any aspect of the transcription # config, such as batch size or amp usage. Note that model, data and confidence diff --git a/scripts/confidence_ensembles/ensemble_config.yaml b/scripts/confidence_ensembles/ensemble_config.yaml index 954876a0c3cc..590318ee3b28 100644 --- a/scripts/confidence_ensembles/ensemble_config.yaml +++ b/scripts/confidence_ensembles/ensemble_config.yaml @@ -16,8 +16,8 @@ temperature: 1.0 confidence: exclude_blank: True aggregation: mean - method_cfg: + measure_cfg: name: entropy - entropy_type: renui - temperature: 0.25 # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 + entropy_type: renyi + alpha: 0.25 entropy_norm: lin diff --git a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py index a43e80b2bc3f..8922fe09176d 100644 --- a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py +++ b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py @@ -12,32 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import contextlib -import copy import json import os from dataclasses import dataclass, is_dataclass from pathlib import Path from typing import Optional -import matplotlib.pyplot as plt -import numpy as np import pytorch_lightning as pl -import texterrors import torch -from omegaconf import MISSING, OmegaConf, open_dict -from sklearn.metrics import PrecisionRecallDisplay, RocCurveDisplay, precision_recall_curve, roc_curve +from omegaconf import MISSING, OmegaConf from sklearn.model_selection import ParameterGrid from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig from nemo.collections.asr.metrics.wer import CTCDecodingConfig -from nemo.collections.asr.models import ASRModel +from nemo.collections.asr.models import ASRModel, EncDecRNNTModel +from nemo.collections.asr.parts.utils.asr_confidence_benchmarking_utils import ( + apply_confidence_parameters, + run_confidence_benchmark, +) from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig -from nemo.collections.asr.parts.utils.confidence_metrics import auc_nt, auc_pr, auc_roc, auc_yc, ece, nce from nemo.core.config import hydra_runner from nemo.utils import logging - """ Get confidence metrics and curve plots for a given model, dataset, and confidence parameters. @@ -74,125 +70,10 @@ amp=True \ target_level="word" \ confidence_cfg.exclude_blank=False \ - 'grid_params="{\"aggregation\": [\"min\", \"prod\"], \"temperature\": [0.33, 0.5]}"' + 'grid_params="{\"aggregation\": [\"min\", \"prod\"], \"alpha\": [0.33, 0.5]}"' """ -def get_correct_marks(r, h): - """Get correct marks by aligning the reference text with a hypothesis. - - This method considers only insertions and substitutions as incorrect marks. - """ - return [ - a == b - for a, b in zip(*(texterrors.align_texts([str(rr) for rr in r], [str(hh) for hh in h], False)[:-1])) - if b != "" - ] - - -def get_token_targets_with_confidence(hyp): - return [[y, c] for y, c in zip(hyp.y_sequence, hyp.token_confidence)] - - -def get_word_targets_with_confidence(hyp): - return [[y, c] for y, c in zip(hyp.words, hyp.word_confidence)] - - -def run_benchmark( - model, batch_size, num_workers, is_rnnt, target_level, filepaths, reference_texts, plot_dir, autocast -): - """Run benchmark and plot histograms and curves. - - Returns: - Dictionary with benchmark results of the following scheme: - `level: (auc_roc, auc_pr, auc_nt, nce, ece, auc_yc, max_yc, std_yc)` with `level` being 'token' or 'word'. 
- """ - # transcribe audio - with autocast(): - with torch.no_grad(): - transcriptions = model.transcribe( - paths2audio_files=filepaths, batch_size=batch_size, return_hypotheses=True, num_workers=num_workers - ) - if is_rnnt: - transcriptions = transcriptions[0] - - levels = [] - if target_level != "word": - levels.append("token") - if target_level != "token": - levels.append("word") - results = {} - for level in levels: - if level == "token": - targets_with_confidence = [get_token_targets_with_confidence(tran) for tran in transcriptions] - correct_marks = [ - get_correct_marks(model.tokenizer.text_to_ids(r), model.tokenizer.text_to_ids(h.text)) - for r, h in zip(reference_texts, transcriptions) - ] - else: # "word" - targets_with_confidence = [get_word_targets_with_confidence(tran) for tran in transcriptions] - correct_marks = [get_correct_marks(r.split(), h.words) for r, h in zip(reference_texts, transcriptions)] - - y_true, y_score = np.array( - [[f, p[1]] for cm, twc in zip(correct_marks, targets_with_confidence) for f, p in zip(cm, twc)] - ).T - mask_correct = y_true == 1 - y_score_correct = y_score[mask_correct] - y_score_incorrect = y_score[~mask_correct] - result_yc = auc_yc(y_true, y_score, return_std_maximum=True, return_curve=True) - results[level] = [ - auc_roc(y_true, y_score), - auc_pr(y_true, y_score), - auc_nt(y_true, y_score), - nce(y_true, y_score), - ece(y_true, y_score), - ] + list(result_yc[:-1]) - - os.makedirs(plot_dir, exist_ok=True) - plt.hist(np.array(y_score_correct), 50, range=(0, 1)) - plt.savefig(plot_dir / Path(level + "_" + "hist_correct.png"), dpi=300) - plt.clf() - plt.hist(np.array(y_score_incorrect), 50, range=(0, 1)) - plt.savefig(plot_dir / Path(level + "_" + "hist_incorrect.png"), dpi=300) - plt.clf() - fpr, tpr, _ = roc_curve(1 - y_true, 1 - y_score) - RocCurveDisplay(fpr=fpr, tpr=tpr).plot() - plt.savefig(plot_dir / Path(level + "_" + "roc.png"), dpi=300) - plt.clf() - precision, recall, _ = precision_recall_curve(y_true, y_score) - PrecisionRecallDisplay(precision=precision, recall=recall).plot() - plt.savefig(plot_dir / Path(level + "_" + "pr.png"), dpi=300) - plt.clf() - precision, recall, _ = precision_recall_curve(1 - y_true, 1 - y_score) - PrecisionRecallDisplay(precision=precision, recall=recall).plot() - plt.savefig(plot_dir / Path(level + "_" + "nt.png"), dpi=300) - plt.clf() - plt.plot(*result_yc[-1]) - plt.ylim([0, 1]) - plt.savefig(plot_dir / Path(level + "_" + "yc.png"), dpi=300) - plt.clf() - - return results - - -def apply_parameters(decoding_cfg, hp): - """Apply parameters from a parameter grid to a decoding config. - - Returns: - Updated decoding config. - """ - new_decoding_cfg = copy.deepcopy(decoding_cfg) - confidence_cfg_fields = ("aggregation", "exclude_blank") - confidence_method_cfg_fields = ("name", "temperature", "entropy_type", "entropy_norm") - with open_dict(new_decoding_cfg): - for p, v in hp.items(): - if p in confidence_cfg_fields: - new_decoding_cfg.confidence_cfg[p] = v - elif p in confidence_method_cfg_fields: - new_decoding_cfg.confidence_cfg.method_cfg[p] = v - return new_decoding_cfg - - def get_experiment_params(cfg): """Get experiment parameters from a confidence config and generate the experiment name. 
@@ -202,23 +83,23 @@ def get_experiment_params(cfg): """ blank = "no_blank" if cfg.exclude_blank else "blank" aggregation = cfg.aggregation - method_name = cfg.method_cfg.name - temperature = cfg.method_cfg.temperature + method_name = cfg.measure_cfg.name + alpha = cfg.measure_cfg.alpha if method_name == "entropy": - entropy_type = cfg.method_cfg.entropy_type - entropy_norm = cfg.method_cfg.entropy_norm + entropy_type = cfg.measure_cfg.entropy_type + entropy_norm = cfg.measure_cfg.entropy_norm experiment_param_list = [ aggregation, str(cfg.exclude_blank), method_name, entropy_type, entropy_norm, - str(temperature), + str(alpha), ] - experiment_str = "-".join([aggregation, blank, method_name, entropy_type, entropy_norm, str(temperature)]) + experiment_str = "-".join([aggregation, blank, method_name, entropy_type, entropy_norm, str(alpha)]) else: - experiment_param_list = [aggregation, str(cfg.exclude_blank), method_name, "-", "-", str(temperature)] - experiment_str = "-".join([aggregation, blank, method_name, str(temperature)]) + experiment_param_list = [aggregation, str(cfg.exclude_blank), method_name, "-", "-", str(alpha)] + experiment_str = "-".join([aggregation, blank, method_name, str(alpha)]) return experiment_param_list, experiment_str @@ -294,7 +175,7 @@ def main(cfg: ConfidenceBenchmarkingConfig): asr_model = asr_model.eval() # Check if ctc or rnnt model - is_rnnt = hasattr(asr_model, 'joint') + is_rnnt = isinstance(asr_model, EncDecRNNTModel) # Check that the model has the `change_decoding_strategy` method if not hasattr(asr_model, 'change_decoding_strategy'): @@ -317,14 +198,10 @@ def main(cfg: ConfidenceBenchmarkingConfig): reference_texts.append(item['text']) # setup AMP (optional) + autocast = None if cfg.amp and torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and hasattr(torch.cuda.amp, 'autocast'): logging.info("AMP enabled!\n") autocast = torch.cuda.amp.autocast - else: - - @contextlib.contextmanager - def autocast(): - yield # do grid-based benchmarking if grid_params is provided, otherwise a regular one work_dir = Path(cfg.output_dir) @@ -338,7 +215,7 @@ def autocast(): "method_name", "entropy_type", "entropy_norm", - "temperature", + "alpha", "target_level", "auc_roc", "auc_pr", @@ -346,8 +223,8 @@ def autocast(): "nce", "ece", "auc_yc", - "max_yc", "std_yc", + "max_yc", ] ) + "\n" @@ -374,17 +251,16 @@ def autocast(): f.flush() for i, hp in enumerate(hp_grid): logging.info(f"Run # {i + 1}, grid: `{hp}`") - asr_model.change_decoding_strategy(apply_parameters(asr_model.cfg.decoding, hp)) + asr_model.change_decoding_strategy(apply_confidence_parameters(asr_model.cfg.decoding, hp)) param_list, experiment_name = get_experiment_params(asr_model.cfg.decoding.confidence_cfg) plot_dir = work_dir / Path(experiment_name) - results = run_benchmark( + results = run_confidence_benchmark( asr_model, - cfg.batch_size, - cfg.num_workers, - is_rnnt, cfg.target_level, filepaths, reference_texts, + cfg.batch_size, + cfg.num_workers, plot_dir, autocast, ) @@ -406,11 +282,10 @@ def autocast(): with open(report_file, "tw", encoding="utf-8") as f: f.write(report_legend) f.flush() - results = run_benchmark( + results = run_confidence_benchmark( asr_model, cfg.batch_size, cfg.num_workers, - is_rnnt, cfg.target_level, filepaths, reference_texts, diff --git a/tests/collections/asr/confidence/test_asr_confidence.py b/tests/collections/asr/confidence/test_asr_confidence.py new file mode 100644 index 000000000000..11b127424908 --- /dev/null +++ 
b/tests/collections/asr/confidence/test_asr_confidence.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import math +import tempfile +from pathlib import Path + +import numpy as np +import pytest +from omegaconf import OmegaConf +from pytorch_lightning import Trainer + +from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig +from nemo.collections.asr.metrics.wer import CTCDecodingConfig +from nemo.collections.asr.models import ASRModel, EncDecCTCModelBPE, EncDecRNNTBPEModel +from nemo.collections.asr.parts.submodules.ctc_greedy_decoding import GreedyCTCInferConfig +from nemo.collections.asr.parts.submodules.rnnt_greedy_decoding import GreedyRNNTInferConfig +from nemo.collections.asr.parts.utils.asr_confidence_benchmarking_utils import run_confidence_benchmark +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig + +# both models recognize the test data without errors, thus every metric except ece return default values +ECE_VALUES = {("token", "ctc"): 0.87, ("token", "rnnt"): 0.82, ("word", "ctc"): 0.91, ("word", "rnnt"): 0.88} + +TOL_DEGREE = 2 +TOL = 1 / math.pow(10, TOL_DEGREE) + + +@pytest.fixture(scope="module") +def conformer_ctc_bpe_model(): + model = EncDecCTCModelBPE.from_pretrained(model_name="stt_en_conformer_ctc_small") + model.set_trainer(Trainer(devices=1, accelerator="cpu")) + model = model.eval() + return model + + +@pytest.fixture(scope="module") +def conformer_rnnt_bpe_model(): + model = EncDecRNNTBPEModel.from_pretrained(model_name="stt_en_conformer_transducer_small") + model.set_trainer(Trainer(devices=1, accelerator="cpu")) + model = model.eval() + return model + + +@pytest.mark.with_downloads +@pytest.fixture(scope="module") +# @pytest.fixture +def audio_and_texts(test_data_dir): + # get filenames and reference texts from manifest + filepaths = [] + reference_texts = [] + manifest = Path(test_data_dir) / Path("asr/an4_val.json") + with open(manifest, 'r') as f: + for line in f: + item = json.loads(line) + # alaptev: maybe fix those paths in the manifest? 
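+            # the audio paths in this manifest use a "/data/" prefix, so rewrite it to "/.data/" before resolving the absolute file paths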
+ audio_file = Path(item['audio_filepath'].replace("/data/", "/.data/")) + filepaths.append(str(audio_file.absolute())) + reference_texts.append(item['text']) + return filepaths, reference_texts + + +class TestASRConfidenceBenchmark: + @pytest.mark.integration + @pytest.mark.with_downloads + @pytest.mark.parametrize('model_name', ("ctc", "rnnt")) + @pytest.mark.parametrize('target_level', ("token", "word")) + def test_run_confidence_benchmark( + self, model_name, target_level, audio_and_texts, conformer_ctc_bpe_model, conformer_rnnt_bpe_model + ): + model = conformer_ctc_bpe_model if model_name == "ctc" else conformer_rnnt_bpe_model + assert isinstance(model, ASRModel) + filepaths, reference_texts = audio_and_texts + confidence_cfg = ( + ConfidenceConfig(preserve_token_confidence=True) + if target_level == "token" + else ConfidenceConfig(preserve_word_confidence=True) + ) + model.change_decoding_strategy( + RNNTDecodingConfig(fused_batch_size=-1, strategy="greedy_batch", confidence_cfg=confidence_cfg) + if model_name == "rnnt" + else CTCDecodingConfig(confidence_cfg=confidence_cfg) + ) + with tempfile.TemporaryDirectory() as tmpdir: + assert np.allclose( + np.array( + run_confidence_benchmark(model, target_level, filepaths, reference_texts, plot_dir=tmpdir)[ + target_level + ] + ), + np.array([0.5, 1.0, 0.0, -math.inf, ECE_VALUES[(target_level, model_name)], 0.0, 0.0, 0.0]), + atol=TOL, + ) + + @pytest.mark.integration + @pytest.mark.with_downloads + @pytest.mark.parametrize('model_name', ("ctc", "rnnt")) + @pytest.mark.parametrize('arg', ("method_cfg", "temperature", "all")) + def test_deprecated_config_args(self, model_name, arg, conformer_ctc_bpe_model, conformer_rnnt_bpe_model): + assert ConfidenceConfig().measure_cfg.alpha == 0.33, "default `alpha` is supposed to be 0.33" + model = conformer_ctc_bpe_model if model_name == "ctc" else conformer_rnnt_bpe_model + assert isinstance(model, ASRModel) + if arg == "all": + conf = OmegaConf.create({"temperature": 0.5}) + test_args_main = {"method_cfg": conf} + test_args_greedy = {"confidence_method_cfg": conf} + elif arg == "method_cfg": + conf = OmegaConf.create({"alpha": 0.5}) + test_args_main = {"method_cfg": conf} + test_args_greedy = {"confidence_method_cfg": conf} + elif arg == "temperature": + conf = OmegaConf.create({"temperature": 0.5}) + test_args_main = {"measure_cfg": conf} + test_args_greedy = {"confidence_measure_cfg": conf} + else: + raise NotImplementedError(arg) + confidence_cfg = ConfidenceConfig(preserve_word_confidence=True, **test_args_main) + model.change_decoding_strategy( + RNNTDecodingConfig(fused_batch_size=-1, strategy="greedy", confidence_cfg=confidence_cfg) + if model_name == "rnnt" + else CTCDecodingConfig(confidence_cfg=confidence_cfg) + ) + assert model.cfg.decoding.confidence_cfg.measure_cfg.alpha == 0.5 + model.change_decoding_strategy( + RNNTDecodingConfig( + fused_batch_size=-1, + strategy="greedy", + greedy=GreedyRNNTInferConfig(preserve_frame_confidence=True, **test_args_greedy), + ) + if model_name == "rnnt" + else CTCDecodingConfig(greedy=GreedyCTCInferConfig(preserve_frame_confidence=True, **test_args_greedy)) + ) + assert model.cfg.decoding.greedy.confidence_measure_cfg.alpha == 0.5 diff --git a/tests/collections/asr/confidence/test_asr_confidence_metrics.py b/tests/collections/asr/confidence/test_asr_confidence_metrics.py new file mode 100644 index 000000000000..fde5f322a988 --- /dev/null +++ b/tests/collections/asr/confidence/test_asr_confidence_metrics.py @@ -0,0 +1,115 @@ +# Copyright (c) 2023, 
NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import tempfile + +import numpy as np +import pytest +from scipy.stats import uniform + +from nemo.collections.asr.parts.utils.confidence_metrics import ( + auc_nt, + auc_pr, + auc_roc, + auc_yc, + ece, + nce, + save_confidence_hist, + save_custom_confidence_curve, + save_nt_curve, + save_pr_curve, + save_roc_curve, +) + +# set convenient name2metric mapping +name2metric = { + f.__name__: (f, ans) + for f, ans in zip((auc_roc, auc_pr, auc_nt, auc_yc, ece, nce), (0.833, 0.917, 0.833, 0.421, 0.232, 0.403)) +} +# ece does not have a default value +name2metric_all_correct = { + f.__name__: (f, ans) for f, ans in zip((auc_roc, auc_pr, auc_nt, auc_yc, nce), (0.5, 1.0, 0.0, 0.0, -math.inf)) +} +name2metric_all_incorrect = { + f.__name__: (f, ans) for f, ans in zip((auc_roc, auc_pr, auc_nt, auc_yc, nce), (0.5, 0.0, 1.0, 0.0, -math.inf)) +} + +# Initialize data +Y_TRUE = [1, 0, 0, 1, 1] +Y_TRUE_ALL_CORRECT = [1, 1, 1, 1, 1] +Y_TRUE_ALL_INCORRECT = [0, 0, 0, 0, 0] +Y_SCORE = [0.6, 0.7, 0.02, 0.95, 0.8] +Y_TRUE_RANDOM = np.random.choice(2, 1000, p=[0.2, 0.8]) +# probability distribution with mean ~= 0.65 and std ~= 0.25 +Y_SCORE_RANDOM = uniform.rvs(size=1000, loc=0.5, scale=0.5) - 0.5 * np.random.choice(2, 1000, p=[0.8, 0.2]) + +TOL_DEGREE = 3 +TOL = 1 / math.pow(10, TOL_DEGREE) + + +class TestConfidenceMetrics: + @pytest.mark.unit + @pytest.mark.parametrize('metric_name', name2metric.keys()) + def test_metric_main(self, metric_name): + metric, ans = name2metric[metric_name] + + assert round(metric(Y_TRUE, Y_SCORE), TOL_DEGREE) == ans + + @pytest.mark.unit + @pytest.mark.parametrize('metric_name', name2metric_all_correct.keys()) + def test_metric_all_correct(self, metric_name): + metric, ans = name2metric_all_correct[metric_name] + + assert round(metric(Y_TRUE_ALL_CORRECT, Y_SCORE), TOL_DEGREE) == ans + + @pytest.mark.unit + @pytest.mark.parametrize('metric_name', name2metric_all_incorrect.keys()) + def test_metric_all_incorrect(self, metric_name): + metric, ans = name2metric_all_incorrect[metric_name] + + assert round(metric(Y_TRUE_ALL_INCORRECT, Y_SCORE), TOL_DEGREE) == ans + + @pytest.mark.unit + def test_metric_auc_yc_aux(self): + n_bins = 10 + result, result_std, result_max, (thresholds, yc_curve) = auc_yc( + Y_TRUE, Y_SCORE, n_bins=n_bins, return_std_maximum=True, return_curve=True + ) + + assert round(result_std, TOL_DEGREE) == 0.228 + assert round(result_max, TOL_DEGREE) == 0.667 + assert np.allclose(np.array(thresholds), np.array([i / n_bins for i in range(0, n_bins + 1)]), atol=TOL) + assert np.allclose( + np.array(yc_curve), np.array([0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.167, 0.667, 0.667, 0.333, 0.0]), atol=TOL + ) + + +class TestSaveConfidencePlot: + @pytest.mark.unit + def test_save_confidence_hist(self): + with tempfile.TemporaryDirectory() as tmpdir: + save_confidence_hist(Y_SCORE_RANDOM, tmpdir) + + @pytest.mark.unit + @pytest.mark.parametrize('plot_func', (save_roc_curve, save_pr_curve, 
save_nt_curve)) + def test_save_simple_confidence_curve(self, plot_func): + with tempfile.TemporaryDirectory() as tmpdir: + plot_func(Y_TRUE_RANDOM, Y_SCORE_RANDOM, tmpdir) + + @pytest.mark.unit + def test_save_custom_confidence_curve(self): + with tempfile.TemporaryDirectory() as tmpdir: + ranges = np.arange(0, 1, 0.01) + save_custom_confidence_curve(ranges, ranges, tmpdir) diff --git a/tests/collections/asr/confidence/test_asr_confidence_primitives.py b/tests/collections/asr/confidence/test_asr_confidence_primitives.py new file mode 100644 index 000000000000..d1111406ca62 --- /dev/null +++ b/tests/collections/asr/confidence/test_asr_confidence_primitives.py @@ -0,0 +1,142 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import pytest +import torch + +from nemo.collections.asr.parts.utils.asr_confidence_utils import ( + get_confidence_aggregation_bank, + get_confidence_measure_bank, +) + +# Initialize probability vectors +VOCAB_SIZES = (100, 1000, 10000) +ONE_VEC_SET, ZERO_VEC_SET, RAND_VEC_SET, OVERFIT_RAND_VEC_SET = {}, {}, {}, {} +for vocab_size in VOCAB_SIZES: + # batch size 2 to test different positions of probability one + ONE_VEC_SET[vocab_size] = torch.nan_to_num( + torch.cat( + [ + torch.tensor([[0] + [float('-inf')] * (vocab_size - 1)]), + torch.tensor([[float('-inf')] * (vocab_size - 3) + [0] + [float('-inf')] * 2]), + ] + ) + ) + ZERO_VEC_SET[vocab_size] = torch.nan_to_num(torch.tensor([[math.log(1 / vocab_size)] * vocab_size] * 2)) + # batch size 1 + rand_logit = torch.rand((1, vocab_size)) + rand_logit_overfit = rand_logit.clone() + rand_logit_overfit[0, 0] += vocab_size + RAND_VEC_SET[vocab_size] = torch.nan_to_num(torch.nn.functional.log_softmax(rand_logit, -1)) + OVERFIT_RAND_VEC_SET[vocab_size] = torch.nan_to_num(torch.nn.functional.log_softmax(rand_logit_overfit, -1)) +AGGREGATION_VEC_SIMPLE = [0.0, 0.5, 1] + +TOL_DEGREE = 6 +TOL = 1 / math.pow(10, TOL_DEGREE) + + +def get_measure_parametrize_ranges(): + confidence_measure_bank = {} + alpha_range = (0.25, 0.5, 1.0) + bank_exception = None + try: + confidence_measure_bank = get_confidence_measure_bank() + except Exception as e: + alpha_range = () + bank_exception = e + return confidence_measure_bank, alpha_range, bank_exception + + +def get_aggregation_parametrize_ranges(): + confidence_aggregation_bank = {} + bank_exception = None + try: + confidence_aggregation_bank = get_confidence_aggregation_bank() + except Exception as e: + bank_exception = e + return confidence_aggregation_bank, bank_exception + + +class TestConfidenceMeasureBank: + measure_bank, alphas, bank_build_exception = get_measure_parametrize_ranges() + + @pytest.mark.unit + def test_measure_bank(self): + if self.bank_build_exception is not None: + raise self.bank_build_exception + + assert isinstance(self.measure_bank, dict) + assert len(self.measure_bank) > 0 + + @pytest.mark.unit + @pytest.mark.parametrize('measure_name', measure_bank.keys()) + 
@pytest.mark.parametrize('alpha', alphas) + @pytest.mark.parametrize('vocab_size', VOCAB_SIZES) + def test_confidence_measures_one(self, measure_name, alpha, vocab_size): + measure = self.measure_bank[measure_name] + + assert torch.allclose(measure(ONE_VEC_SET[vocab_size], vocab_size, alpha), torch.tensor([1.0, 1.0]), atol=TOL) + + @pytest.mark.unit + @pytest.mark.parametrize('measure_name', measure_bank.keys()) + @pytest.mark.parametrize('alpha', alphas) + @pytest.mark.parametrize('vocab_size', VOCAB_SIZES) + def test_confidence_measures_zero(self, measure_name, alpha, vocab_size): + measure = self.measure_bank[measure_name] + + assert torch.allclose(measure(ZERO_VEC_SET[vocab_size], vocab_size, alpha), torch.tensor([0.0, 0.0]), atol=TOL) + + @pytest.mark.unit + @pytest.mark.parametrize('measure_name', measure_bank.keys()) + @pytest.mark.parametrize('alpha', alphas) + @pytest.mark.parametrize('vocab_size', VOCAB_SIZES) + def test_confidence_measures_partial_order(self, measure_name, alpha, vocab_size): + measure = self.measure_bank[measure_name] + value_normal = round(float(measure(RAND_VEC_SET[vocab_size], vocab_size, alpha)[0]), TOL_DEGREE) + value_overfit = round(float(measure(OVERFIT_RAND_VEC_SET[vocab_size], vocab_size, alpha)[0]), TOL_DEGREE) + + assert 0 <= value_normal < value_overfit <= 1, ( + measure(RAND_VEC_SET[vocab_size], vocab_size, alpha), + measure(OVERFIT_RAND_VEC_SET[vocab_size], vocab_size, alpha), + ) + + +class TestConfidenceAggregationBank: + aggregation_bank, bank_build_exception = get_aggregation_parametrize_ranges() + + @pytest.mark.unit + def test_aggregation_bank(self): + if self.bank_build_exception is not None: + raise self.bank_build_exception + + assert isinstance(self.aggregation_bank, dict) + assert len(self.aggregation_bank) > 0 + + @pytest.mark.unit + @pytest.mark.parametrize('aggregation_name', aggregation_bank.keys()) + def test_confidence_agregation_simple(self, aggregation_name): + # alaptev: would skipif work with parametrize arguments? 
+ if aggregation_name not in ("mean", "min", "max", "prod"): + pytest.skip(f"{aggregation_name} is not a simple aggregation") + aggregation = self.aggregation_bank[aggregation_name] + if aggregation_name == "mean": + assert aggregation(AGGREGATION_VEC_SIMPLE) == 0.5 + elif aggregation_name == "min": + assert aggregation(AGGREGATION_VEC_SIMPLE) == 0.0 + if aggregation_name == "max": + assert aggregation(AGGREGATION_VEC_SIMPLE) == 1.0 + if aggregation_name == "prod": + assert aggregation(AGGREGATION_VEC_SIMPLE) == 0.0 diff --git a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py index 22926b6516ee..8687ed683833 100644 --- a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py +++ b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py @@ -242,7 +242,8 @@ def test_decoding_change(self, hybrid_asr_model): @pytest.mark.unit def test_GreedyRNNTInferConfig(self): - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] + # confidence_method_cfg is deprecated + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] result = assert_dataclass_signature_match( greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyRNNTInferConfig, ignore_args=IGNORE_ARGS @@ -256,7 +257,8 @@ def test_GreedyRNNTInferConfig(self): @pytest.mark.unit def test_GreedyBatchedRNNTInferConfig(self): - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] + # confidence_method_cfg is deprecated + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] result = assert_dataclass_signature_match( greedy_decode.GreedyBatchedRNNTInfer, greedy_decode.GreedyBatchedRNNTInferConfig, ignore_args=IGNORE_ARGS diff --git a/tests/collections/asr/test_asr_metrics.py b/tests/collections/asr/test_asr_metrics.py index 9a43ed4e2b90..2c4ec0953444 100644 --- a/tests/collections/asr/test_asr_metrics.py +++ b/tests/collections/asr/test_asr_metrics.py @@ -32,6 +32,7 @@ CTCDecodingConfig, word_error_rate, word_error_rate_detail, + word_error_rate_per_utt, ) from nemo.collections.asr.metrics.wer_bpe import WERBPE, CTCBPEDecoding, CTCBPEDecodingConfig from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis @@ -136,6 +137,15 @@ def test_wer_function(self): 0.0, ) + assert word_error_rate_per_utt(hypotheses=['kat'], references=['cat']) == ([1.0], 1.0) + assert word_error_rate_per_utt(hypotheses=['cat', ''], references=['', 'gpu']) == ([float("inf"), 1.0], 2.0) + assert word_error_rate_per_utt( + hypotheses=['ducuti motorcycle', 'G P U'], references=['ducati motorcycle', 'GPU'] + ) == ([0.5, 3.0], 4 / 3) + assert word_error_rate_per_utt( + hypotheses=['ducuti motorcycle', 'G P U'], references=['ducati motorcycle', 'GPU'], use_cer=True + ) == ([1 / 17, 2 / 3], 0.15) + @pytest.mark.unit @pytest.mark.parametrize("batch_dim_index", [0, 1]) @pytest.mark.parametrize("test_wer_bpe", [False, True]) diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 68f1e38f797b..775a146c74c4 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -242,7 +242,8 @@ def test_decoding_change(self, asr_model): @pytest.mark.unit def test_GreedyRNNTInferConfig(self): - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] + # confidence_method_cfg is deprecated + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] result = 
assert_dataclass_signature_match( greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyRNNTInferConfig, ignore_args=IGNORE_ARGS @@ -256,7 +257,8 @@ def test_GreedyRNNTInferConfig(self): @pytest.mark.unit def test_GreedyBatchedRNNTInferConfig(self): - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] + # confidence_method_cfg is deprecated + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] result = assert_dataclass_signature_match( greedy_decode.GreedyBatchedRNNTInfer, greedy_decode.GreedyBatchedRNNTInferConfig, ignore_args=IGNORE_ARGS diff --git a/tests/collections/asr/test_confidence_ensembles.py b/tests/collections/asr/test_confidence_ensembles.py index ad14a2a7e6ff..b8b027dd3426 100644 --- a/tests/collections/asr/test_confidence_ensembles.py +++ b/tests/collections/asr/test_confidence_ensembles.py @@ -19,7 +19,7 @@ from nemo.collections.asr.metrics.wer import CTCDecodingConfig from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel, EncDecRNNTModel from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMethodConfig +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMeasureConfig def get_model_config(model_class): @@ -117,12 +117,7 @@ def test_model_creation_2models(self, tmp_path, model_class0, model_class1): preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - method_cfg=ConfidenceMethodConfig( - name="entropy", - entropy_type="renui", - temperature=0.25, # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 - entropy_norm="lin", - ), + measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) # just checking that no errors are raised when creating the model @@ -153,12 +148,7 @@ def test_model_creation_5models(self, tmp_path): preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - method_cfg=ConfidenceMethodConfig( - name="entropy", - entropy_type="renui", - temperature=0.25, # this is not really temperature, but alpha, see https://arxiv.org/abs/2212.08703 - entropy_norm="lin", - ), + measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) # just checking that no errors are raised when creating the model diff --git a/tutorials/asr/ASR_Confidence_Estimation.ipynb b/tutorials/asr/ASR_Confidence_Estimation.ipynb new file mode 100644 index 000000000000..2a1ad024a889 --- /dev/null +++ b/tutorials/asr/ASR_Confidence_Estimation.ipynb @@ -0,0 +1,1432 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "abe9913d", + "metadata": { + "id": "1a0f93c6" + }, + "outputs": [], + "source": [ + "BRANCH = 'main'\n", + "\n", + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. 
Run this cell to set up dependencies.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd26974d", + "metadata": { + "id": "ffdfe626" + }, + "outputs": [], + "source": [ + "import os\n", + "# either provide a path to local NeMo repository with NeMo already installed or git clone\n", + "\n", + "# option #1: local path to NeMo repo with NeMo already installed\n", + "NEMO_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath('')))\n", + "is_colab = False\n", + "\n", + "# option #2: download NeMo repo\n", + "if 'google.colab' in str(get_ipython()) or not os.path.exists(os.path.join(NEMO_DIR_PATH, \"nemo\")):\n", + " ## Install dependencies\n", + " !apt-get install sox libsndfile1 ffmpeg\n", + "\n", + " !git clone -b $BRANCH https://github.com/NVIDIA/NeMo\n", + " %cd NeMo\n", + " !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", + " NEMO_DIR_PATH = os.path.abspath('')\n", + " is_colab = True\n", + "\n", + "import sys\n", + "sys.path.insert(0, NEMO_DIR_PATH)" + ] + }, + { + "cell_type": "markdown", + "id": "b3f35d50", + "metadata": { + "id": "bcc3e593" + }, + "source": [ + "# 1. Introduction to ASR confidence estimation\n", + "Confidence estimation is a crucial yet sometimes overlooked aspect of automatic speech recognition (ASR) systems. Confidence estimation for ASR is the process of estimating the rate of reliability of the output generated by an ASR system. For an output transcription, confidence estimation answers the question \"how accurate this transcription is\", or \"how likely this transcription is correct\".\n", + "\n", + "Confidence score is the result of confidence estimation. It lies in range from 0 to 1, where zero signals that the confidence estimator is completely unsure, and one indicates that the estimator is confident in the output. Confidence scores are often used to guide downstream processing in ASR applications. For example, in a voice dictation application, a low confidence score could trigger the system to ask the user to repeat the input or to suggest alternative transcriptions.\n", + "\n", + "There are several approaches to confidence estimation in ASR, including:\n", + "\n", + "1. Acoustic modeling-based methods: These methods use the acoustic model scores to estimate the confidence score. The acoustic model represents the relationship between the acoustic signal and the corresponding linguistic units, and the score reflects the similarity between the observed signal and the predicted model output. Here, the acoustic model can be the ASR model itself (non-trainable methods), or a trainable external estimator, accepting acoustic features or output probabilities and predicting confidence scores.\n", + "\n", + "2. Language modeling-based methods: These methods use the language model scores to estimate the confidence score. The language model represents the probability distribution of the sequence of words, and the score reflects the likelihood of the transcription given the language model. \n", + "\n", + "3. Combination methods: These methods combine the scores from both the acoustic and language models to estimate the confidence score. This approach can leverage the strengths of both models to achieve more accurate confidence scores.\n", + "\n", + "In this introductory tutorial we will cover only the non-trainable acoustic-based methods." + ] + }, + { + "cell_type": "markdown", + "id": "34e356bf", + "metadata": { + "id": "59100fb9" + }, + "source": [ + "## 1.1. 
Optional resources\n", + "This tutorial is self-contained, but if you want to dive deeper into the topic, you can check out these resources:\n", + "* Paper behind this tutorial: https://arxiv.org/abs/2212.08703\n", + "* Supplementary blog on how and why confidence estimation methods of this tutorial were developed: https://developer.nvidia.com/blog/entropy-based-methods-for-word-level-asr-confidence-estimation/" + ] + }, + { + "cell_type": "markdown", + "id": "9739cb35", + "metadata": { + "id": "cd7226c5" + }, + "source": [ + "# 2. Data Download\n", + "First, let's download audio and text data. Here we will use LibriSpeech *dev-other* and *test-other*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "46b2861b", + "metadata": { + "id": "fd542e62" + }, + "outputs": [], + "source": [ + "## create data directory and download an audio file\n", + "WORK_DIR = 'WORK_DIR'\n", + "DATA_DIR = WORK_DIR + '/DATA'\n", + "os.makedirs(DATA_DIR, exist_ok=True)\n", + "\n", + "print('downloading audio data...')\n", + "!python $NEMO_DIR_PATH/scripts/dataset_processing/get_librispeech_data.py --data_root=$DATA_DIR --data_set=test_other\n", + "!rm $DATA_DIR/test_other.tar.gz" + ] + }, + { + "cell_type": "markdown", + "id": "8ba5ad12", + "metadata": { + "id": "383eee71" + }, + "source": [ + "# 3. Confidence estimation example\n", + "Let's see how confidence scores can be obtained with NeMo models." + ] + }, + { + "cell_type": "markdown", + "id": "a95697fe", + "metadata": { + "id": "7c7c0170" + }, + "source": [ + "## 3.1. Helper functions\n", + "The following functions are to pretty-print confidence scores for word-level ASR hypotheses." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bd12b7b", + "metadata": { + "id": "20cf0b38" + }, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "from termcolor import colored\n", + "from typing import List, Optional, Tuple, Union\n", + "\n", + "from IPython.display import Audio, HTML, Image, display\n", + "import numpy as np\n", + "import texterrors\n", + "\n", + "def get_detailed_wer_labels(ref: List[str], hyp: List[str], return_eps_padded_hyp: bool = False):\n", + " \"\"\"Get detailed WER labels, aligning reference with hypothesis.\n", + " \n", + " Possible WER labels:\n", + " - 'C' for Correct,\n", + " - 'I' for Insertion,\n", + " - 'D' for Deletion,\n", + " - 'S' for Substitution.\n", + "\n", + " Returns:\n", + " WER labels list.\n", + " [Optional] Epsilin-padded hypothesis if return_eps_padded_hyp set to True.\n", + " \"\"\"\n", + "\n", + " # Align reference and hypothesis using \"\"\n", + " aligned_ref, aligned_hyp = texterrors.align_texts(ref, hyp, False)[:-1]\n", + "\n", + " # Determine labels\n", + " labels = []\n", + " for r, h in zip(aligned_ref, aligned_hyp):\n", + " if r == h:\n", + " labels.append(\"C\")\n", + " elif r == \"\":\n", + " labels.append(\"I\")\n", + " elif h == \"\":\n", + " labels.append(\"D\")\n", + " else:\n", + " labels.append(\"S\")\n", + "\n", + " return labels if not return_eps_padded_hyp else labels, aligned_hyp\n", + "\n", + "\n", + "def fill_confidence_deletions(confidence_scores: List[float], labels: List[str], fill_value: float = 0.0):\n", + " \"\"\"Fill confidence scores list with the provided value for deletions.\n", + " Assumes that we have no natural confidence scores for deletions.\n", + " \n", + " Returns:\n", + " Confidence scores list with deletion scores.\n", + " \"\"\"\n", + "\n", + " assert len(confidence_scores) <= len(labels)\n", + "\n", + " # If the lengths 
of confidence_scores and labels are equal, then we assume that there are no deletions\n", + " if len(confidence_scores) == len(labels):\n", + " return confidence_scores\n", + "\n", + " # Insert fill_value into confidence_scores where label == \"D\"\n", + " new_confidence_scores = []\n", + " score_index = 0\n", + " for label in labels:\n", + " if label == \"D\":\n", + " new_confidence_scores.append(fill_value)\n", + " else:\n", + " new_confidence_scores.append(confidence_scores[score_index])\n", + " score_index += 1\n", + " return new_confidence_scores\n", + "\n", + "\n", + "def pretty_pad_word_labels(labels: List[str], words: List[str]):\n", + " \"\"\"Pad word labels with dash for pretty printing.\n", + " Expects labels and words to have the same length.\n", + " \n", + " Returns:\n", + " Padded labels list.\n", + " \"\"\"\n", + " \n", + " # Check that words and labels without 'D' have the same length\n", + " assert len(words) == len(labels)\n", + "\n", + " # Pad the labels with dashes to align them with the words\n", + " padded_labels = []\n", + " for word, label in zip(words, labels):\n", + " label_len = len(word)\n", + " left_padding = (label_len - 1) // 2\n", + " right_padding = label_len - left_padding - 1\n", + " padded_label = \"-\" * left_padding + label + \"-\" * right_padding\n", + " padded_labels.append(padded_label)\n", + "\n", + " return padded_labels\n", + "\n", + "\n", + "def _html_paint_word_grey(word: str, shade: str):\n", + " if shade == \"black\":\n", + " color = \"0,0,0\"\n", + " elif shade == \"grey\":\n", + " color = \"150,150,150\"\n", + " elif shade == \"light_grey\":\n", + " color = \"200,200,200\"\n", + " else:\n", + " raise ValueError(\n", + " f\"`shade` has to be one of the following: `black`, `grey`, `light_grey`. Provided: `{shade}`\"\n", + " )\n", + " return f'{word}'\n", + "\n", + "\n", + "def pretty_print_transcript_with_confidence(\n", + " transcript: str,\n", + " confidence_scores: List[float],\n", + " threshold: float,\n", + " reference: Optional[str] = None,\n", + " terminal_width: int = 120,\n", + " html: bool = False,\n", + "):\n", + " if html:\n", + " shade_if_low_confidence = lambda x, y: _html_paint_word_grey(x, 'light_grey' if y < threshold else 'black')\n", + " new_line_mark = \"
\"\n", + " pretty_print = lambda x: display(HTML(\"\" + new_line_mark.join(x) + \"\"))\n", + " else:\n", + " shade_if_low_confidence = lambda x, y: colored(x, 'light_grey') if y < threshold else x\n", + " new_line_mark = \"\\n\"\n", + " pretty_print = lambda x: print(new_line_mark.join(x))\n", + " with_labels = reference is not None\n", + " transcript_list = transcript.split()\n", + " output_lines = []\n", + " if with_labels:\n", + " reference_list = reference.split()\n", + " labels, eps_padded_hyp = get_detailed_wer_labels(reference_list, transcript_list, True)\n", + " padded_labels = pretty_pad_word_labels(labels, eps_padded_hyp)\n", + " current_line_len = 0\n", + " current_word_line = \"\"\n", + " current_label_line = \"\"\n", + " for word, label, padded_label, score in zip(\n", + " eps_padded_hyp, labels, padded_labels, fill_confidence_deletions(confidence_scores, labels)\n", + " ):\n", + " word_len = len(word)\n", + " # shield angle brakets for \n", + " if html and word == \"\":\n", + " word = \"<eps>\"\n", + " if current_line_len + word_len + 1 <= terminal_width:\n", + " if current_line_len > 0:\n", + " current_line_len += 1\n", + " current_word_line += \" \"\n", + " current_label_line += \"-\"\n", + " current_line_len += word_len\n", + " current_word_line += shade_if_low_confidence(word, score)\n", + " current_label_line += padded_label\n", + " else:\n", + " output_lines.append(current_word_line + new_line_mark + current_label_line)\n", + " current_line_len = word_len\n", + " current_word_line = shade_if_low_confidence(word, score)\n", + " current_label_line = padded_label\n", + " if current_word_line:\n", + " output_lines.append(current_word_line + new_line_mark + current_label_line)\n", + " else:\n", + " current_line_len = 0\n", + " current_word_line = \"\"\n", + " for word, score in zip(transcript_list, confidence_scores):\n", + " word_len = len(word)\n", + " # shield angle brakets for \n", + " if html and word == \"\":\n", + " word = \"<eps>\"\n", + " if current_line_len + word_len + 1 <= terminal_width:\n", + " if current_line_len > 0:\n", + " current_line_len += 1\n", + " current_word_line += \" \"\n", + " current_line_len += word_len\n", + " current_word_line += shade_if_low_confidence(word, score)\n", + " else:\n", + " output_lines.append(current_word_line)\n", + " current_line_len = word_len\n", + " current_word_line = shade_if_low_confidence(word, score)\n", + " if current_word_line:\n", + " output_lines.append(current_word_line)\n", + "\n", + " pretty_print(output_lines)" + ] + }, + { + "cell_type": "markdown", + "id": "ed997bfd", + "metadata": { + "id": "dec57a27" + }, + "source": [ + "## 3.2. Data and model loading\n", + "This tutorial uses CTC and RNN-T Conformer models trained on LibriSpeech.\n", + "\n", + "You can try to use other pre-trained models as well." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70c1a27a", + "metadata": { + "id": "b66c60a3" + }, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "from omegaconf import DictConfig, OmegaConf\n", + "\n", + "from nemo.collections.asr.models import ASRModel\n", + "\n", + "def load_model(name: str):\n", + " \"\"\"Load a pre-trained model.\n", + "\n", + " Args:\n", + " name: Pre-trained model name.\n", + " Reserved names:\n", + " - 'ctc' for 'stt_en_conformer_ctc_large_ls'\n", + " - 'rnnt' for 'stt_en_conformer_transducer_large_ls'\n", + "\n", + " Returns:\n", + " A model loaded into GPU with .eval() mode set.\n", + " \"\"\"\n", + " if name == \"ctc\":\n", + " name = \"stt_en_conformer_ctc_large_ls\"\n", + " elif name == \"rnnt\":\n", + " name = \"stt_en_conformer_transducer_large_ls\"\n", + "\n", + " model = ASRModel.from_pretrained(model_name=name, map_location=\"cuda:0\")\n", + " model.eval()\n", + "\n", + " return model\n", + "\n", + "@dataclass\n", + "class TestSet:\n", + " filepaths: List[str]\n", + " reference_texts: List[str]\n", + " durations: List[float]\n", + "\n", + "def load_data(manifest_path: str):\n", + " filepaths = []\n", + " reference_texts = []\n", + " durations = []\n", + " with open(manifest_path, \"r\") as f:\n", + " for line in f:\n", + " item = json.loads(line)\n", + " audio_file = item[\"audio_filepath\"]\n", + " filepaths.append(str(audio_file))\n", + " text = item[\"text\"]\n", + " reference_texts.append(text)\n", + " durations.append(float(item[\"duration\"]))\n", + " return TestSet(filepaths, reference_texts, durations)\n", + "\n", + "TEST_MANIFESTS = {\n", + " \"test_other\": DATA_DIR + \"/test_other.json\",\n", + "}\n", + "\n", + "\n", + "# Load data\n", + "test_sets = {manifest: load_data(path) for manifest, path in TEST_MANIFESTS.items()}\n", + "\n", + "# Load model\n", + "is_rnnt = False\n", + "# is_rnnt = True\n", + "\n", + "model = load_model(\"rnnt\" if is_rnnt else \"ctc\")" + ] + }, + { + "cell_type": "markdown", + "id": "9c5db700", + "metadata": { + "id": "88c3d7ee" + }, + "source": [ + "## 3.3. Setting up confidence estimation\n", + "To set up confidence estimation for NeMo ASR models, you need to:\n", + "1. Initialize _ConfidenceConfig_\n", + "2. Put the created _ConfidenceConfig_ into the model decoding config.\n", + "\n", + "The following cell contains an example of _ConfidenceConfig_ initialization and of updating the model's decoding config.\n", + "\n", + "Possible values for the _ConfidenceConfig_ parameters are also listed.\n", + "\n", + "Note that only `strategy=\"greedy\"` (or `greedy_batch` for RNN-T) supports computing confidence scores."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d3e8c11", + "metadata": { + "id": "078005f1" + }, + "outputs": [], + "source": [ + "from nemo.collections.asr.metrics.rnnt_wer import RNNTDecodingConfig\n", + "from nemo.collections.asr.metrics.wer import CTCDecodingConfig\n", + "from nemo.collections.asr.parts.utils.asr_confidence_utils import (\n", + " ConfidenceConfig,\n", + " ConfidenceConstants,\n", + " ConfidenceMeasureConfig,\n", + " ConfidenceMeasureConstants,\n", + ")\n", + "from nemo.collections.asr.parts.utils.asr_confidence_benchmarking_utils import (\n", + " apply_confidence_parameters,\n", + " get_correct_marks,\n", + " get_token_targets_with_confidence,\n", + " get_word_targets_with_confidence,\n", + ")\n", + "\n", + "\n", + "# List allowed options for ConfidenceMeasureConfig and ConfidenceConfig\n", + "print(f\"Allowed options for ConfidenceMeasureConfig: {ConfidenceMeasureConstants.print()}\\n\")\n", + "print(f\"Allowed options for ConfidenceConfig: {ConfidenceConstants.print()}\\n\")\n", + "\n", + "# Initialize ConfidenceConfig and ConfidenceMeasureConfig\n", + "confidence_cfg = ConfidenceConfig(\n", + " preserve_frame_confidence=True, # Internally set to true if preserve_token_confidence == True\n", + " # or preserve_word_confidence == True\n", + " preserve_token_confidence=True, # Internally set to true if preserve_word_confidence == True\n", + " preserve_word_confidence=True,\n", + " aggregation=\"prod\", # How to aggregate frame scores to token scores and token scores to word scores\n", + " exclude_blank=False, # If true, only non-blank emissions contribute to confidence scores\n", + " measure_cfg=ConfidenceMeasureConfig( # Config for per-frame scores calculation (before aggregation)\n", + " name=\"max_prob\", # Or \"entropy\" (default), which usually works better\n", + " entropy_type=\"gibbs\", # Used only for name == \"entropy\". Recommended: \"tsallis\" (default) or \"renyi\"\n", + " alpha=0.5, # Low values (<1) increase sensitivity, high values decrease sensitivity\n", + " entropy_norm=\"lin\" # How to normalize (map to [0,1]) entropy. Default: \"exp\"\n", + " )\n", + ")\n", + "\n", + "# Alternatively, look at ConfidenceConfig's docstring\n", + "print(f\"More info on ConfidenceConfig here:\\n{ConfidenceConfig().__doc__}\\n\")\n", + "\n", + "# Put the created ConfidenceConfig into the model decoding config via .change_decoding_strategy()\n", + "model.change_decoding_strategy(\n", + " RNNTDecodingConfig(fused_batch_size=-1, strategy=\"greedy_batch\", confidence_cfg=confidence_cfg)\n", + " if is_rnnt\n", + " else CTCDecodingConfig(confidence_cfg=confidence_cfg)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "04581687", + "metadata": { + "id": "efe0baea" + }, + "source": [ + "## 3.4. Decode test set and get transcriptions with confidence scores\n", + "Let's transcribe LibriSpeech _test-other_ and see what confidence scores are inside."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5f92257", + "metadata": { + "id": "ccd8d0de" + }, + "outputs": [], + "source": [ + "current_test_set = test_sets[\"test_other\"]\n", + "transcriptions = model.transcribe(paths2audio_files=current_test_set.filepaths, batch_size=16, return_hypotheses=True, num_workers=4)\n", + "if is_rnnt:\n", + " transcriptions = transcriptions[0]" + ] + }, + { + "cell_type": "markdown", + "id": "ca282352", + "metadata": { + "id": "0500514e" + }, + "source": [ + "For a transcribed hypothesis, there can be `frame_confidence` and aggregated from them `token_confidence` and `word_confidence`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18663384", + "metadata": { + "id": "98035fd2" + }, + "outputs": [], + "source": [ + "tran = transcriptions[0]\n", + "print(\n", + " f\"\"\" Recognized text: `{tran.text}`\\n\n", + " Word confidence: {[round(c, 3) for c in tran.word_confidence]}\\n\n", + " Token confidence: {[round(c, 3) for c in tran.token_confidence]}\\n\n", + " Frame confidence: {[([round(cc, 3) for cc in c] if is_rnnt else round(c, 3)) for c in tran.frame_confidence]}\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "783e9e2a", + "metadata": { + "id": "9613bfc1" + }, + "source": [ + "Now let's draw the recognition results highlighted according to their confidence scores.\n", + "\n", + "There are four options: plain text and HTML with or without WER labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "642fe059", + "metadata": { + "id": "a83295ff" + }, + "outputs": [], + "source": [ + "from nemo.collections.asr.metrics.wer import word_error_rate, word_error_rate_detail, word_error_rate_per_utt\n", + "\n", + "def show_dataset_with_confidence(\n", + " indices,\n", + " transcriptions,\n", + " test_set,\n", + " threshold,\n", + " filepaths=None,\n", + " html_show=False,\n", + " min_dur_to_show=0.0,\n", + " utt_to_show=10\n", + "):\n", + " utt_shown = 0\n", + " for i, _ in indices:\n", + " if utt_shown >= utt_to_show:\n", + " break\n", + " if test_set.durations[i] >= min_dur_to_show:\n", + " print(\"=\"*120)\n", + " hyp = transcriptions[i].text\n", + " scores = transcriptions[i].word_confidence\n", + " ref = test_set.reference_texts[i]\n", + " pretty_print_transcript_with_confidence(hyp, scores, threshold, ref, html=html_show)\n", + " if filepaths is not None:\n", + " display(Audio(filepaths[i]))\n", + " utt_shown += 1\n", + "\n", + "\n", + "# you can play with these parameters\n", + "threshold = 0.52\n", + "# in colab, you may want to use `html_show = True` as non-html colorion displayed incorrectly in colab\n", + "html_show = is_colab\n", + "min_dur_to_show = 4.0\n", + "utt_to_show = 5\n", + "\n", + "wer_per_utt, avg_wer = word_error_rate_per_utt([h.text for h in transcriptions], current_test_set.reference_texts)\n", + "sorted_wer_indices = sorted(enumerate(wer_per_utt), key=lambda x: x[1])[::-1]\n", + "\n", + "show_dataset_with_confidence(\n", + " indices=sorted_wer_indices,\n", + " transcriptions=transcriptions,\n", + " test_set=current_test_set,\n", + " threshold=threshold,\n", + " filepaths=current_test_set.filepaths,\n", + " html_show=html_show,\n", + " min_dur_to_show=min_dur_to_show,\n", + " utt_to_show=utt_to_show\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9468ad3e", + "metadata": { + "id": "dbfcb2da" + }, + "source": [ + "## 3.5. 
Confidence metrics\n", + "\n", + "There are several metrics to evaluate the effectiveness of a confidence estimation method. Some of them consider confidence estimation as a binary classification task. Others measure how close the correct word confidence scores are to $1.0$ and the incorrect word scores are to $0.0$.\n", + "\n", + "Some of them are:\n", + "1. Area Under the Receiver Operating Characteristics Curve ($\mathrm{AUC}_\mathrm{ROC}$): class separability metric.\n", + "2. Area Under the Precision-Recall Curve ($\mathrm{AUC}_\mathrm{PR}$): how well the correct words are detected.\n", + "3. Area Under the Negative Predictive Value vs. True Negative Rate Curve ($\mathrm{AUC}_\mathrm{NT}$): how well the incorrect words are detected ($\mathrm{AUC}_\mathrm{PR}$ in which errors are treated as positives).\n", + "4. Normalized Cross Entropy ($\mathrm{NCE}$): how close the confidence of correct predictions is to $1.0$ and that of incorrect predictions is to $0.0$. It ranges from $-\infty$ to $1.0$, with negative scores indicating that the confidence method performs worse than setting the confidence score to $1-\mathrm{WER}$. This metric is also known as Normalized Mutual Information.\n", + "5. Expected Calibration Error ($\mathrm{ECE}$): a weighted average over the absolute accuracy/confidence difference. It ranges from $0.0$ to $1.0$ with the best value $0.0$.\n", + "\n", + "Metrics based on the Youden's curve (see https://en.wikipedia.org/wiki/Youden%27s_J_statistic) can also be considered. They are:\n", + "1. Area Under the Youden's curve ($\mathrm{AUC}_\mathrm{YC}$): the rate of the effective threshold range (i.e. the adjustability or responsiveness). It ranges from $0.0$ to $1.0$ with the best value $0.5$.\n", + "2. Maximum of the Youden's curve $\mathrm{MAX}_\mathrm{YC}$: the optimal $\mathrm{TNR}$ vs. $\mathrm{FNR}$ tradeoff. Its unnormalized version can be used as a criterion for selecting the optimal $\tau$. It ranges from $0.0$ to $1.0$ with the best value $1.0$.\n", + "3. The standard deviation of the Youden's curve values ($\mathrm{STD}_\mathrm{YC}$): indicates that $\mathrm{TNR}$ and $\mathrm{FNR}$ increase at different rates (viz. $\mathrm{TNR}$ grows faster) as the $\tau$ increases. It ranges from $0.0$ to $0.5$ with the best value around $0.25$.\n", + "\n", + "When selecting/tuning a confidence method, it is recommended to maximize $\mathrm{AUC}_\mathrm{ROC}$ first as this is the main metric of confidence estimation quality. Then, for overconfident models, maximizing $\mathrm{AUC}_\mathrm{NT}$ should take precedence over $\mathrm{AUC}_\mathrm{PR}$. Finally, a trade-off between $\mathrm{NCE}$/$\mathrm{ECE}$ and the family of $\mathrm{YC}$ metrics can be considered as a compromise between formal correctness and controllability.\n", + "\n", + "Let's see how well our confidence performs according to the metrics above."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b0fa793", + "metadata": { + "id": "5d152775" + }, + "outputs": [], + "source": [ + "from nemo.collections.asr.parts.utils.confidence_metrics import (\n", + " auc_nt,\n", + " auc_pr,\n", + " auc_roc,\n", + " auc_yc,\n", + " ece,\n", + " nce,\n", + " save_confidence_hist,\n", + " save_custom_confidence_curve,\n", + " save_nt_curve,\n", + " save_pr_curve,\n", + " save_roc_curve,\n", + ")\n", + "\n", + "\n", + "targets_with_confidence = [get_word_targets_with_confidence(tran) for tran in transcriptions]\n", + "correct_marks = [get_correct_marks(r.split(), h.words) for r, h in zip(current_test_set.reference_texts, transcriptions)]\n", + "\n", + "y_true, y_score = np.array(\n", + " [[f, p[1]] for cm, twc in zip(correct_marks, targets_with_confidence) for f, p in zip(cm, twc)]\n", + ").T\n", + "\n", + "\n", + "# output scheme: yc.mean(), yc.max(), yc.std() or yc.mean(), yc.max(), yc.std(), (thresholds, yc)\n", + "result_yc = auc_yc(y_true, y_score, return_std_maximum=True, return_curve=True)\n", + "# output scheme: ece or ece, (thresholds, ece_curve)\n", + "results_ece = ece(y_true, y_score, return_curve=True)\n", + "results = [\n", + " auc_roc(y_true, y_score),\n", + " auc_pr(y_true, y_score),\n", + " auc_nt(y_true, y_score),\n", + " nce(y_true, y_score),\n", + " results_ece[0],\n", + "] + list(result_yc[:3])\n", + "\n", + "print(\n", + " f\"\"\" AUC_ROC:\\t{results[0]:.5f}\n", + " AUC_PR:\\t{results[1]:.5f}\n", + " AUC_NT:\\t{results[2]:.5f}\n", + " NCE:\\t{results[3]:.5f}\n", + " ECE:\\t{results[4]:.5f}\n", + " AUC_YC:\\t{results[5]:.5f}\n", + " MAX_YC:\\t{results[7]:.5f}\n", + " STD_YC:\\t{results[6]:.5f}\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0c3f6299", + "metadata": { + "id": "4159034d" + }, + "source": [ + "Confidence metrics for the maximum probability confidence are not that great.\n", + "\n", + "Let's re-run and benchmark confidence estimation with the default confidence estimator." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c0e3a9f", + "metadata": { + "id": "d2e16f5f" + }, + "outputs": [], + "source": [ + "confidence_cfg = ConfidenceConfig(\n", + " preserve_word_confidence=True,\n", + " preserve_token_confidence=True,\n", + ")\n", + "\n", + "model.change_decoding_strategy(\n", + " RNNTDecodingConfig(fused_batch_size=-1, strategy=\"greedy_batch\", confidence_cfg=confidence_cfg)\n", + " if is_rnnt\n", + " else CTCDecodingConfig(confidence_cfg=confidence_cfg)\n", + ")\n", + "\n", + "transcriptions = model.transcribe(paths2audio_files=current_test_set.filepaths, batch_size=16, return_hypotheses=True, num_workers=4)\n", + "if is_rnnt:\n", + " transcriptions = transcriptions[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8f1cc77", + "metadata": { + "id": "6201ea4d" + }, + "outputs": [], + "source": [ + "targets_with_confidence = [get_word_targets_with_confidence(tran) for tran in transcriptions]\n", + "correct_marks = [get_correct_marks(r.split(), h.words) for r, h in zip(current_test_set.reference_texts, transcriptions)]\n", + "\n", + "y_true, y_score = np.array(\n", + " [[f, p[1]] for cm, twc in zip(correct_marks, targets_with_confidence) for f, p in zip(cm, twc)]\n", + ").T\n", + "\n", + "result_yc = auc_yc(y_true, y_score, return_std_maximum=True, return_curve=True)\n", + "results_ece = ece(y_true, y_score, return_curve=True)\n", + "results = [\n", + " auc_roc(y_true, y_score),\n", + " auc_pr(y_true, y_score),\n", + " auc_nt(y_true, y_score),\n", + " nce(y_true, y_score),\n", + " results_ece[0],\n", + "] + list(result_yc[:3])\n", + "\n", + "print(\n", + " f\"\"\" AUC_ROC:\\t{results[0]:.5f}\n", + " AUC_PR:\\t{results[1]:.5f}\n", + " AUC_NT:\\t{results[2]:.5f}\n", + " NCE:\\t{results[3]:.5f}\n", + " ECE:\\t{results[4]:.5f}\n", + " AUC_YC:\\t{results[5]:.5f}\n", + " MAX_YC:\\t{results[7]:.5f}\n", + " STD_YC:\\t{results[6]:.5f}\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9ab2b130", + "metadata": { + "id": "498e03d0" + }, + "source": [ + "Note that despite the overall improvement, $NCE$ and $ECE$ have gotten worse. This is due to class imbalance caused by low WER." + ] + }, + { + "cell_type": "markdown", + "id": "f96cea04", + "metadata": { + "id": "45856cba" + }, + "source": [ + "Now, let's draw $\\mathrm{ROC}$ as well as histograms of correctly and incorrectly recognized words." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81844713", + "metadata": { + "id": "ff049043" + }, + "outputs": [], + "source": [ + "from tempfile import TemporaryDirectory\n", + "\n", + "\n", + "plot_dir = TemporaryDirectory()\n", + "os.makedirs(plot_dir.name, exist_ok=True)\n", + "\n", + "mask_correct = y_true == 1\n", + "y_score_correct = y_score[mask_correct]\n", + "y_score_incorrect = y_score[~mask_correct]\n", + "\n", + "# histogram of the correct distribution\n", + "save_confidence_hist(y_score_correct, plot_dir.name, \"hist_correct\")\n", + "# histogram of the incorrect distribution\n", + "save_confidence_hist(y_score_incorrect, plot_dir.name, \"hist_incorrect\")\n", + "# AUC-ROC curve\n", + "save_roc_curve(y_true, y_score, plot_dir.name, \"roc\")\n", + "\n", + "\n", + "display(\n", + " Image(filename=os.path.join(plot_dir.name, \"hist_correct.png\"), retina=True),\n", + " Image(filename=os.path.join(plot_dir.name, \"hist_incorrect.png\"), retina=True),\n", + " Image(filename=os.path.join(plot_dir.name, \"roc.png\"), retina=True),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "841a27ca", + "metadata": {}, + "source": [ + "Optionally, you can look at curves for other metrics ($\mathrm{PR}$, $\mathrm{NT}$, $\mathrm{ECE}$, and $\mathrm{YC}$)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6164e8f0", + "metadata": {}, + "outputs": [], + "source": [ + "# AUC-PR curve\n", + "save_pr_curve(y_true, y_score, plot_dir.name, \"pr\")\n", + "# AUC-NT curve\n", + "save_nt_curve(y_true, y_score, plot_dir.name, \"nt\")\n", + "# ECE curve\n", + "ece_thresholds, ece_values = results_ece[-1]\n", + "ece_values /= max(ece_values)\n", + "save_custom_confidence_curve(\n", + " ece_thresholds, ece_values, plot_dir.name, \"ece\", \"Threshold\", \"|Accuracy − Confidence score|\"\n", + ")\n", + "# AUC-YC curve\n", + "yc_thresholds, yc_values = result_yc[-1]\n", + "save_custom_confidence_curve(\n", + " yc_thresholds, yc_values, plot_dir.name, \"yc\", \"Threshold\", \"True positive rate − False Positive Rate\"\n", + ")\n", + "\n", + "\n", + "display(\n", + " Image(filename=os.path.join(plot_dir.name, \"pr.png\"), retina=True),\n", + " Image(filename=os.path.join(plot_dir.name, \"nt.png\"), retina=True),\n", + " Image(filename=os.path.join(plot_dir.name, \"ece.png\"), retina=True),\n", + " Image(filename=os.path.join(plot_dir.name, \"yc.png\"), retina=True),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9f63a172", + "metadata": { + "id": "ad78630a" + }, + "source": [ + "You can use `scripts/speech_recognition/confidence/benchmark_asr_confidence.py` to find optimal confidence hyperparameters." + ] + }, + { + "cell_type": "markdown", + "id": "1d9a822d", + "metadata": { + "id": "15e25521" + }, + "source": [ + "# 4. Confidence applications" + ] + }, + { + "cell_type": "markdown", + "id": "8ab6e666", + "metadata": { + "id": "dbb82877" + }, + "source": [ + "## 4.1. Small WER improvement\n", + "\n", + "Good confidence scores can slightly reduce WER by removing low confidence words from recognition results.\n", + "\n", + "Consider the following example." + ] + }, + { + "cell_type": "markdown", + "id": "4038863c", + "metadata": { + "id": "02eb4e1f" + }, + "source": [ + "Let's look at the detailed WER of the transcribed test set before and after removing words with low confidence scores." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "204d36ac", + "metadata": { + "id": "fdf790b5" + }, + "outputs": [], + "source": [ + "drop_low_confidence_words = lambda x, y, z: \" \".join([xx for xx, yy in zip(x.split(), y) if yy >= z])\n", + "\n", + "\n", + "threshold = 0.001\n", + "\n", + "wer_initial = word_error_rate_detail([h.text for h in transcriptions], current_test_set.reference_texts)\n", + "print(\n", + " f\"\"\"WER detail before removing low confidence words:\n", + " WER:\\t{wer_initial[0]:.5f}\n", + " INS_rate:\\t{wer_initial[2]:.5f}\n", + " DEL_rate:\\t{wer_initial[3]:.5f}\n", + " SUB_rate:\\t{wer_initial[4]:.5f}\"\"\"\n", + ")\n", + "\n", + "wer_conf_dropped = word_error_rate_detail(\n", + " [drop_low_confidence_words(hyp.text, hyp.word_confidence, threshold) for hyp in transcriptions],\n", + " current_test_set.reference_texts,\n", + ")\n", + "print(\n", + " f\"\"\"WER detail after removing low confidence words:\n", + " WER:\\t{wer_conf_dropped[0]:.5f}\n", + " INS_rate:\\t{wer_conf_dropped[2]:.5f}\n", + " DEL_rate:\\t{wer_conf_dropped[3]:.5f}\n", + " SUB_rate:\\t{wer_conf_dropped[4]:.5f}\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4f153cdd", + "metadata": { + "id": "28ac85b1" + }, + "source": [ + "You can see that the right (in this example, extremely low) `threshold` can reduce WER by a tiny bit, reducing insertions and substitutions yet increasing deletions.\n", + "\n", + "Now let's see how to find the optimal threshold.\n", + "\n", + "The most commonly used method for automatically determining the optimal cutoff threshold is taking the value which delivers the maximum of the unnormalized Youden's curve. This method allows you to remove the largest number of incorrect entities, sacrificing the minimum number of correct entities.\n", + "\n", + "However, the unnormalized $\mathrm{MAX}_\mathrm{YC}$ method does not work well for the purpose of WER reduction. Let's compare this method to explicitly minimizing WER with respect to a threshold." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19147b4a", + "metadata": { + "id": "9b81e449" + }, + "outputs": [], + "source": [ + "from joblib import Parallel, delayed\n", + "from multiprocessing import cpu_count\n", + "from tqdm.notebook import tqdm\n", + "\n", + "def max_unnnormalized_yc(\n", + " y_true: Union[List[int], np.ndarray],\n", + " y_score: Union[List[float], np.ndarray],\n", + " n_bins: int = 100,\n", + " start: float = 0.0,\n", + " stop: float = 1.0,\n", + "):\n", + " \"\"\"Calculate the maximum of the unnormalized Youden's curve.\n", + " \"\"\"\n", + " y_true = np.array(y_true)\n", + " y_score = np.array(y_score)\n", + " thresholds = np.linspace(start, stop, n_bins + 1)\n", + " assert len(y_true) == len(y_score)\n", + " assert np.all(y_true >= 0) and np.all(y_true <= 1)\n", + " if np.all(y_true == 0) or np.all(y_true == 1):\n", + " return 0.0, 0.0\n", + " mask_correct = y_true == 1\n", + " y_score_correct = y_score[mask_correct]\n", + " y_score_incorrect = y_score[~mask_correct]\n", + " unnnormalized_yc = []\n", + " for threshold in thresholds:\n", + " tn = len((y_score_incorrect < threshold).nonzero()[0])\n", + " fn = len((y_score_correct < threshold).nonzero()[0])\n", + " unnnormalized_yc.append((threshold, tn - fn))\n", + " return max(unnnormalized_yc, key=lambda x: x[1])[0]\n", + "\n", + "\n", + "def min_wer(ref: List[str], transcriptions, n_bins: int = 100, start: float = 0.0, stop: float = 1.0):\n", + " \"\"\"Find the threshold value that delivers the minimum WER.\n", + " \"\"\"\n", + " thresholds = np.linspace(start, stop, n_bins + 1)\n", + " hyp = [(hyp.text, hyp.word_confidence) for hyp in transcriptions]\n", + " _get_wer = lambda x, y, z: (x, word_error_rate_detail([drop_low_confidence_words(yy[0], yy[1], x) for yy in y], z)[0])\n", + " wers = Parallel(n_jobs=cpu_count())(delayed(_get_wer)(threshold, hyp, ref) for threshold in tqdm(thresholds))\n", + " return min(wers, key=lambda x: x[1])\n", + "\n", + "\n", + "targets_with_confidence = [get_word_targets_with_confidence(tran) for tran in transcriptions]\n", + "correct_marks = [\n", + " get_correct_marks(r.split(), h.words) for r, h in zip(current_test_set.reference_texts, transcriptions)\n", + "]\n", + "y_true, y_score = np.array(\n", + " [[f, p[1]] for cm, twc in zip(correct_marks, targets_with_confidence) for f, p in zip(cm, twc)]\n", + ").T\n", + "\n", + "threshold_yc = max_unnnormalized_yc(y_true, y_score)\n", + "yc_wer_value = word_error_rate(\n", + " [drop_low_confidence_words(hyp.text, hyp.word_confidence, threshold_yc) for hyp in transcriptions],\n", + " current_test_set.reference_texts,\n", + ")\n", + "threshold_min_wer, min_wer_value = min_wer(current_test_set.reference_texts, transcriptions, stop=0.1)\n", + "\n", + "print(\n", + " f\"\"\" Initial WER: {wer_initial[0]:.5f}\n", + " Optimal threshold and WER based on the Youden's curve: {threshold_yc}, {yc_wer_value:.5f}\n", + " Optimal threshold for the minimum WER: {threshold_min_wer}, {min_wer_value:.5f}\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "425d010e", + "metadata": { + "id": "3b278d2d" + }, + "source": [ + "As you can see, the optimal cutoff threshold as the maximum of the Youden's curve makes WER significantly worse, and the optimal threshold for the minimum WER is near zero.\n", + "\n", + "Let's use a different confidence estimation setup to see if we can improve WER at least a bit further." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d080686", + "metadata": { + "id": "39f72c78" + }, + "outputs": [], + "source": [ + "confidence_cfg = ConfidenceConfig(\n", + " preserve_word_confidence=True,\n", + " preserve_token_confidence=True,\n", + " aggregation=\"min\",\n", + " measure_cfg=DictConfig({\"entropy_type\": \"tsallis\", \"alpha\": 1.5, \"entropy_norm\": \"lin\"}),\n", + ")\n", + "\n", + "model.change_decoding_strategy(\n", + " RNNTDecodingConfig(fused_batch_size=-1, strategy=\"greedy_batch\", confidence_cfg=confidence_cfg)\n", + " if is_rnnt\n", + " else CTCDecodingConfig(confidence_cfg=confidence_cfg)\n", + ")\n", + "\n", + "transcriptions = model.transcribe(paths2audio_files=current_test_set.filepaths, batch_size=16, return_hypotheses=True, num_workers=4)\n", + "if is_rnnt:\n", + " transcriptions = transcriptions[0]\n", + "\n", + "threshold_min_wer, min_wer_value = min_wer(current_test_set.reference_texts, transcriptions)\n", + "\n", + "print(\n", + " f\"\"\" Initial WER: {wer_initial[0]:.5f}\n", + " Optimal threshold for the minimum WER: {threshold_min_wer}, {min_wer_value:.5f}\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e3c9cc02", + "metadata": { + "id": "e00581b1" + }, + "source": [ + "Overall, such an improvement in WER is too small to be considered. However, this opens up the possibility of improving WER through the use of more accurate confidence estimation methods." + ] + }, + { + "cell_type": "markdown", + "id": "694d1752", + "metadata": { + "id": "f9f89665" + }, + "source": [ + "## 4.2. Reducing hallucinations with confidence scores\n", + "\n", + "One common application of confidence scores is the removal of recognition hallucinations.\n", + "\n", + "Let's see how this can be done." + ] + }, + { + "cell_type": "markdown", + "id": "98a1ef83", + "metadata": { + "id": "c1c28379" + }, + "source": [ + "Firstly, let's obtain a dataset on which the ASR model can hallucinate.\n", + "\n", + "Here we make it from the librosa examples, reversing them and convolving with each other." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f12a5041", + "metadata": { + "id": "3b0a0b4c" + }, + "outputs": [], + "source": [ + "from itertools import combinations\n", + "import json\n", + "import librosa\n", + "import soundfile as sf\n", + "\n", + "def cyclic_sum(x, y):\n", + " if x.shape[0] < y.shape[0]:\n", + " x, y = y, x\n", + " if x.shape[0] > y.shape[0]:\n", + " y = np.take(y, range(0, x.shape[0]), mode='wrap')\n", + " return x + y\n", + "\n", + "def generate_noise_examples(example_list: List[str], save_dir: str, samplerate: int = 16000):\n", + " \"\"\"Generate noise examples with librosa.\n", + " It loads the selected examples, inverts them, and perturbs them with each other.\n", + "\n", + " Returns:\n", + " A manifest with the noise wavs.\n", + " \"\"\"\n", + " samples = {ex: librosa.core.load(librosa.util.example(key=ex, hq=True), sr=samplerate)[0] \n", + " for ex in example_list}\n", + " noise_samples = {\"_\".join([left, right]): cyclic_sum(samples[left][::-1], samples[right][::-1]) \n", + " for left, right in combinations(samples.keys(), 2)}\n", + "\n", + " os.makedirs(save_dir, exist_ok=True)\n", + " manifest = os.path.join(save_dir, \"manifest.json\")\n", + " with open(manifest, \"tw\", encoding=\"utf-8\") as fout:\n", + " for k, v in noise_samples.items():\n", + " audio_path = os.path.join(save_dir, f\"{k}.wav\")\n", + " sf.write(audio_path, v, samplerate=samplerate)\n", + " metadata = {\n", + " \"audio_filepath\": audio_path,\n", + " \"duration\": librosa.core.get_duration(y=v, sr=samplerate),\n", + " \"label\": \"noise\",\n", + " \"text\": \"_\"\n", + " }\n", + " json.dump(metadata, fout)\n", + " fout.write('\\n')\n", + "\n", + " return manifest\n", + "\n", + "librosa_list_examples = ['brahms',\n", + " 'choice',\n", + " 'fishin',\n", + " 'humpback',\n", + " 'libri1',\n", + " 'libri2',\n", + " 'libri3',\n", + " 'nutcracker',\n", + " 'pistachio',\n", + " 'robin',\n", + " 'sweetwaltz',\n", + " 'trumpet',\n", + " 'vibeace']\n", + "sr = 16000\n", + "\n", + "noise_dir = os.path.join(DATA_DIR, \"noise\")\n", + "noise_manifest = generate_noise_examples(librosa_list_examples, noise_dir, sr)" + ] + }, + { + "cell_type": "markdown", + "id": "f28da61f", + "metadata": {}, + "source": [ + "The original examples contain speech, music, or noise. The resulting audio recordings are considered to contain no recognizable speech.\n", + "\n", + "You can listen to an example of the audio." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b4e7007", + "metadata": {}, + "outputs": [], + "source": [ + "noise_data = load_data(noise_manifest)\n", + "\n", + "display(Audio(noise_data.filepaths[0]))" + ] + }, + { + "cell_type": "markdown", + "id": "1db80ae4", + "metadata": { + "id": "f7f9ddca" + }, + "source": [ + "Now let's transcribe our new data, setting the default confidence estimator." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a872926", + "metadata": { + "id": "60f39094" + }, + "outputs": [], + "source": [ + "confidence_cfg = ConfidenceConfig(\n", + " preserve_word_confidence=True,\n", + " preserve_token_confidence=True,\n", + ")\n", + "\n", + "model.change_decoding_strategy(\n", + " RNNTDecodingConfig(fused_batch_size=-1, strategy=\"greedy_batch\", confidence_cfg=confidence_cfg)\n", + " if is_rnnt\n", + " else CTCDecodingConfig(confidence_cfg=confidence_cfg)\n", + ")\n", + "\n", + "noise_transcriptions = model.transcribe(\n", + " paths2audio_files=noise_data.filepaths, batch_size=4, return_hypotheses=True, num_workers=4\n", + ")\n", + "if is_rnnt:\n", + " noise_transcriptions = noise_transcriptions[0]" + ] + }, + { + "cell_type": "markdown", + "id": "3d097ca6", + "metadata": { + "id": "2f192186" + }, + "source": [ + "On a fully non-speech dataset, hallucinations can be measured as the Word Insertions per Second (WIS) value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19c6321c", + "metadata": { + "id": "3589da00" + }, + "outputs": [], + "source": [ + "def word_insertions_per_second(texts: List[str], durations: List[float]):\n", + " \"\"\"Calculate the Word Insertions per Second (WIS) value for the given recognition results \n", + " and their corresponding audio duration.\n", + " \"\"\"\n", + " assert len(texts) == len(durations)\n", + "\n", + " wis_per_utt = [len(text.split(\" \")) / duration for text, duration in zip(texts, durations)]\n", + " return sum(wis_per_utt) / len(wis_per_utt), wis_per_utt\n", + "\n", + "wis, wis_per_utt = word_insertions_per_second([t.text for t in noise_transcriptions], noise_data.durations)\n", + "print(f\"Original Word Insertions per Second: {wis:.5f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "bcf44daf", + "metadata": { + "id": "a0d8135d" + }, + "source": [ + "Now, the ability of a confidence estimator to detect hallucinations is computed as the Hallucination Detection Rate (HDR).\n", + "\n", + "It shows how many of all hallucinations can be removed, provided that no more than some fixed percentage of correct words are erroneously removed (under normal recognition conditions).\n", + "\n", + "HDR is another name of the metric $\\mathrm{TNR}_{FNR=e}$ which is calculated as $\\mathrm{TNR}(Y,\\tau): \\mathrm{FNR}(X,\\tau) \\approx e$, where $X$ is the dataset with supervision (to tune $\\tau$) and $Y$ is the noise-only dataset. Typical $e$ value is 0.05.\n", + "\n", + "Let's compute HDR and the new WIS.\n", + "\n", + "The generated dataset is clearly distinct from speech, so $e=0.01$ is sufficient." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dac1f7d", + "metadata": { + "id": "0612ccf6" + }, + "outputs": [], + "source": [ + "def hdr(\n", + " y_true_speech: Union[List[int], np.ndarray],\n", + " y_score_speech: Union[List[float], np.ndarray],\n", + " y_score_noise: Union[List[float], np.ndarray],\n", + " max_fnr: float = 0.05,\n", + " n_bins: int = 100,\n", + ") -> Tuple[float, float]:\n", + " \"\"\"Compute Hallucination Detection Rate (HDR) from prediction scores.\n", + "\n", + " Returns:\n", + " tnr: True-Negative Rate for HDR\n", + " threshold_hdr: Optimal threshold \n", + " \"\"\"\n", + " y_true_speech = np.array(y_true_speech)\n", + " y_score_speech = np.array(y_score_speech)\n", + " y_score_noise = np.array(y_score_noise)\n", + " thresholds = np.linspace(0, 1, n_bins + 1)\n", + " assert y_true_speech.shape[0] == y_score_speech.shape[0]\n", + " assert np.all(y_true_speech >= 0) and np.all(y_true_speech <= 1)\n", + " if np.all(y_true_speech == 0) or np.all(y_true_speech == 1):\n", + " return 0.0, 0.0\n", + " mask_correct = y_true_speech == 1\n", + " count_correct = max(mask_correct.nonzero()[0].shape[0], 1)\n", + " y_score_correct = y_score_speech[mask_correct]\n", + " threshold_hdr = 0.0\n", + " for threshold in thresholds:\n", + " fnr = (y_score_correct < threshold).nonzero()[0].shape[0] / count_correct\n", + " if fnr <= max_fnr:\n", + " threshold_hdr = threshold\n", + " else:\n", + " break\n", + " tnr = (y_score_noise < threshold_hdr).nonzero()[0].shape[0] / y_score_noise.shape[0]\n", + " return tnr, threshold_hdr\n", + "\n", + "\n", + "# e\n", + "max_fnr = 0.01\n", + "\n", + "correct_marks = [\n", + " mark for r, h in zip(current_test_set.reference_texts, transcriptions) for mark in get_correct_marks(r.split(), h.words)\n", + "]\n", + "y_score_speech = [w for h in transcriptions for w in h.word_confidence]\n", + "y_score_noise = [w for h in noise_transcriptions for w in h.word_confidence]\n", + "hdr_score, threshold_hdr = hdr(correct_marks, y_score_speech, y_score_noise, max_fnr=max_fnr)\n", + "wis_new = wis - wis * hdr_score\n", + "\n", + "print(\n", + " f\"\"\" Hallucination Detection Rate for max_fnr={max_fnr} : {hdr_score:.5f}\n", + " New Word Insertions Per Second: {wis_new:.5f}\"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "443938bc", + "metadata": { + "id": "418297d6" + }, + "source": [ + "Finally, let's print the noisy utterances to see if any more hallucinations persist." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dde9e7db", + "metadata": { + "id": "3815e8e3" + }, + "outputs": [], + "source": [ + "sorted_wis_indices = sorted(enumerate(wis_per_utt), key=lambda x: x[1])[::-1]\n", + "\n", + "show_dataset_with_confidence(\n", + " indices=sorted_wis_indices,\n", + " transcriptions=noise_transcriptions,\n", + " test_set=noise_data,\n", + " threshold=threshold_hdr,\n", + " filepaths=noise_data.filepaths,\n", + " html_show=is_colab,\n", + " min_dur_to_show=0.0,\n", + " utt_to_show=5,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "66f92938", + "metadata": { + "id": "0ac58ef2" + }, + "source": [ + "# Summary\n", + "This tutorial covered the basics of ASR confidence estimation and two examples of using ASR word confidence: WER reduction and hallucination removal.\n", + "\n", + "You can follow this tutorial on [ASR Confidence-based Ensembles](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Confidence_Ensembles.ipynb) to see another important application of ASR confidence estimation." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/asr/Confidence_Ensembles.ipynb b/tutorials/asr/Confidence_Ensembles.ipynb index f9617c75e36a..4516d2b70d6d 100644 --- a/tutorials/asr/Confidence_Ensembles.ipynb +++ b/tutorials/asr/Confidence_Ensembles.ipynb @@ -110,7 +110,7 @@ "\n", "### How to estimate a model's confidence?\n", "\n", - "Good news, we have a whole separate [tutorial](TBD) on this topic! You can go through it if you want to know all the details about different ways to estimate confidence of NeMo ASR models. There are different confidence measures and aggregation functions and for the absolute best performance, you will need to run a grid-search to pick the best confidence estimation way for your specific models and data.\n", + "Good news, we have a whole separate [tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_Confidence_Estimation.ipynb) on this topic! You can go through it if you want to know all the details about different ways to estimate confidence of NeMo ASR models. There are different confidence measures and aggregation functions and for the absolute best performance, you will need to run a grid-search to pick the best confidence estimation way for your specific models and data.\n", "\n", "That being said, we found that there exist a set of confidence parameters that work pretty well on a large set of models and datsets. They are default in NeMo and so you might not need to worry about running the search. 
If you do want to maximize the performance by tuning the confidence parameters, you only need to add [a few extra config lines](#Building-and-evaluating-ensemble-(tuned-parameters)).\n", "\n", From 2ef544ffe6daa80d38d0f494a7e42adcac50a4b9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 16 Jul 2023 18:19:47 -0700 Subject: [PATCH 113/123] install_bs (#7019) (#7028) Signed-off-by: Nikolay Karpov Co-authored-by: Nikolay Karpov --- .../ngram_lm/install_beamsearch_decoders.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh index 558a84698f49..3ba337a6afd3 100755 --- a/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh +++ b/scripts/asr_language_modeling/ngram_lm/install_beamsearch_decoders.sh @@ -26,14 +26,15 @@ KENLM_MAX_ORDER=10 # Maximum order of KenLM model, also specified in the setup_o cd $NEMO_PATH if [ $(id -u) -eq 0 ]; then - alias aptupdate='apt-get update' - alias b2install='./b2' -else - alias aptupdate='sudo apt-get update' - alias b2install='sudo ./b2' + alias aptupdate='apt-get update' + alias b2install='./b2' + else + alias aptupdate='sudo apt-get update' + alias b2install='sudo ./b2' fi -aptupdate && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder' +aptupdate && apt-get upgrade -y && apt-get install -y liblzma-dev && rm -rf /var/lib/apt/lists/* # liblzma needed for flashlight decoder + git clone https://github.com/NVIDIA/OpenSeq2Seq cd OpenSeq2Seq From 8b4b3820cee4612ef49884df3edc0d035b47cd13 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 21:50:04 -0700 Subject: [PATCH 114/123] fixes for spellmapper (#6994) (#7000) Signed-off-by: Alexandra Antonova Co-authored-by: bene-ges Co-authored-by: Evelina <10428420+ekmb@users.noreply.github.com> --- .../create_custom_vocab_index.py | 2 +- .../run_infer.sh | 2 +- .../spellchecking_asr_customization/utils.py | 84 +++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py b/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py index 07d64ec5b723..68c55ff51a4f 100644 --- a/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py +++ b/examples/nlp/spellchecking_asr_customization/create_custom_vocab_index.py @@ -53,7 +53,7 @@ print("Size of customization vocabulary:", len(custom_phrases)) # Load n-gram mappings vocabulary -ngram_mapping_vocab, ban_ngram = load_ngram_mappings(args.ngram_mappings, max_misspelled_freq=125000) +ngram_mapping_vocab, ban_ngram = load_ngram_mappings(args.ngram_mappings, max_misspelled_freq=args.max_misspelled_freq) # Generate index of custom phrases phrases, ngram2phrases = get_index( diff --git a/examples/nlp/spellchecking_asr_customization/run_infer.sh b/examples/nlp/spellchecking_asr_customization/run_infer.sh index 09da98171c16..b4bbdc4da375 100644 --- a/examples/nlp/spellchecking_asr_customization/run_infer.sh +++ b/examples/nlp/spellchecking_asr_customization/run_infer.sh @@ -31,7 +31,7 @@ BIG_SAMPLE=spellmapper_asr_customization_en/big_sample.txt ## File with input nemo ASR manifest INPUT_MANIFEST=spellmapper_en_evaluation/medical_manifest_ctc.json ## File 
containing custom words and phrases (plain text) -CUSTOM_VOCAB=spellmapper_en_evaluation/medical_custom_vocab.json +CUSTOM_VOCAB=spellmapper_en_evaluation/medical_custom_vocab.txt ## Other files will be created ## File with index of custom vocabulary diff --git a/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py b/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py index cda551189d78..7385f19b414a 100644 --- a/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py +++ b/nemo/collections/nlp/data/spellchecking_asr_customization/utils.py @@ -764,12 +764,30 @@ def check_banned_replacements(src: str, dst: str) -> bool: # anticipated => anticipate if src.endswith("ed") and dst.endswith("e") and src[0:-2] == dst[0:-1]: return True + # blocks => blocked + if src.endswith("s") and dst.endswith("ed") and src[0:-1] == dst[0:-2]: + return True + # blocked => blocks + if src.endswith("ed") and dst.endswith("s") and src[0:-2] == dst[0:-1]: + return True + # lives => lived + if src.endswith("es") and dst.endswith("ed") and src[0:-2] == dst[0:-2]: + return True + # lived => lives + if src.endswith("ed") and dst.endswith("es") and src[0:-2] == dst[0:-2]: + return True # regarded => regard if src.endswith("ed") and src[0:-2] == dst: return True # regard => regarded if dst.endswith("ed") and dst[0:-2] == src: return True + # regardeding => regard + if src.endswith("ing") and src[0:-3] == dst: + return True + # regard => regarding + if dst.endswith("ing") and dst[0:-3] == src: + return True # longer => long if src.endswith("er") and src[0:-2] == dst: return True @@ -782,48 +800,102 @@ def check_banned_replacements(src: str, dst: str) -> bool: # discussing => discussed if src.endswith("ing") and dst.endswith("ed") and src[0:-3] == dst[0:-2]: return True + # live => living + if src.endswith("e") and dst.endswith("ing") and src[0:-1] == dst[0:-3]: + return True + # living => live + if src.endswith("ing") and dst.endswith("e") and src[0:-3] == dst[0:-1]: + return True # discussion => discussing if src.endswith("ion") and dst.endswith("ing") and src[0:-3] == dst[0:-3]: return True # discussing => discussion if src.endswith("ing") and dst.endswith("ion") and src[0:-3] == dst[0:-3]: return True + # alignment => aligning + if src.endswith("ment") and dst.endswith("ing") and src[0:-4] == dst[0:-3]: + return True + # aligning => alignment + if src.endswith("ing") and dst.endswith("ment") and src[0:-3] == dst[0:-4]: + return True # dispensers => dispensing if src.endswith("ers") and dst.endswith("ing") and src[0:-3] == dst[0:-3]: return True # dispensing => dispensers if src.endswith("ing") and dst.endswith("ers") and src[0:-3] == dst[0:-3]: return True + # integrate => integrity + if src.endswith("ate") and dst.endswith("ity") and src[0:-3] == dst[0:-3]: + return True + # integrity => integrate + if src.endswith("ity") and dst.endswith("ate") and src[0:-3] == dst[0:-3]: + return True # discussion => discussed if src.endswith("ion") and dst.endswith("ed") and src[0:-3] == dst[0:-2]: return True # discussed => discussion if src.endswith("ed") and dst.endswith("ion") and src[0:-2] == dst[0:-3]: return True + # anticipation => anticipate + if src.endswith("ion") and dst.endswith("e") and src[0:-3] == dst[0:-1]: + return True + # anticipate => anticipation + if src.endswith("e") and dst.endswith("ion") and src[0:-1] == dst[0:-3]: + return True # incremental => increment if src.endswith("ntal") and dst.endswith("nt") and src[0:-4] == dst[0:-2]: return True # increment => 
incremental if src.endswith("nt") and dst.endswith("ntal") and src[0:-2] == dst[0:-4]: return True + # national => nation + if src.endswith("nal") and dst.endswith("n") and src[0:-3] == dst[0:-1]: + return True + # nation => national + if src.endswith("n") and dst.endswith("nal") and src[0:-1] == dst[0:-3]: + return True + # significantly => significant + if src.endswith("ntly") and dst.endswith("nt") and src[0:-4] == dst[0:-2]: + return True + # significant => significantly + if src.endswith("nt") and dst.endswith("ntly") and src[0:-2] == dst[0:-4]: + return True # delivery => deliverer if src.endswith("ery") and dst.endswith("erer") and src[0:-3] == dst[0:-4]: return True # deliverer => delivery if src.endswith("erer") and dst.endswith("ery") and src[0:-4] == dst[0:-3]: return True + # deliver => deliverer + if src.endswith("er") and dst.endswith("erer") and src[0:-2] == dst[0:-4]: + return True + # deliverer => deliver + if src.endswith("erer") and dst.endswith("er") and src[0:-4] == dst[0:-2]: + return True # comparably => comparable if src.endswith("bly") and dst.endswith("ble") and src[0:-3] == dst[0:-3]: return True # comparable => comparably if src.endswith("ble") and dst.endswith("bly") and src[0:-3] == dst[0:-3]: return True + # comparably => comparability + if src.endswith("bly") and dst.endswith("bility") and src[0:-3] == dst[0:-6]: + return True + # comparability => comparably + if src.endswith("bility") and dst.endswith("bly") and src[0:-6] == dst[0:-3]: + return True # beautiful => beautifully if src.endswith("l") and dst.endswith("lly") and src[0:-1] == dst[0:-3]: return True # beautifully => beautiful if src.endswith("lly") and dst.endswith("l") and src[0:-3] == dst[0:-1]: return True + # active => actively + if src.endswith("e") and dst.endswith("ely") and src[0:-1] == dst[0:-3]: + return True + # actively => active + if src.endswith("ely") and dst.endswith("e") and src[0:-3] == dst[0:-1]: + return True # america => american if src.endswith("a") and dst.endswith("an") and src[0:-1] == dst[0:-2]: return True @@ -836,6 +908,18 @@ def check_banned_replacements(src: str, dst: str) -> bool: # investing => reinvesting if dst.startswith("re") and dst[2:] == src: return True + # unchanged => changed + if src.startswith("un") and src[2:] == dst: + return True + # changed => unchanged + if dst.startswith("un") and dst[2:] == src: + return True + # disrespected => respected + if src.startswith("dis") and src[3:] == dst: + return True + # respected => disrespected + if dst.startswith("dis") and dst[3:] == src: + return True # outperformance => performance if src.startswith("out") and src[3:] == dst: return True From 9051440b313ff3bba07928319a4e4840c9aa55bb Mon Sep 17 00:00:00 2001 From: Yi Dong <43824965+yidong72@users.noreply.github.com> Date: Tue, 18 Jul 2023 11:38:25 -0400 Subject: [PATCH 115/123] added back the retro documents (#7033) Signed-off-by: Yi Dong --- .../nlp/nemo_megatron/retro/retro_model.rst | 446 +++++++++++++++++- 1 file changed, 444 insertions(+), 2 deletions(-) diff --git a/docs/source/nlp/nemo_megatron/retro/retro_model.rst b/docs/source/nlp/nemo_megatron/retro/retro_model.rst index edbec3d1c2ca..ceff1baf857f 100644 --- a/docs/source/nlp/nemo_megatron/retro/retro_model.rst +++ b/docs/source/nlp/nemo_megatron/retro/retro_model.rst @@ -1,2 +1,444 @@ -Coming Soon ... 
-================ \ No newline at end of file +NeMo RETRO Model +================ + +The Retrieval-Enhanced Transformer (RETRO) model is an autoregressive language model that takes into account document chunks retrieved from a large +corpus when making predictions. The RETRO model has a similar architecture to the GPT model, but it includes an encoder that encodes the retrieved +context and cross-attention layers that integrate the context to improve the model's output. Below is a simple diagram of the RETRO model architecture. + +.. image:: images/arch.png + :align: center + :width: 800px + :alt: RETRO model architecture + +For more detailed information on the model, please refer to the `RETRO paper `_ :cite:`nlp-retro-borgeaud2021improving` by Deepmind. +The NeMo RETRO Model is an open-source implementation of the paper, and it has the following differences/features compared to Deepmind's proposed implementation: + +1. The NeMo RETRO Model is built on top of NeMo Megatron code, allowing for efficient training of large language models in a cluster environment. +2. The NeMo RETRO Model uses `Faiss `_ :cite:`nlp-retro-jegou2022faiss` as the KNN search library, which can be accelerated by GPUs. +3. The NeMo RETRO uses `RoPE relative positional encoding `_ :cite:`nlp-retro-su2021roformer`. +4. The NeMo RETRO uses `SentenceTransformers `_ :cite:`nlp-retro-reimers2019sentence` as the retriever encoder. +5. The NeMo RETRO supports `mu-Transfer `_ :cite:`nlp-retro-yang2022tensor`, allowing for scalable training of the RETRO model via Zero-Shot Hyperparameter Transfer. + +Quick start +************ +Steps below demonstrate training and evaluating a NeMo RETRO model. + +Data pre-processing +------------------- + +Step 1: Collect training data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The RETRO model uses two types of data: training data, which typically consists of 64-token chunks, and retrieval data, which typically consists of 128-token chunks. +The training data is used to train the model, while the retrieval data is used to supplement the language model. +It's possible to use the same data for both training and retrieval, as long as duplicates are removed properly, as described below. +Both types of data are stored in a loose JSON format, with each line containing a single text sample. For example: + +.. code-block:: json + {"src": "www.nvidia.com", "text": "The quick brown fox", "type": "Eng", "id": "0", "title": "First Part"} + {"src": "The Internet", "text": "jumps over the lazy dog", "type": "Eng", "id": "42", "title": "Second Part"} +The name of the text field of the json can be changed by using the ``--json-key`` flag in ``preprocess_data_for_megatron.py``. The other metadata are optional and are not used in training. + +Step 2: Convert training data into memory map format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The loose json is then processed into a binary format for training and retrieval. To convert the json into a mmap (cached index) file, set the ``--dataset-impl`` flag to `retmmap`, which is the memory map format dedicated for the RETRO model. + +An example script to prepare data for RETRO training is: + +.. 
code-block:: bash + python scripts/nlp_language_modeling/preprocess_data_for_megatron.py \ + --input=/dataset/pubmed_train.jsonl \ + --json-keys=text \ + --tokenizer-library=megatron \ + --apply-ftfy \ + --dataset-impl=retmmap \ + --merge-file=/dataset/gpt2-merges.txt \ + --vocab-file=/dataset/gpt2-vocab.json \ + --tokenizer-type=GPT2BPETokenizer \ + --output-prefix=/result/pubmed_train \ + --need-pad-id \ + --append-eod \ + --retrieval-db \ + --chunk_size=64 \ + --workers=48 +The RETRO model processes chunked documents using 64 tokens as the default chunk size. The RETRO memory map dataset will add padding +tokens to the end of each document to make it a multiple of 64. The ``--need-pad-id`` argument adds a padding token to the tokenizer +if it doesn't already have one. The ``--append-eod`` argument controls whether to add ``end-of-document`` tokens to the preprocessed +data, and the ``--retrieval-db`` argument indicates whether to create a retrieval database for the preprocessed data. If ``--retrieval-db`` +is used, it will add an additional 64 padding tokens at the end of the document. The ``--chunk_size`` and ``--workers`` arguments +control the size of the data chunks to be processed and the number of worker processes to use, respectively. + +Following is the retro memory map index data format: + +.. list-table:: + :widths: 25 25 25 25 25 25 + + * - 'MMIDRET\x00\x00' (header 9 bytes) + - 1 (version 8 byte) + - dtype code :sup:`1` (1 byte) + - sentence count (8 byte) + - chunk size (8 byte) + - chunk count (8 byte) + * - retrieved db :sup:`2` (1 byte) + - number of tokens for each sentence (int32 array) + - start of sentence address in byte (int64 array) + - start of chunk id (int64 array) + - chunk id address in byte (int64 array) + - + +:sup:`1` 1: np.uint8, 2: np.int8, 3: np.int16, 4: np.int32, 5: np.int64, 6: np.float, 7: np.double, 8: np.uint16 + +:sup:`2` When building the indexed dataset, we pad each sentence to be a multiple of ``chunk_size`` with ``pad_id`` from the tokenizer. +The number of tokens for each sentence includes the padded token ids. For retrieval data, there is an extra ``chunk_size`` padding at +the end of each sentence, and the ``retrieved_db`` flag is set to True. However, the number of tokens for each sentence excludes this extra ``chunk_size`` padding. + +Following is the retro memory map binary data format: + +.. list-table:: + :widths: 65 + + * - token id array for sentence 0,1, 2 ... (dtype :sup:`3` array) + +:sup:`3` np.uint16 if vocab_size < 65500 else np.int32 + +Step 3: Create Faiss index for retrieval data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +After creating the memory map retrieval data binary file and index files, we can build a Faiss index that can quickly find the K-nearest neighbors of a given +chunk ID based on a query embedding vector. Because the retrieval data is typically very large, we break this process down into three steps. + +Step 3.1: Train the Faiss index structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In this step, a subset of the retrieval data is used to train an empty Faiss index. An example script is: + +.. 
code-block:: bash + python scripts/nlp_language_modeling/build_retrieval_index.py \ + --input_file=/result/pubmed_train_text_document \ + --tokenizer-library=megatron \ + --tokenizer-type=GPT2BPETokenizer \ + --merge-file=/dataset/gpt2-merges.txt \ + --vocab-file=/dataset/gpt2-vocab.json \ + --percent=1.0 \ + --sentence_transformer_model=all-mpnet-base-v2 \ + --batch_size=1024 \ + --train_index_size=2000000 \ + --workers=2 \ + --devices=0,1,2,3,4,5,6,7 \ + --stage=0 \ + --output_file=/result/pubmed_faiss_learn.index +This command is used to build an empty Faiss index using the 2000000 training data in ``pubmed_train_text_document``. +The ``all-mpnet-base-v2`` sentence transformer model is used to encode the chunk tokens into an embedding vector. +The index will be saved in the result directory as ``pubmed_faiss_learn.index``. This command specifies using 8 GPUs to train the Faiss index. + +Step 3.2: Add retrieval data into sharding index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step adds all the retrieval data to the empty Faiss index created in the previous step. An example script is: + +.. code-block:: bash + python scripts/nlp_language_modeling/build_retrieval_index.py \ + --input_file=/result/pubmed_train_text_document \ + --tokenizer-library=megatron \ + --tokenizer-type=GPT2BPETokenizer \ + --merge-file=/dataset/gpt2-merges.txt \ + --vocab-file=/dataset/gpt2-vocab.json \ + --percent=1.0 \ + --sentence_transformer_model=all-mpnet-base-v2 \ + --batch_size=1024 \ + --shard_id=0 \ + --total_shards=10 \ + --workers=2 \ + --devices=0,1,2,3,4,5,6,7 \ + --stage=1 \ + --learned_index=/result/pubmed_faiss_learn.index \ + --output_file=/result/pubmed_faiss_shard0.save +This command breaks the retrieval data into ``total_shards`` shards and adds the data in the shard specified by ``shard_id``. +The result is saved to a file specified by ``output_file``. In the example above, 10 sharding indexes are created. + +Step 3.3: Merge the sharding indexes into final Faiss index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This step merges all the sharding indexes created in the previous step into the final Faiss index. An example script is: + +.. code-block:: bash + python scripts/nlp_language_modeling/build_retrieval_index.py \ + --stage=2 \ + --devices=0,1,2,3,4,5,6,7 \ + --learned_index=/result/pubmed_faiss_learn.index \ + --shard_index_input=/result/pubmed_faiss_shard \ + --output_file=/result/pubmed_faiss_final.index +Step 4: Build KNN index +^^^^^^^^^^^^^^^^^^^^^^^ + +During training, it is inefficient to run a query to find the K-nearest neighbor chunk IDs for each training data point. +This can be pre-calculated by building a KNN index before training. The KNN index maps the training data chunk IDs to the K-nearest neighbor chunk IDs +in the retrieval data. As with building the Faiss index, this process is divided into two steps. + +Following is the KNN index data format: + +.. list-table:: + :widths: 25 25 25 25 45 + + * - 'KNNRETM\x00\x00' (header 9 bytes) + - 1 (version 8 byte) + - K number of neighbors (8 byte) + - Number chunks (8 byte) + - Map to K retrieval data chunk IDs, shape (number_chunks, K) ( int64 array) + +Step 4.1: Build KNN sharding index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The KNN index is built using the memory-mapped training data created by the ``preprocess_data_for_megatron.py`` script and the Faiss index +file for the retrieval data built by the ``build_retrieval_index.py`` script. + +An example script is: + +.. 
code-block:: bash + python scripts/nlp_language_modeling/build_knn_map_index.py \ + --input_file=/result/pubmed_eval_text_document \ + --tokenizer-library=megatron \ + --tokenizer-type=GPT2BPETokenizer \ + --merge-file=/dataset/gpt2-merges.txt \ + --vocab-file=/dataset/gpt2-vocab.json \ + --process_chunk_size=10000 \ + --sentence_transformer_model=all-mpnet-base-v2 \ + --batch_size=1024 \ + --K_neighbors=50 \ + --workers=2 \ + --devices=0,1,2,3,4,5,6,7 \ + --remove_duplicate \ + --dedup_margin=70 \ + --nprobe=100 \ + --shard_id=0 \ + --total_shards=10 \ + --stage=1 \ + --output_file=/dataset/pubmed_knn_shard0.save \ + --faiss_index=/result/pubmed_faiss_final.index +In this example, the training data is broken into ``total_shards`` shards, and the KNN index is calculated for the shard specified by ``shard_id``. +The result is saved to a file specified by ``output_file``. In the example above, 10 KNN sharding indexes are created. + +Use the ``remove_duplicate`` flag if the training data and retrieval data are the same to remove neighbors from the same document. + +Step 4.2: Merge KNN sharding index into final KNN index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +An example script is: + +.. code-block:: bash + python scripts/nlp_language_modeling/build_knn_map_index.py \ + --stage=2 \ + --output_file=pubmed_knn_final.save \ + --shard_index_input=pubmed_knn_shard +Train NeMo RETRO Model +----------------------- + +Once the training data, retrieval data, KNN index, and Faiss index are prepared, we are ready to train the RETRO model. In the NeMo implementation, +the RETRO model can be pre-trained with or without the `mu-Transfer `_ :cite:`nlp-retro-yang2022tensor` feature. We will introduce both ways. + + +The table below lists some of the common parameters that can be configured for model pre-training. 
+ ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| **Parameter** | **Default** | **Description** | ++==================================+=============+========================================================================================+ +| model.micro_batch_size | 4 | the micro batch size used for training | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.tensor_model_parallel_size | 1 | tensor model parallel size | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.encoder_seq_length | 2048 | token sequence length | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.chunk_size | 64 | the chunk size used to retrieve | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.enc_num_layers | 4 | total number of encoder layers | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.dec_num_layers | 6 | total number of decoder layers | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.enc_cross_attention | [3] | layer numbers for cross attention in encoder | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.dec_cross_attention | [3,4,5] | layer numbers for chunked cross attention in decoder | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.add_position_embedding | FALSE | whether to add the absolute position encoding | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.hidden_size | 768 | model hidden size | ++----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.ffn_hidden_size | 3072 | model FFN hidden size. 
Usually 4 * hidden_size                                                 | +----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.num_attention_heads | 12 | number of attention heads | +----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.init_method_std | 0.02 | standard deviation of the zero mean normal distribution used for weight initialization | +----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.hidden_dropout | 0.1 | dropout probability for hidden state transformer | +----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.attention_dropout | 0.1 | dropout probability in the attention layer | +----------------------------------+-------------+----------------------------------------------------------------------------------------+ +| model.ffn_dropout | 0 | dropout probability in the feed-forward layer | +----------------------------------+-------------+----------------------------------------------------------------------------------------+ + + +Option 1: Train the NeMo RETRO model *without* mu-Transfer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An example RETRO pre-training script is: + +.. code-block:: bash + python examples/nlp/language_modeling/megatron_retro_pretraining.py \ + trainer.devices=8 \ + trainer.num_nodes=2 \ + trainer.accelerator=gpu \ + trainer.max_steps=800000 \ + trainer.precision=16 \ + exp_manager.exp_dir=/result/retro_model \ + model.apply_query_key_layer_scaling=False \ + model.tensor_model_parallel_size=8 \ + model.optim.name=adamw \ + model.enc_num_layers=2 \ + model.dec_num_layers=32 \ + model.enc_cross_attention=[0] \ + model.dec_cross_attention=[8,11,14,17,20,23,26,29,31] \ + model.hidden_size=4096 \ + model.ffn_hidden_size=16384 \ + model.num_attention_heads=32 \ + model.tokenizer.merge_file=/dataset/gpt2-merges.txt \ + model.tokenizer.vocab_file=/dataset/gpt2-vocab.json \ + model.data.data_prefix=[/result/pubmed_eval_text_document] \ + model.data.knn_index=[dataset/pubmed_knn_final.save] \ + model.data.retrieval_prefix=/result/pubmed_eval_text_document \ + model.micro_batch_size=8 +During the training, launch Tensorboard to monitor training like so: + +.. code-block:: bash + tensorboard --logdir /result/retro_model --bind_all +.. note:: Weights and Biases (WandB) is supported too. Add ``exp_manager.create_wandb_logger=True`` to the model training arguments to enable it. + +After the training, the model nemo file can be found in the result checkpoint directory. + +Option 2: Train the NeMo RETRO model *with* mu-Transfer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +`mu-Transfer `_ :cite:`nlp-retro-yang2022tensor` paper proposed a method to zero-shot transfer hyperparameters to train a larger model. +This can be done in 3 steps in the NeMo RETRO implementation. + + +Step 1. find optimal hyperparameters for a small base model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the pre-training code in Option 1 to, either manually or automatically, find a set of optimal hyperparameters for a small base RETRO +model. This can be done cheaply and fast due to the small model size. + +Step 2. 
+Step 2. Calculate the shape file that can be used to run mu-Transfer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The shape file determines which hyperparameters will be scaled up, allowing the model to adjust the learning rate, weight scaling factor, etc. + +Here is an example shape file calculation script: + + +.. code-block:: bash + python examples/nlp/language_modeling/megatron_retro_cal_shape.py \ + trainer.devices=8 \ + trainer.num_nodes=1 \ + trainer.accelerator=gpu \ + exp_manager.exp_dir=/result/retro_model \ + base_model.enc_num_layers=2 \ + delta_model.enc_num_layers=2 \ + base_model.dec_num_layers=32 \ + delta_model.dec_num_layers=32 \ + base_model.tensor_model_parallel_size=8 \ + delta_model.tensor_model_parallel_size=8 \ + base_model.dec_cross_attention=[8,11,14,17,20,23,26,29,31] \ + delta_model.dec_cross_attention=[8,11,14,17,20,23,26,29,31] \ + base_model.enc_cross_attention=[0] \ + delta_model.enc_cross_attention=[0] \ + base_model.hidden_size=768 \ + base_model.ffn_hidden_size=3072 \ + delta_model.hidden_size=96 \ + delta_model.ffn_hidden_size=384 \ + base_model.num_attention_heads=16 \ + delta_model.num_attention_heads=16 \ + model.shape_file=tp8_32depth_o1_rel_shape_info.yaml +In this example, the ``base_model`` refers to the small base model for which an optimal set of hyperparameters has been determined. +The ``delta_model`` refers to a model with certain hyperparameters that have been scaled up or down. In this case, +the ``hidden_size`` and ``ffn_hidden_size`` have been changed in the ``delta_model``, allowing these two parameters to be scaled freely later. + +Step 3. Pretrain mu-Transfer RETRO model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once the shape file is created, we can start training a RETRO model. The model training can be scaled up freely using the hyperparameters +specified by the delta model and the shape file. + +An example mu-Transfer pre-training script is: + +.. code-block:: bash + python examples/nlp/language_modeling/megatron_retro_mutransfer_pretrain.py \ + trainer.devices=8 \ + trainer.num_nodes=2 \ + trainer.accelerator=gpu \ + trainer.max_steps=500000 \ + trainer.precision=16 \ + exp_manager.exp_dir=/result/retro_model \ + model.apply_query_key_layer_scaling=False \ + model.tensor_model_parallel_size=8 \ + model.optim.name=muadamw \ + model.enc_num_layers=2 \ + model.dec_num_layers=32 \ + model.enc_cross_attention=[0] \ + model.dec_cross_attention=[8,11,14,17,20,23,26,29,31] \ + model.hidden_size=4096 \ + model.ffn_hidden_size=16384 \ + model.num_attention_heads=32 \ + model.tokenizer.merge_file=/dataset/gpt2-merges.txt \ + model.tokenizer.vocab_file=/dataset/gpt2-vocab.json \ + model.data.data_prefix=[/result/pubmed_eval_text_document] \ + model.data.knn_index=[dataset/pubmed_knn_final.save] \ + model.data.retrieval_prefix=/result/pubmed_eval_text_document \ + model.micro_batch_size=8 \ + model.shape_file=tp8_32depth_o1_rel_shape_info.yaml +.. note:: We have chosen to use ``muadamw`` as the optimizer for use with the mu-Transfer method. Currently, only ``muadam`` and ``muadamw`` are supported. + +Similarly to the pre-training in Option 1, the model nemo file can be found in the result checkpoint directory after training is complete. + +Run NeMo RETRO Model Inference +------------------------------- + +Once the NeMo RETRO model has been trained, we can put it into inference mode and experiment with it. +During inference, we are not limited to the static Faiss index that we built earlier for KNN queries.
+We can feed any external data to the model as retrieval context. NeMo RETRO implementation supports dynamic retrieval service, +allowing users to add, reset, and query new documents on the fly. + +We have built a simple web client that makes it easy for users to play around with the model. Here is an example script to launch the server: + +.. code-block:: bash + python examples/nlp/language_modeling/megatron_retro_eval.py \ + trainer.devices=8 \ + trainer.num_nodes=1 \ + trainer.accelerator=gpu \ + trainer.precision=16 \ + retro_model_file=megatron_retro.nemo \ + tensor_model_parallel_size=8 \ + pipeline_model_parallel_size=1 \ + retrieval_service.sentence_bert.devices=\'0,1,2,3,4,5,6,7\' \ + retrieval_service.services.0.faiss_devices=\'0,1,2,3,4,5,6,7\' \ + retrieval_service.services.1.faiss_devices=\'0,1,2,3,4,5,6,7\' \ + retrieval_service.services.0.faiss_index=/result/pubmed_faiss_final.index \ + retrieval_service.services.0.retrieval_index=/result/pubmed_eval_text_document \ + retrieval_service.neighbors=2 \ + retrieval_service.pad_tokens=True \ + retrieval_service.store_retrieved=True \ + server=True \ + web_server=True \ + share=True \ + username=test \ + password=test123 +Set the retro_model_file to use the nemo file generated in the pre-training step. After launching the server, copy-paste the URL from +the terminal into your browser. Use the specified username and password to log in and have fun experimenting with the RETRO model. + +References +************ + +.. bibliography:: ../../nlp_all.bib + :style: plain + :labelprefix: nlp-retro + :keyprefix: nlp-retro- From 84ae944f4a9af5612389e26a1a15e63368737abc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 18 Jul 2023 09:26:28 -0700 Subject: [PATCH 116/123] Remove pyyaml (#7052) (#7054) Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar --- requirements/requirements_lightning.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/requirements_lightning.txt b/requirements/requirements_lightning.txt index 100216aebc54..9c41c355e8cd 100644 --- a/requirements/requirements_lightning.txt +++ b/requirements/requirements_lightning.txt @@ -1,7 +1,6 @@ hydra-core>=1.2.0,<1.3 omegaconf>=2.2,<2.3 pytorch-lightning>=1.9.0,<=1.9.4 -pyyaml<6 # Pinned until omegaconf works with pyyaml>=6 torchmetrics>=0.11.0 transformers>=4.0.1 wandb From 47e782a7fee12e0e0109004c6b395ca71524f16f Mon Sep 17 00:00:00 2001 From: "Aleksey Grinchuk (Oleksii Hrinchuk)" Date: Tue, 18 Jul 2023 12:17:41 -0700 Subject: [PATCH 117/123] st standalone model (#6969) * st standalone model Signed-off-by: AlexGrinch * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * style fix Signed-off-by: AlexGrinch * sacrebleu import fix, unused imports removed Signed-off-by: AlexGrinch * import guard for nlp inside asr transformer bpe model Signed-off-by: AlexGrinch * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeql fixes Signed-off-by: AlexGrinch * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * comments answered Signed-off-by: AlexGrinch * import ordering fix Signed-off-by: AlexGrinch * yttm for asr removed Signed-off-by: AlexGrinch * logging added Signed-off-by: AlexGrinch * added inference and translate method Signed-off-by: AlexGrinch * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --------- Signed-off-by: AlexGrinch Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../fast-conformer_transformer.yaml | 218 +++++++ .../speech_to_text_transformer.py | 70 ++ .../speech_translation/translate_speech.py | 210 ++++++ nemo/collections/asr/models/__init__.py | 1 + .../asr/models/transformer_bpe_models.py | 614 ++++++++++++++++++ .../tokenizers/sentencepiece_tokenizer.py | 2 +- 6 files changed, 1114 insertions(+), 1 deletion(-) create mode 100644 examples/asr/conf/speech_translation/fast-conformer_transformer.yaml create mode 100644 examples/asr/speech_translation/speech_to_text_transformer.py create mode 100644 examples/asr/speech_translation/translate_speech.py create mode 100644 nemo/collections/asr/models/transformer_bpe_models.py diff --git a/examples/asr/conf/speech_translation/fast-conformer_transformer.yaml b/examples/asr/conf/speech_translation/fast-conformer_transformer.yaml new file mode 100644 index 000000000000..4e480df62e59 --- /dev/null +++ b/examples/asr/conf/speech_translation/fast-conformer_transformer.yaml @@ -0,0 +1,218 @@ +# It contains the default values for training an autoregressive FastConformer-Transformer ST model with sub-word encoding. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of FastConformer-Transformer, other parameters are the same as in this config file. +# One extra (linear projection) layer is added between FastConformer encoder and Transformer decoder if they have different hidden sizes +# It is recommended to initialize FastConformer with ASR pre-trained encoder for better accuracy and faster convergence + +name: "FastConformer-Transformer-BPE-st" + +# Initialize model encoder with pre-trained ASR FastConformer encoder for faster convergence and improved accuracy +init_from_nemo_model: + model0: + path: ??? + include: ["preprocessor", "encoder"] + +model: + sample_rate: 16000 + label_smoothing: 0.0 + log_prediction: true # enables logging sample predictions in the output during training + + train_ds: + is_tarred: true + tarred_audio_filepaths: ??? + manifest_filepath: ??? + sample_rate: 16000 + shuffle: false + trim_silence: false + batch_size: 4 + num_workers: 8 + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: false + num_workers: 4 + pin_memory: true + use_start_end_token: true + + test_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: false + num_workers: 4 + pin_memory: true + use_start_end_token: true + + # recommend small vocab size of 128 or 256 when using 4x sub-sampling + # you may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? 
# path to directory which contains either tokenizer.model (bpe) or vocab.txt (wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + log: true + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + pad_value: 0.0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + # you may use lower time_masks for smaller models to have a faster convergence + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: dw_striding # vggnet or striding, vggnet may give better results but needs more memory + subsampling_factor: 8 # must be power of 2 + subsampling_conv_channels: 256 # -1 sets it to d_model + causal_downsampling: false + reduction: null + reduction_position: null + reduction_factor: 1 + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 9 + conv_norm_type: batch_norm + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_pre_encoder: 0.1 + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + transf_encoder: + num_layers: 0 + hidden_size: 512 + inner_size: 2048 + num_attention_heads: 8 + ffn_dropout: 0.1 + attn_score_dropout: 0.1 + attn_layer_dropout: 0.1 + + transf_decoder: + library: nemo + model_name: null + pretrained: false + max_sequence_length: 512 + num_token_types: 0 + embedding_dropout: 0.1 + learn_positional_encodings: false + hidden_size: 512 + inner_size: 2048 + num_layers: 6 + num_attention_heads: 4 + ffn_dropout: 0.1 + attn_score_dropout: 0.1 + attn_layer_dropout: 0.1 + hidden_act: relu + pre_ln: true + pre_ln_final_layer_norm: true + + head: + num_layers: 1 + activation: relu + log_softmax: true + dropout: 0.0 + use_transformer_init: true + + beam_search: + beam_size: 4 + len_pen: 0.0 + max_generation_delta: 50 + + optim: + name: adam + lr: 0.0001 + # optimizer arguments + betas: [0.9, 0.98] + # less necessity for weight_decay as we already have large augmentations with SpecAug + # you may need weight_decay for large models, stable AMP training, small datasets, or when lower augmentations are used + # weight decay of 0.0 with lr of 2.0 also works fine + #weight_decay: 1e-3 + + # scheduler setup + sched: + name: InverseSquareRootAnnealing + #d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 1000 + warmup_ratio: null + min_lr: 1e-6 + 
+trainer: + gpus: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: 100 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: 0.0 + precision: 16 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 100 # Interval of logging. + enable_progress_bar: True + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_sacreBLEU" + mode: "max" + save_top_k: 3 + always_save_nemo: True # saves the checkpoints as nemo files instead of PTL checkpoints + + # you need to set these two to True to continue the training + resume_if_exists: false + resume_ignore_no_checkpoint: false + + # You may use this section to create a W&B logger + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null \ No newline at end of file diff --git a/examples/asr/speech_translation/speech_to_text_transformer.py b/examples/asr/speech_translation/speech_to_text_transformer.py new file mode 100644 index 000000000000..0c0882859b88 --- /dev/null +++ b/examples/asr/speech_translation/speech_to_text_transformer.py @@ -0,0 +1,70 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +# Training the model +```sh +python speech_to_text_transformer.py \ + # (Optional: --config-path= --config-name=) \ + model.train_ds.audio.tarred_audio_filepaths= \ + model.train_ds.audio_manifest_filepath= \ + model.validation_ds.manifest_filepath= \ + model.test_ds.manifest_filepath= \ + model.tokenizer.dir= \ + model.tokenizer.model_path= \ + model.tokenizer.type= \ + trainer.gpus=-1 \ + trainer.accelerator="ddp" \ + trainer.max_epochs=100 \ + model.optim.name="adamw" \ + model.optim.lr=0.001 \ + model.optim.betas=[0.9,0.999] \ + model.optim.weight_decay=0.0001 \ + model.optim.sched.warmup_steps=2000 + exp_manager.create_wandb_logger=True \ + exp_manager.wandb_logger_kwargs.name="" \ + exp_manager.wandb_logger_kwargs.project="" +``` + + +""" + +import pytorch_lightning as pl +from omegaconf import OmegaConf + +from nemo.collections.asr.models import EncDecTransfModelBPE +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.exp_manager import exp_manager + + +@hydra_runner(config_path="../conf/speech_translation/", config_name="fast-conformer_transformer") +def main(cfg): + logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') + + trainer = pl.Trainer(**cfg.trainer) + exp_manager(trainer, cfg.get("exp_manager", None)) + asr_model = EncDecTransfModelBPE(cfg=cfg.model, trainer=trainer) + + # Initialize the weights of the model from another model, if provided via config + asr_model.maybe_init_from_pretrained_checkpoint(cfg) + trainer.fit(asr_model) + + if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.manifest_filepath is not None: + if asr_model.prepare_test(trainer): + trainer.test(asr_model) + + +if __name__ == '__main__': + main() diff --git a/examples/asr/speech_translation/translate_speech.py b/examples/asr/speech_translation/translate_speech.py new file mode 100644 index 000000000000..203852b52ee9 --- /dev/null +++ b/examples/asr/speech_translation/translate_speech.py @@ -0,0 +1,210 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import json +import os +from dataclasses import dataclass, is_dataclass +from typing import List, Optional, Union + +import pytorch_lightning as pl +import torch +from omegaconf import OmegaConf + +from nemo.collections.asr.modules.conformer_encoder import ConformerChangeConfig +from nemo.collections.asr.parts.utils.transcribe_utils import compute_output_filename, prepare_audio_data, setup_model +from nemo.core.config import hydra_runner +from nemo.utils import logging + +""" +Translate audio file on a single CPU/GPU. Useful for translations of moderate amounts of audio data. 
+ +# Arguments + model_path: path to .nemo ST checkpoint + pretrained_name: name of pretrained ST model (from NGC registry) + audio_dir: path to directory with audio files + dataset_manifest: path to dataset JSON manifest file (in NeMo format) + + output_filename: Output filename where the translations will be written + batch_size: batch size during inference + + cuda: Optional int to enable or disable execution of model on certain CUDA device. + allow_mps: Bool to allow using MPS (Apple Silicon M-series GPU) device if available + amp: Bool to decide if Automatic Mixed Precision should be used during inference + audio_type: Str filetype of the audio. Supported = wav, flac, mp3 + + overwrite_translations: Bool which when set allows repeated translations to overwrite previous results. + +# Usage +ST model can be specified by either "model_path" or "pretrained_name". +Data for translation can be defined with either "audio_dir" or "dataset_manifest". +Results are returned in a JSON manifest file. + +python translate_speech.py \ + model_path=null \ + pretrained_name=null \ + audio_dir="" \ + dataset_manifest="" \ + output_filename="" \ + batch_size=32 \ + cuda=0 \ + amp=True \ +""" + + +@dataclass +class ModelChangeConfig: + + # Sub-config for changes specific to the Conformer Encoder + conformer: ConformerChangeConfig = ConformerChangeConfig() + + +@dataclass +class TranslationConfig: + # Required configs + model_path: Optional[str] = None # Path to a .nemo file + pretrained_name: Optional[str] = None # Name of a pretrained model + audio_dir: Optional[str] = None # Path to a directory which contains audio files + dataset_manifest: Optional[str] = None # Path to dataset's JSON manifest + audio_key: str = 'audio_filepath' # Used to override the default audio key in dataset_manifest + eval_config_yaml: Optional[str] = None # Path to a yaml file of config of evaluation + + # General configs + output_filename: Optional[str] = None + batch_size: int = 32 + random_seed: Optional[int] = None # seed number going to be used in seed_everything() + + # Set `cuda` to int to define CUDA device. If 'None', will look for CUDA + # device anyway, and do inference on CPU only if CUDA device is not found. + # If `cuda` is a negative number, inference will be on CPU only. + cuda: Optional[int] = None + allow_mps: bool = False # allow to select MPS device (Apple Silicon M-series GPU) + amp: bool = False + audio_type: str = "wav" + + # Recompute model translation, even if the output folder exists with scores. 
+ overwrite_translations: bool = True + + # can be set to True to return list of translations instead of the config + # if True, will also skip writing anything to the output file + return_translations: bool = False + + +@hydra_runner(config_name="TranslationConfig", schema=TranslationConfig) +def main(cfg: TranslationConfig) -> Union[TranslationConfig, List[str]]: + logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}') + + for key in cfg: + cfg[key] = None if cfg[key] == 'None' else cfg[key] + + if is_dataclass(cfg): + cfg = OmegaConf.structured(cfg) + + if cfg.random_seed: + pl.seed_everything(cfg.random_seed) + + if cfg.model_path is None and cfg.pretrained_name is None: + raise ValueError("Both cfg.model_path and cfg.pretrained_name cannot be None!") + if cfg.audio_dir is None and cfg.dataset_manifest is None: + raise ValueError("Both cfg.audio_dir and cfg.dataset_manifest cannot be None!") + + # Load augmentor from exteranl yaml file which contains eval info, could be extend to other feature such VAD, P&C + augmentor = None + if cfg.eval_config_yaml: + eval_config = OmegaConf.load(cfg.eval_config_yaml) + augmentor = eval_config.test_ds.get("augmentor") + logging.info(f"Will apply on-the-fly augmentation on samples during translation: {augmentor} ") + + # setup GPU + if cfg.cuda is None: + if torch.cuda.is_available(): + device = [0] # use 0th CUDA device + accelerator = 'gpu' + map_location = torch.device('cuda:0') + elif cfg.allow_mps and hasattr(torch.backends, "mps") and torch.backends.mps.is_available(): + logging.warning( + "MPS device (Apple Silicon M-series GPU) support is experimental." + " Env variable `PYTORCH_ENABLE_MPS_FALLBACK=1` should be set in most cases to avoid failures." + ) + device = [0] + accelerator = 'mps' + map_location = torch.device('mps') + else: + device = 1 + accelerator = 'cpu' + map_location = torch.device('cpu') + else: + device = [cfg.cuda] + accelerator = 'gpu' + map_location = torch.device(f'cuda:{cfg.cuda}') + + logging.info(f"Inference will be done on device: {map_location}") + + asr_model, model_name = setup_model(cfg, map_location) + trainer = pl.Trainer(devices=device, accelerator=accelerator) + asr_model.set_trainer(trainer) + asr_model = asr_model.eval() + + # collect additional translation information + return_hypotheses = False + + # prepare audio filepaths and decide wether it's partial audio + filepaths, partial_audio = prepare_audio_data(cfg) + + # setup AMP (optional) + if cfg.amp and torch.cuda.is_available() and hasattr(torch.cuda, 'amp') and hasattr(torch.cuda.amp, 'autocast'): + logging.info("AMP enabled!\n") + autocast = torch.cuda.amp.autocast + else: + + @contextlib.contextmanager + def autocast(): + yield + + # Compute output filename + cfg = compute_output_filename(cfg, model_name) + + # if translations should not be overwritten, and already exists, skip re-translation step and return + if not cfg.return_translations and not cfg.overwrite_translations and os.path.exists(cfg.output_filename): + logging.info( + f"Previous translations found at {cfg.output_filename}, and flag `overwrite_translations`" + f"is {cfg.overwrite_translations}. Returning without re-translating text." 
+ ) + return cfg + + # translate audio + with autocast(): + with torch.no_grad(): + translations = asr_model.translate( + paths2audio_files=filepaths, batch_size=cfg.batch_size, return_hypotheses=return_hypotheses, + ) + + logging.info(f"Finished translating {len(filepaths)} files !") + logging.info(f"Writing translations into file: {cfg.output_filename}") + + if cfg.return_translations: + return translations + + # write audio translations + with open(cfg.output_filename, 'w', encoding='utf-8', newline='\n') as f: + for filepath, translation in zip(filepaths, translations): + item = {'audio_filepath': filepath, 'pred_translation': translation} + f.write(json.dumps(item, ensure_ascii=False) + "\n") + logging.info(f"Finished writing predictions to {cfg.output_filename}!") + + return cfg + + +if __name__ == '__main__': + main() # noqa pylint: disable=no-value-for-parameter diff --git a/nemo/collections/asr/models/__init__.py b/nemo/collections/asr/models/__init__.py index a7275faea3d0..34f2c4f62e29 100644 --- a/nemo/collections/asr/models/__init__.py +++ b/nemo/collections/asr/models/__init__.py @@ -33,3 +33,4 @@ from nemo.collections.asr.models.rnnt_models import EncDecRNNTModel from nemo.collections.asr.models.slu_models import SLUIntentSlotBPEModel from nemo.collections.asr.models.ssl_models import SpeechEncDecSelfSupervisedModel +from nemo.collections.asr.models.transformer_bpe_models import EncDecTransfModelBPE diff --git a/nemo/collections/asr/models/transformer_bpe_models.py b/nemo/collections/asr/models/transformer_bpe_models.py new file mode 100644 index 000000000000..178746795ae8 --- /dev/null +++ b/nemo/collections/asr/models/transformer_bpe_models.py @@ -0,0 +1,614 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import itertools +import json +import os +import tempfile +from math import ceil +from typing import Dict, List, Optional, Union + +import editdistance +import torch +import torch.distributed as dist +from omegaconf import DictConfig, OmegaConf +from pytorch_lightning import Trainer +from tqdm.auto import tqdm + +from nemo.collections.asr.data import audio_to_text_dataset +from nemo.collections.asr.data.audio_to_text_dali import DALIOutputs +from nemo.collections.asr.models.asr_model import ASRModel, ExportableEncDecModel +from nemo.collections.asr.parts.mixins import ASRBPEMixin +from nemo.collections.common.losses import SmoothedCrossEntropyLoss +from nemo.collections.common.metrics import GlobalAverageLossMetric +from nemo.collections.common.parts import transformer_weights_init + +from nemo.core.classes.common import typecheck +from nemo.core.neural_types import ( + AudioSignal, + ChannelType, + LabelsType, + LengthsType, + LogprobsType, + MaskType, + NeuralType, + SpectrogramType, +) +from nemo.utils import logging + +try: + from sacrebleu import corpus_bleu + from nemo.collections.nlp.modules.common import TokenClassifier + from nemo.collections.nlp.modules.common.lm_utils import get_transformer + from nemo.collections.nlp.modules.common.transformer import BeamSearchSequenceGenerator, TransformerEncoder + + NLP_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + NLP_AVAILABLE = False + logging.warning("Could not import NeMo NLP collection which is required for speech translation model.") + +__all__ = ['EncDecTransfModelBPE'] + + +def lens_to_mask(lens, max_length): + batch_size = lens.shape[0] + mask = torch.arange(max_length).repeat(batch_size, 1).to(lens.device) < lens[:, None] + return mask + + +class EncDecTransfModelBPE(ASRModel, ExportableEncDecModel, ASRBPEMixin): + """Base class for encoder decoder CTC-based models.""" + + def __init__(self, cfg: DictConfig, trainer: Trainer = None): + + if 'tokenizer' not in cfg: + raise ValueError("`cfg` must have `tokenizer` config to create a tokenizer !") + + # Setup the tokenizer + self._setup_tokenizer(cfg.tokenizer) + + super().__init__(cfg=cfg, trainer=trainer) + + # Setup audio preprocessor + self.preprocessor = EncDecTransfModelBPE.from_config_dict(self.cfg.preprocessor) + + # Setup audio encoder + self.encoder = EncDecTransfModelBPE.from_config_dict(self.cfg.encoder) + + # Add projection layer if encoder and decoder differ in hidden size + if self.cfg.encoder['d_model'] != self.cfg.transf_decoder['hidden_size']: + self.adapter = torch.nn.Linear(self.cfg.encoder['d_model'], self.cfg.transf_decoder['hidden_size']) + else: + self.adapter = torch.nn.Identity() + + transf_encoder_cfg_dict = OmegaConf.to_container(cfg.get('transf_encoder')) + + # Whether to add Transformer Encoder block between Conformer and Transformer Decoder + self.use_transf_encoder = False + if transf_encoder_cfg_dict['num_layers'] > 0: + self.use_transf_encoder = True + + self.transf_encoder = TransformerEncoder( + num_layers=transf_encoder_cfg_dict['num_layers'], + hidden_size=transf_encoder_cfg_dict['hidden_size'], + inner_size=transf_encoder_cfg_dict['inner_size'], + mask_future=False, + num_attention_heads=transf_encoder_cfg_dict['num_attention_heads'], + attn_score_dropout=transf_encoder_cfg_dict['attn_score_dropout'], + attn_layer_dropout=transf_encoder_cfg_dict['attn_layer_dropout'], + ffn_dropout=transf_encoder_cfg_dict['ffn_dropout'], + pre_ln=transf_encoder_cfg_dict.get('pre_ln', True), + 
pre_ln_final_layer_norm=transf_encoder_cfg_dict.get('pre_ln_final_layer_norm', True), + ) + std_init_range = 1 / transf_encoder_cfg_dict['hidden_size'] ** 0.5 + self.transf_encoder.apply(lambda module: transformer_weights_init(module, std_init_range)) + + transf_decoder_cfg_dict = OmegaConf.to_container(cfg.get('transf_decoder')) + + # Transformer decoder + vocab_size = 8 * ceil(self.tokenizer.vocab_size / 8) + transf_decoder_cfg_dict['vocab_size'] = vocab_size + library = transf_decoder_cfg_dict.pop('library', 'nemo') + model_name = transf_decoder_cfg_dict.pop('model_name', None) + pretrained = transf_decoder_cfg_dict.pop('pretrained', False) + self.transf_decoder = get_transformer( + library=library, + model_name=model_name, + pretrained=pretrained, + config_dict=transf_decoder_cfg_dict, + encoder=False, + pre_ln_final_layer_norm=transf_decoder_cfg_dict.get("pre_ln_final_layer_norm", False), + ) + + self.log_softmax = TokenClassifier( + hidden_size=self.transf_decoder.hidden_size, + num_classes=vocab_size, + activation=self.cfg.head.activation, + log_softmax=self.cfg.head.log_softmax, + dropout=self.cfg.head.dropout, + use_transformer_init=self.cfg.head.use_transformer_init, + ) + self.log_softmax.mlp.layer0.weight = self.transf_decoder.embedding.token_embedding.weight + std_init_range = 1 / self.transf_decoder.hidden_size ** 0.5 + self.transf_decoder.apply(lambda module: transformer_weights_init(module, std_init_range)) + self.log_softmax.apply(lambda module: transformer_weights_init(module, std_init_range)) + + # Beam Search decoding + self.beam_search = BeamSearchSequenceGenerator( + embedding=self.transf_decoder.embedding, + decoder=self.transf_decoder.decoder, + log_softmax=self.log_softmax, + max_sequence_length=self.transf_decoder.max_sequence_length, + beam_size=self.cfg.beam_search.beam_size, + bos=self.tokenizer.bos_id, + pad=self.tokenizer.pad_id, + eos=self.tokenizer.eos_id, + len_pen=self.cfg.beam_search.len_pen, + max_delta_length=self.cfg.beam_search.max_generation_delta, + ) + + # Define autoregressive CE loss + self.transf_loss = SmoothedCrossEntropyLoss( + pad_id=self.tokenizer.pad_id, label_smoothing=self.cfg.label_smoothing + ) + + if hasattr(self.cfg, 'spec_augment') and self.cfg.spec_augment is not None: + self.spec_augmentation = EncDecTransfModelBPE.from_config_dict(self.cfg.spec_augment) + else: + self.spec_augmentation = None + + self.val_loss = GlobalAverageLossMetric(dist_sync_on_step=False, take_avg_loss=True) + + @torch.no_grad() + def translate( + self, + paths2audio_files: List[str], + batch_size: int = 4, + logprobs: bool = False, + return_hypotheses: bool = False, + ) -> List[str]: + hypotheses = self.transcribe(paths2audio_files, batch_size, logprobs, return_hypotheses) + return hypotheses + + @torch.no_grad() + def transcribe( + self, + paths2audio_files: List[str], + batch_size: int = 4, + logprobs: bool = False, + return_hypotheses: bool = False, + ) -> List[str]: + """ + Uses greedy decoding to transcribe audio files. Use this method for debugging and prototyping. + Args: + paths2audio_files: (a list) of paths to audio files. \ + Recommended length per file is between 5 and 25 seconds. \ + But it is possible to pass a few hours long file if enough GPU memory is available. + batch_size: (int) batch size to use during inference. + Bigger will result in better throughput performance but would use more memory. + logprobs: (bool) pass True to get log probabilities instead of transcripts. 
+ return_hypotheses: (bool) Either return hypotheses or text + With hypotheses can do some postprocessing like getting timestamp or rescoring + Returns: + A list of transcriptions (or raw log probabilities if logprobs is True) in the same order as paths2audio_files + """ + if paths2audio_files is None or len(paths2audio_files) == 0: + return {} + + if return_hypotheses and logprobs: + raise ValueError( + "Either `return_hypotheses` or `logprobs` can be True at any given time." + "Returned hypotheses will contain the logprobs." + ) + + # We will store transcriptions here + hypotheses = [] + + # Model's mode and device + mode = self.training + device = next(self.parameters()).device + dither_value = self.preprocessor.featurizer.dither + pad_to_value = self.preprocessor.featurizer.pad_to + + try: + self.preprocessor.featurizer.dither = 0.0 + self.preprocessor.featurizer.pad_to = 0 + # Switch model to evaluation mode + self.eval() + # Freeze the encoder and decoder modules + self.encoder.freeze() + self.transf_decoder.freeze() + logging_level = logging.get_verbosity() + logging.set_verbosity(logging.WARNING) + # Work in tmp directory - will store manifest file there + with tempfile.TemporaryDirectory() as tmpdir: + with open(os.path.join(tmpdir, 'manifest.json'), 'w') as fp: + for audio_file in paths2audio_files: + entry = {'audio_filepath': audio_file, 'duration': 100000, 'text': 'nothing'} + fp.write(json.dumps(entry) + '\n') + + config = {'paths2audio_files': paths2audio_files, 'batch_size': batch_size, 'temp_dir': tmpdir} + + temporary_datalayer = self._setup_transcribe_dataloader(config) + for test_batch in tqdm(temporary_datalayer, desc="Transcribing"): + log_probs, encoded_len, enc_states, enc_mask = self.forward( + input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device) + ) + + beam_hypotheses = ( + self.beam_search( + encoder_hidden_states=enc_states, encoder_input_mask=enc_mask, return_beam_scores=False + ) + .detach() + .cpu() + .numpy() + ) + beam_hypotheses = [self.tokenizer.ids_to_text(hyp) for hyp in beam_hypotheses] + + if return_hypotheses: + # dump log probs per file + for idx in range(logits.shape[0]): + current_hypotheses[idx].y_sequence = logits[idx][: logits_len[idx]] + + hypotheses += beam_hypotheses + + del test_batch, log_probs, encoded_len, enc_states, enc_mask + finally: + # set mode back to its original value + self.train(mode=mode) + self.preprocessor.featurizer.dither = dither_value + self.preprocessor.featurizer.pad_to = pad_to_value + if mode is True: + self.encoder.unfreeze() + self.transf_decoder.unfreeze() + logging.set_verbosity(logging_level) + + return hypotheses + + def _setup_dataloader_from_config(self, config: Optional[Dict]): + + dataset = audio_to_text_dataset.get_audio_to_text_bpe_dataset_from_config( + config=config, + local_rank=self.local_rank, + global_rank=self.global_rank, + world_size=self.world_size, + tokenizer=self.tokenizer, + preprocessor_cfg=self.cfg.get("preprocessor", None), + ) + + if dataset is None: + return None + + shuffle = config['shuffle'] + if config.get('is_tarred', False): + shuffle = False + + if hasattr(dataset, 'collate_fn'): + collate_fn = dataset.collate_fn + else: + collate_fn = dataset.datasets[0].collate_fn + + return torch.utils.data.DataLoader( + dataset=dataset, + batch_size=config['batch_size'], + collate_fn=collate_fn, + drop_last=config.get('drop_last', False), + shuffle=shuffle, + num_workers=config.get('num_workers', 0), + pin_memory=config.get('pin_memory', False), + ) + + def 
setup_training_data(self, train_data_config: Optional[DictConfig]): + + # create audio-only data loader + self._update_dataset_config(dataset_name='train', config=train_data_config) + self._train_dl = self._setup_dataloader_from_config(config=train_data_config) + + # Need to set this because if using an IterableDataset, the length of the + # dataloader is the total number of samples rather than the number of batches, + # and this messes up the tqdm progress bar. So we set the number of steps manually + # (to the correct number) to fix this. + if 'is_tarred' in train_data_config and train_data_config['is_tarred']: + # We also need to check if limit_train_batches is already set. + # If it's an int, we assume that the user has set it to something sane, + # i.e. <= # training batches, and don't change it. Otherwise, adjust + # batches accordingly if it's a float (including 1.0). + if self._trainer is not None and isinstance(self._trainer.limit_train_batches, float): + self._trainer.limit_train_batches = int( + self._trainer.limit_train_batches + * ceil((len(self._train_dl.dataset) / self.world_size) / train_data_config['batch_size']) + ) + elif self._trainer is None: + logging.warning( + "Model Trainer was not set before constructing the dataset, incorrect number of " + "training batches will be used. Please set the trainer and rebuild the dataset." + ) + + def setup_validation_data(self, val_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the validation data loader via a Dict-like object. + Args: + val_data_config: A config that contains the information regarding construction + of an ASR Training dataset. + Supported Datasets: + - :class:`~nemo.collections.asr.data.audio_to_text.AudioToCharDataset` + - :class:`~nemo.collections.asr.data.audio_to_text.AudioToBPEDataset` + - :class:`~nemo.collections.asr.data.audio_to_text.TarredAudioToCharDataset` + - :class:`~nemo.collections.asr.data.audio_to_text.TarredAudioToBPEDataset` + - :class:`~nemo.collections.asr.data.audio_to_text_dali.AudioToCharDALIDataset` + """ + if 'shuffle' not in val_data_config: + val_data_config['shuffle'] = False + + # preserve config + self._update_dataset_config(dataset_name='validation', config=val_data_config) + self._validation_dl = self._setup_dataloader_from_config(config=val_data_config) + + def setup_test_data(self, test_data_config: Optional[Union[DictConfig, Dict]]): + """ + Sets up the test data loader via a Dict-like object. + Args: + test_data_config: A config that contains the information regarding construction + of an ASR Training dataset. 
+ Supported Datasets: + - :class:`~nemo.collections.asr.data.audio_to_text.AudioToCharDataset` + - :class:`~nemo.collections.asr.data.audio_to_text.AudioToBPEDataset` + - :class:`~nemo.collections.asr.data.audio_to_text.TarredAudioToCharDataset` + - :class:`~nemo.collections.asr.data.audio_to_text.TarredAudioToBPEDataset` + - :class:`~nemo.collections.asr.data.audio_to_text_dali.AudioToCharDALIDataset` + """ + if 'shuffle' not in test_data_config: + test_data_config['shuffle'] = False + + # preserve config + self._update_dataset_config(dataset_name='test', config=test_data_config) + self._test_dl = self._setup_dataloader_from_config(config=test_data_config) + + @property + def input_types(self) -> Optional[Dict[str, NeuralType]]: + if hasattr(self.preprocessor, '_sample_rate'): + input_signal_eltype = AudioSignal(freq=self.preprocessor._sample_rate) + else: + input_signal_eltype = AudioSignal() + return { + "input_signal": NeuralType(('B', 'T'), input_signal_eltype, optional=True), + "input_signal_length": NeuralType(tuple('B'), LengthsType(), optional=True), + "processed_signal": NeuralType(('B', 'D', 'T'), SpectrogramType(), optional=True), + "processed_signal_length": NeuralType(tuple('B'), LengthsType(), optional=True), + "transcript": NeuralType(('B', 'T'), LabelsType(), optional=True), + "transcript_length": NeuralType(tuple('B'), LengthsType(), optional=True), + "sample_id": NeuralType(tuple('B'), LengthsType(), optional=True), + } + + @property + def output_types(self) -> Optional[Dict[str, NeuralType]]: + return { + "transf_log_probs": NeuralType(('B', 'T', 'D'), LogprobsType()), + "encoded_lengths": NeuralType(tuple('B'), LengthsType()), + "encoder_states": NeuralType(('B', 'T', 'D'), ChannelType()), + "encoder_mask": NeuralType(('B', 'T'), MaskType()), + } + + @typecheck() + def forward( + self, + input_signal=None, + input_signal_length=None, + processed_signal=None, + processed_signal_length=None, + transcript=None, + transcript_length=None, + ): + """ + Forward pass of the model. + Args: + input_signal: Tensor that represents a batch of raw audio signals, + of shape [B, T]. T here represents timesteps, with 1 second of audio represented as + `self.sample_rate` number of floating point values. + input_signal_length: Vector of length B, that contains the individual lengths of the audio + sequences. + processed_signal: Tensor that represents a batch of processed audio signals, + of shape (B, D, T) that has undergone processing via some DALI preprocessor. + processed_signal_length: Vector of length B, that contains the individual lengths of the + processed audio sequences. + Returns: + A tuple of 3 elements - + 1) The log probabilities tensor of shape [B, T, D]. + 2) The lengths of the acoustic sequence after propagation through the encoder, of shape [B]. + 3) The greedy token predictions of the model of shape [B, T] (via argmax) + """ + has_input_signal = input_signal is not None and input_signal_length is not None + has_processed_signal = processed_signal is not None and processed_signal_length is not None + if (has_input_signal ^ has_processed_signal) == False: + raise ValueError( + f"{self} Arguments ``input_signal`` and ``input_signal_length`` are mutually exclusive " + " with ``processed_signal`` and ``processed_signal_len`` arguments." 
+ ) + + if not has_processed_signal: + processed_signal, processed_signal_length = self.preprocessor( + input_signal=input_signal, length=input_signal_length + ) + + if self.spec_augmentation is not None and self.training: + processed_signal = self.spec_augmentation(input_spec=processed_signal, length=processed_signal_length) + + encoded, encoded_len = self.encoder(audio_signal=processed_signal, length=processed_signal_length) + + enc_states = encoded.permute(0, 2, 1) + enc_states = self.adapter(enc_states) + enc_mask = lens_to_mask(encoded_len, enc_states.shape[1]).to(enc_states.dtype) + if self.use_transf_encoder: + enc_states = self.transf_encoder(encoder_states=enc_states, encoder_mask=enc_mask) + + transf_log_probs = None + if transcript is not None: + dec_mask = lens_to_mask(transcript_length, transcript.shape[1]).to(transcript.dtype) + dec_states = self.transf_decoder( + input_ids=transcript, decoder_mask=dec_mask, encoder_embeddings=enc_states, encoder_mask=enc_mask + ) + transf_log_probs = self.log_softmax(hidden_states=dec_states) + + return transf_log_probs, encoded_len, enc_states, enc_mask + + def compute_audio_loss(self, batch): + + if batch is None: + return 0 + + signal, signal_len, transcript, transcript_len = batch + input_ids, labels = transcript[:, :-1], transcript[:, 1:] + + transf_log_probs, encoded_len, enc_states, enc_mask = self.forward( + input_signal=signal, + input_signal_length=signal_len, + transcript=input_ids, + transcript_length=transcript_len, + ) + + transf_loss = self.transf_loss(log_probs=transf_log_probs, labels=labels) + + return transf_loss + + # PTL-specific methods + def training_step(self, batch, batch_nb): + + audio_loss = self.compute_audio_loss(batch) + + tensorboard_logs = { + 'train_loss': audio_loss, + 'learning_rate': self._optimizer.param_groups[0]['lr'], + } + + return {'loss': audio_loss, 'log': tensorboard_logs} + + def validation_step(self, batch, batch_idx, dataloader_idx=0, eval_mode="val"): + signal, signal_len, transcript, transcript_len = batch + input_ids, labels = transcript[:, :-1], transcript[:, 1:] + + if isinstance(batch, DALIOutputs) and batch.has_processed_signal: + transf_log_probs, encoded_len, enc_states, enc_mask = self.forward( + processed_signal=signal, + processed_signal_length=signal_len, + transcript=input_ids, + transcript_length=transcript_len, + ) + else: + transf_log_probs, encoded_len, enc_states, enc_mask = self.forward( + input_signal=signal, + input_signal_length=signal_len, + transcript=input_ids, + transcript_length=transcript_len, + ) + + beam_hypotheses = self.beam_search( + encoder_hidden_states=enc_states, encoder_input_mask=enc_mask, return_beam_scores=False + ) + transf_loss = self.transf_loss(log_probs=transf_log_probs, labels=labels) + + ground_truths = [self.tokenizer.ids_to_text(sent) for sent in transcript.detach().cpu().tolist()] + translations = [self.tokenizer.ids_to_text(sent) for sent in beam_hypotheses.detach().cpu().tolist()] + + self.val_loss(loss=transf_loss, num_measurements=transf_log_probs.shape[0] * transf_log_probs.shape[1]) + + return {f'{eval_mode}_loss': transf_loss, 'translations': translations, 'ground_truths': ground_truths} + + def test_step(self, batch, batch_idx, dataloader_idx=0): + return self.validation_step(batch, batch_idx, dataloader_idx, eval_mode="test") + + def multi_validation_epoch_end(self, outputs, dataloader_idx: int = 0, eval_mode: str = "val"): + """ + Called at the end of validation to aggregate outputs. 
+ :param outputs: list of individual outputs of each validation step. + """ + if not outputs: + return + + if isinstance(outputs[0], dict): + outputs = [outputs] + + for output in outputs: + eval_loss = getattr(self, 'val_loss').compute() + translations = list(itertools.chain(*[x['translations'] for x in output])) + ground_truths = list(itertools.chain(*[x['ground_truths'] for x in output])) + + # Gather translations and ground truths from all workers + tr_and_gt = [None for _ in range(self.world_size)] + # we also need to drop pairs where ground truth is an empty string + if self.world_size > 1: + dist.all_gather_object( + tr_and_gt, [(t, g) for (t, g) in zip(translations, ground_truths) if g.strip() != ''] + ) + else: + tr_and_gt[0] = [(t, g) for (t, g) in zip(translations, ground_truths) if g.strip() != ''] + + if self.global_rank == 0: + _translations = [] + _ground_truths = [] + for rank in range(0, self.world_size): + _translations += [t for (t, g) in tr_and_gt[rank]] + _ground_truths += [g for (t, g) in tr_and_gt[rank]] + + sacre_bleu = corpus_bleu(_translations, [_ground_truths], tokenize="13a") + sb_score = sacre_bleu.score * self.world_size + + wer_scores, wer_words = 0, 0 + for h, r in zip(_translations, _ground_truths): + wer_words += len(r.split()) + wer_scores += editdistance.eval(h.split(), r.split()) + wer_score = 1.0 * wer_scores * self.world_size / wer_words + + else: + sb_score = 0.0 + wer_score = 0.0 + + self.log(f"{eval_mode}_loss", eval_loss, sync_dist=True) + self.log(f"{eval_mode}_sacreBLEU", sb_score, sync_dist=True) + self.log(f"{eval_mode}_WER", wer_score, sync_dist=True) + self.val_loss.reset() + + def multi_test_epoch_end(self, outputs, dataloader_idx: int = 0): + return self.multi_validation_epoch_end(outputs, dataloader_idx, eval_mode="test") + + def test_dataloader(self): + if self._test_dl is not None: + return self._test_dl + + def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLoader': + """ + Setup function for a temporary data loader which wraps the provided audio file. + Args: + config: A python dictionary which contains the following keys: + paths2audio_files: (a list) of paths to audio files. The files should be relatively short fragments. \ + Recommended length per file is between 5 and 25 seconds. + batch_size: (int) batch size to use during inference. \ + Bigger will result in better throughput performance but would use more memory. + temp_dir: (str) A temporary directory where the audio manifest is temporarily + stored. + Returns: + A pytorch DataLoader for the given audio file(s). + """ + batch_size = min(config['batch_size'], len(config['paths2audio_files'])) + dl_config = { + 'manifest_filepath': os.path.join(config['temp_dir'], 'manifest.json'), + 'sample_rate': self.preprocessor._sample_rate, + 'batch_size': batch_size, + 'trim_silence': False, + 'shuffle': False, + 'num_workers': min(batch_size, os.cpu_count() - 1), + 'pin_memory': True, + } + + temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config)) + return temporary_datalayer diff --git a/nemo/collections/common/tokenizers/sentencepiece_tokenizer.py b/nemo/collections/common/tokenizers/sentencepiece_tokenizer.py index 0ab0cb784273..906154213ea1 100644 --- a/nemo/collections/common/tokenizers/sentencepiece_tokenizer.py +++ b/nemo/collections/common/tokenizers/sentencepiece_tokenizer.py @@ -299,7 +299,7 @@ def create_spt_model( byte_fallback: If , fallback to a byte sequence of the character. 
split_digits: If true, digits are split into individual tokens. split_by_whitespace: Whether to respect white space while creating subwords. If False, will learn merges across whitespace. - split_by_unicode_script: Whether to include multiple Unicode scripts. Ex. is Arabic diacritics which are considered part of the letter (عِدَّةُ) + split_by_unicode_script: Whether to include multiple Unicode scripts. Ex. is Arabic diacritics which are considered part of the letter (عِدَّةُ) """ if not data_file or not os.path.exists(data_file): From ea9d3fd7acd2f96dc7109752882c0ca53addf971 Mon Sep 17 00:00:00 2001 From: Evelina <10428420+ekmb@users.noreply.github.com> Date: Tue, 18 Jul 2023 23:32:20 -0700 Subject: [PATCH 118/123] remove pos emb from state dict for old models (#7068) * remove pos emb from state dict Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * move to nlp_model Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update comment Signed-off-by: Evelina * fix nmt test Signed-off-by: Evelina * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix nmt test Signed-off-by: Evelina --------- Signed-off-by: Evelina Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- nemo/collections/nlp/models/nlp_model.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/nlp_model.py b/nemo/collections/nlp/models/nlp_model.py index 032a7449c27e..d739efa88485 100644 --- a/nemo/collections/nlp/models/nlp_model.py +++ b/nemo/collections/nlp/models/nlp_model.py @@ -16,7 +16,7 @@ import hashlib import json import os -from typing import Any, Optional +from typing import Any, Mapping, Optional from omegaconf import DictConfig, OmegaConf from pytorch_lightning import Trainer @@ -385,3 +385,13 @@ def load_from_checkpoint( finally: cls._set_model_restore_state(is_being_restored=False) return checkpoint + + def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True): + # starting with trasformers v4.31.0, buffer for position_ids is persistent=False + if ( + self.bert_model is not None + and "position_ids" not in self.bert_model.embeddings._modules + and "bert_model.embeddings.position_ids" in state_dict + ): + del state_dict["bert_model.embeddings.position_ids"] + super(NLPModel, self).load_state_dict(state_dict, strict=strict) From b1aa4c2d8ba30a819e8fad85746799d0bc9e48ad Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Wed, 19 Jul 2023 16:03:56 +0400 Subject: [PATCH 119/123] Fix typo in ASR-TTS tutorial (#7049) Signed-off-by: Vladimir Bataev --- tutorials/asr/ASR_TTS_Tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/asr/ASR_TTS_Tutorial.ipynb b/tutorials/asr/ASR_TTS_Tutorial.ipynb index 939ef8a28d29..007713ee3cc2 100644 --- a/tutorials/asr/ASR_TTS_Tutorial.ipynb +++ b/tutorials/asr/ASR_TTS_Tutorial.ipynb @@ -685,7 +685,7 @@ "id": "2de58fbb-50be-42cd-9095-01cacfdb6931", "metadata": {}, "source": [ - "## Using Scritps (examples)" + "## Using Scripts (examples)" ] }, { From 1dde2676ad8b955cd7d75a2098e0a41e79b9fa27 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Wed, 19 Jul 2023 07:36:37 -0700 Subject: [PATCH 120/123] Fixed tutorial's name (#7047) Signed-off-by: Vitaly Lavrukhin Co-authored-by: Vladimir Bataev --- docs/source/starthere/tutorials.rst | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/starthere/tutorials.rst b/docs/source/starthere/tutorials.rst index 586ce46c0c38..3a0998197732 100644 --- a/docs/source/starthere/tutorials.rst +++ b/docs/source/starthere/tutorials.rst @@ -107,8 +107,8 @@ To run a tutorial: - Multi-lingual ASR - `Multi-lingual ASR `_ * - ASR - - Hybrid ASR-TTS Models Tutorial - - `Multi-lingual ASR `_ + - Hybrid ASR-TTS Models + - `Hybrid ASR-TTS Models `_ * - ASR - ASR Confidence Estimation - `ASR Confidence Estimation `_ From 6704a79d94eea55b07efa30b3c39c2998b2da5ed Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 19 Jul 2023 12:55:29 -0700 Subject: [PATCH 121/123] Fix documentation for Numba (#7065) (#7077) * Fix documentation for Numba * Update force float32 flag dynamically * Update force float32 flag dynamically * Fix nemo version --------- Signed-off-by: smajumdar Co-authored-by: Somshubra Majumdar Co-authored-by: Eric Harper --- README.rst | 4 ++-- docs/source/nlp/api.rst | 2 +- docs/source/starthere/intro.rst | 4 ++-- nemo/collections/asr/losses/rnnt.py | 7 +++++-- nemo/core/utils/numba_utils.py | 11 ++++++----- nemo/utils/model_utils.py | 3 ++- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index 7ac95b8cef70..0d3b23a964c0 100644 --- a/README.rst +++ b/README.rst @@ -132,8 +132,8 @@ Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training t Requirements ------------ -1) Python 3.8 or above -2) Pytorch 1.10.0 or above +1) Python 3.9 or above +2) Pytorch 1.13.1 or above 3) NVIDIA GPU for training Documentation diff --git a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index 0822ade0224c..b13dedca300f 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -124,7 +124,7 @@ Datasets .. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTDataset :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTChatDataset +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_chat_dataset.GPTSFTChatDataset :show-inheritance: .. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.retro_dataset.RETRODataset diff --git a/docs/source/starthere/intro.rst b/docs/source/starthere/intro.rst index 2e0e272c93f4..70426d3fe4a0 100644 --- a/docs/source/starthere/intro.rst +++ b/docs/source/starthere/intro.rst @@ -34,9 +34,9 @@ Prerequisites Before you begin using NeMo, it's assumed you meet the following prerequisites. -#. You have Python version 3.6, 3.7 or 3.8. +#. You have Python version 3.9, 3.10. -#. You have Pytorch version 1.8.1. +#. You have Pytorch version 1.13.1 or 2.0+. #. You have access to an NVIDIA GPU for training. 
diff --git a/nemo/collections/asr/losses/rnnt.py b/nemo/collections/asr/losses/rnnt.py index a884f7d3cc68..894be6319c99 100644 --- a/nemo/collections/asr/losses/rnnt.py +++ b/nemo/collections/asr/losses/rnnt.py @@ -99,7 +99,7 @@ class RNNTLossConfig: min_version='0.53.0', is_available=NUMBA_RNNT_AVAILABLE, installation_msg=NUMBA_INSTALLATION_MESSAGE, - force_float32=not numba_utils.NUMBA_FP16_SUPPORTED, + force_float32=False, # This is only temporarily false, will be dynamically updated during resolution ), "pytorch": RNNTLossConfig( loss_name="pytorch", @@ -258,6 +258,9 @@ def resolve_rnnt_loss(loss_name: str, blank_idx: int, loss_kwargs: dict = None) _warn_unused_additional_kwargs(loss_name, loss_kwargs) elif loss_name == 'warprnnt_numba': + # Update loss config's forced float32 flag if set to None + loss_config.force_float32 = not numba_utils.is_numba_cuda_fp16_supported() + fastemit_lambda = loss_kwargs.pop('fastemit_lambda', 0.0) clamp = loss_kwargs.pop('clamp', -1.0) loss_func = RNNTLossNumba(blank=blank_idx, reduction='none', fastemit_lambda=fastemit_lambda, clamp=clamp) @@ -444,7 +447,7 @@ def forward(self, log_probs, targets, input_lengths, target_lengths): max_targets_len = target_lengths.max() # Force cast joint to float32 - if not self._force_float32 and numba_utils.NUMBA_FP16_SUPPORTED: + if not self._force_float32 and numba_utils.is_numba_cuda_fp16_supported(): # Execute the kernel in fp16 pass elif self._force_float32 and log_probs.dtype != torch.float32: diff --git a/nemo/core/utils/numba_utils.py b/nemo/core/utils/numba_utils.py index 04010a2f7db4..9117b2ea1010 100644 --- a/nemo/core/utils/numba_utils.py +++ b/nemo/core/utils/numba_utils.py @@ -29,9 +29,6 @@ __NUMBA_MINIMUM_VERSION__ = os.environ.get("NEMO_NUMBA_MINVER", __NUMBA_DEFAULT_MINIMUM_VERSION__) __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__ = "0.57.0" -NUMBA_FP16_SUPPORTED = model_utils.check_lib_version( - 'numba', __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__, operator=operator.ge -)[0] NUMBA_INSTALLATION_MESSAGE = ( @@ -171,12 +168,16 @@ def is_numba_cuda_fp16_supported(return_reason: bool = False) -> Union[bool, Tup use_nvidia_binding = False reason += "Env variable `NUMBA_CUDA_USE_NVIDIA_BINDING` is not available or has not set to `1`." - if NUMBA_FP16_SUPPORTED: + numba_fp16_version_correct = model_utils.check_lib_version( + 'numba', __NUMBA_MINIMUM_VERSION_FP16_SUPPORTED__, operator=operator.ge + )[0] + + if numba_fp16_version_correct: reason += f"Numba CUDA FP16 is supported in installed numba version." else: reason += f"Numba CUDA FP16 is not supported in installed numba version." - result = use_nvidia_binding and NUMBA_FP16_SUPPORTED + result = use_nvidia_binding and numba_fp16_version_correct if return_reason: return result, reason diff --git a/nemo/utils/model_utils.py b/nemo/utils/model_utils.py index 211ffdcdf11e..42a0b108944d 100644 --- a/nemo/utils/model_utils.py +++ b/nemo/utils/model_utils.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import importlib import os from dataclasses import dataclass, is_dataclass from enum import Enum @@ -554,7 +555,7 @@ def check_lib_version(lib_name: str, checked_version: str, operator) -> Tuple[Op if '.' 
in lib_name: mod = import_class_by_path(lib_name) else: - mod = __import__(lib_name) + mod = importlib.import_module(lib_name) if hasattr(mod, '__version__'): lib_ver = version.Version(mod.__version__) From 39aff5ca3d52247e136a86429d567255c02a5d44 Mon Sep 17 00:00:00 2001 From: "He Huang (Steve)" <105218074+stevehuang52@users.noreply.github.com> Date: Wed, 19 Jul 2023 16:45:35 -0400 Subject: [PATCH 122/123] Update Frame-VAD doc and fix onnx export (#7076) * update fvad doc Signed-off-by: stevehuang52 * fix typo Signed-off-by: stevehuang52 * update fvad example Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 * fix onnx export Signed-off-by: stevehuang52 * update test Signed-off-by: stevehuang52 * refactor Signed-off-by: stevehuang52 * update doc Signed-off-by: stevehuang52 * update Signed-off-by: stevehuang52 --------- Signed-off-by: stevehuang52 Co-authored-by: fayejf <36722593+fayejf@users.noreply.github.com> --- .../conf/vad/frame_vad_infer_postprocess.yaml | 3 +- .../speech_classification/frame_vad_infer.py | 24 ++++++++--- .../asr/models/classification_models.py | 43 +++++++++++++++++++ nemo/collections/asr/parts/utils/vad_utils.py | 6 ++- .../asr/test_asr_classification_model.py | 4 +- 5 files changed, 69 insertions(+), 11 deletions(-) diff --git a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml index d759a809ec37..30c082aff91f 100644 --- a/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml +++ b/examples/asr/conf/vad/frame_vad_infer_postprocess.yaml @@ -1,6 +1,7 @@ name: &name "vad_inference_postprocessing" -dataset: null # Path of json file of evaluation data. Audio files should have unique names +input_manifest: null # Path of json file of evaluation data. Audio files should have unique names +output_dir: null # Path to output directory where results will be stored num_workers: 12 sample_rate: 16000 evaluate: False # whether to get AUROC and DERs, the manifest must contains groundtruth if enabled diff --git a/examples/asr/speech_classification/frame_vad_infer.py b/examples/asr/speech_classification/frame_vad_infer.py index f716eb45bb64..594cc9637d73 100644 --- a/examples/asr/speech_classification/frame_vad_infer.py +++ b/examples/asr/speech_classification/frame_vad_infer.py @@ -21,7 +21,8 @@ ## Usage: python frame_vad_infer.py \ --config-path="../conf/vad" --config-name="frame_vad_infer_postprocess" \ - dataset= + input_manifest= \ + output_dir= The manifest json file should have the following format (each line is a Python dictionary): {"audio_filepath": "/path/to/audio_file1", "offset": 0, "duration": 10000} @@ -58,15 +59,25 @@ @hydra_runner(config_path="../conf/vad", config_name="frame_vad_infer_postprocess") def main(cfg): - if not cfg.dataset: + if not cfg.input_manifest: raise ValueError("You must input the path of json file of evaluation data") + output_dir = cfg.output_dir if cfg.output_dir else "frame_vad_outputs" + if os.path.exists(output_dir): + logging.warning( + f"Output directory {output_dir} already exists, use this only if you're tuning post-processing params." 
+ ) + Path(output_dir).mkdir(parents=True, exist_ok=True) + + cfg.frame_out_dir = os.path.join(output_dir, "frame_preds") + cfg.smoothing_out_dir = os.path.join(output_dir, "smoothing_preds") + cfg.rttm_out_dir = os.path.join(output_dir, "rttm_preds") - # each line of dataset should be have different audio_filepath and unique name to simplify edge cases or conditions - logging.info(f"Loading manifest file {cfg.dataset}") + # each line of input_manifest should be have different audio_filepath and unique name to simplify edge cases or conditions + logging.info(f"Loading manifest file {cfg.input_manifest}") manifest_orig, key_labels_map, key_rttm_map = frame_vad_infer_load_manifest(cfg) # Prepare manifest for streaming VAD - manifest_vad_input = cfg.dataset + manifest_vad_input = cfg.input_manifest if cfg.prepare_manifest.auto_split: logging.info("Split long audio file to avoid CUDA memory issue") logging.debug("Try smaller split_duration if you still have CUDA memory issue") @@ -76,6 +87,7 @@ def main(cfg): 'split_duration': cfg.prepare_manifest.split_duration, 'num_workers': cfg.num_workers, 'prepared_manifest_vad_input': cfg.prepared_manifest_vad_input, + 'out_dir': output_dir, } manifest_vad_input = prepare_manifest(config) else: @@ -171,7 +183,7 @@ def main(cfg): key_pred_rttm_map[key] = entry['rttm_filepath'] if not cfg.out_manifest_filepath: - out_manifest_filepath = "manifest_vad_output.json" + out_manifest_filepath = os.path.join(output_dir, "manifest_vad_output.json") else: out_manifest_filepath = cfg.out_manifest_filepath write_manifest(out_manifest_filepath, manifest_new) diff --git a/nemo/collections/asr/models/classification_models.py b/nemo/collections/asr/models/classification_models.py index 432674225f5a..264e9cef99f8 100644 --- a/nemo/collections/asr/models/classification_models.py +++ b/nemo/collections/asr/models/classification_models.py @@ -35,6 +35,7 @@ from nemo.core.classes.common import PretrainedModelInfo, typecheck from nemo.core.neural_types import * from nemo.utils import logging, model_utils +from nemo.utils.cast_utils import cast_all __all__ = ['EncDecClassificationModel', 'EncDecRegressionModel'] @@ -851,6 +852,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): self.eval_loop_cnt = 0 self.ratio_threshold = cfg.get('ratio_threshold', 0.2) super().__init__(cfg=cfg, trainer=trainer) + self.decoder.output_types = self.output_types + self.decoder.output_types_for_export = self.output_types @classmethod def list_available_models(cls) -> Optional[List[PretrainedModelInfo]]: @@ -1148,3 +1151,43 @@ def get_metric_logits_labels(self, logits, labels, masks): labels = labels.gather(dim=0, index=idx.view(-1)) return logits, labels + + def forward_for_export( + self, input, length=None, cache_last_channel=None, cache_last_time=None, cache_last_channel_len=None + ): + """ + This forward is used when we need to export the model to ONNX format. + Inputs cache_last_channel and cache_last_time are needed to be passed for exporting streaming models. + Args: + input: Tensor that represents a batch of raw audio signals, + of shape [B, T]. T here represents timesteps. + length: Vector of length B, that contains the individual lengths of the audio sequences. 
+ cache_last_channel: Tensor of shape [N, B, T, H] which contains the cache for last channel layers + cache_last_time: Tensor of shape [N, B, H, T] which contains the cache for last time layers + N is the number of such layers which need caching, B is batch size, H is the hidden size of activations, + and T is the length of the cache + + Returns: + the output of the model + """ + enc_fun = getattr(self.input_module, 'forward_for_export', self.input_module.forward) + if cache_last_channel is None: + encoder_output = enc_fun(audio_signal=input, length=length) + if isinstance(encoder_output, tuple): + encoder_output = encoder_output[0] + else: + encoder_output, length, cache_last_channel, cache_last_time, cache_last_channel_len = enc_fun( + audio_signal=input, + length=length, + cache_last_channel=cache_last_channel, + cache_last_time=cache_last_time, + cache_last_channel_len=cache_last_channel_len, + ) + + dec_fun = getattr(self.output_module, 'forward_for_export', self.output_module.forward) + ret = dec_fun(hidden_states=encoder_output.transpose(1, 2)) + if isinstance(ret, tuple): + ret = ret[0] + if cache_last_channel is not None: + ret = (ret, length, cache_last_channel, cache_last_time, cache_last_channel_len) + return cast_all(ret, from_dtype=torch.float16, to_dtype=torch.float32) diff --git a/nemo/collections/asr/parts/utils/vad_utils.py b/nemo/collections/asr/parts/utils/vad_utils.py index e4f024d231ad..d8860a0c7cff 100644 --- a/nemo/collections/asr/parts/utils/vad_utils.py +++ b/nemo/collections/asr/parts/utils/vad_utils.py @@ -275,7 +275,9 @@ def generate_overlap_vad_seq( if out_dir: overlap_out_dir = out_dir else: - overlap_out_dir = frame_pred_dir + "/overlap_smoothing_output" + "_" + smoothing_method + "_" + str(overlap) + overlap_out_dir = os.path.join( + frame_pred_dir, "/overlap_smoothing_output" + "_" + smoothing_method + "_" + str(overlap) + ) if not os.path.exists(overlap_out_dir): os.mkdir(overlap_out_dir) @@ -732,7 +734,7 @@ def generate_vad_segment_table( if not out_dir: out_dir_name = "seg_output_" for key in postprocessing_params: - out_dir_name = out_dir_name + str(key) + str(postprocessing_params[key]) + "-" + out_dir_name = out_dir_name + "-" + str(key) + str(postprocessing_params[key]) out_dir = os.path.join(vad_pred_dir, out_dir_name) diff --git a/tests/collections/asr/test_asr_classification_model.py b/tests/collections/asr/test_asr_classification_model.py index 876bb6073a38..3888cb30204c 100644 --- a/tests/collections/asr/test_asr_classification_model.py +++ b/tests/collections/asr/test_asr_classification_model.py @@ -94,8 +94,8 @@ def frame_classification_model(): } decoder = { - 'cls': 'nemo.collections.asr.modules.ConvASRDecoderClassification', - 'params': {'feat_in': 32, 'num_classes': 5,}, + 'cls': 'nemo.collections.common.parts.MultiLayerPerceptron', + 'params': {'hidden_size': 32, 'num_classes': 5,}, } modelConfig = DictConfig( From d300a3af0af517c927f5e095cdd45abba65cc1c0 Mon Sep 17 00:00:00 2001 From: Adi Renduchintala Date: Thu, 20 Jul 2023 11:42:21 -0700 Subject: [PATCH 123/123] memmap worker arg (#7062) * memmap worker arg Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update Signed-off-by: arendu * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update Signed-off-by: arendu * update Signed-off-by: arendu --------- Signed-off-by: arendu Co-authored-by: pre-commit-ci[bot] 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../tuning/conf/megatron_gpt_peft_tuning_config.yaml | 3 +++ .../tuning/conf/megatron_gpt_sft.yaml | 3 +++ .../language_modeling/megatron/gpt_sft_dataset.py | 11 +++++++++-- .../language_modeling/megatron_gpt_sft_model.py | 3 +++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml index 799d105aae7c..d26dd2922088 100755 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_peft_tuning_config.yaml @@ -116,6 +116,7 @@ model: micro_batch_size: ${model.micro_batch_size} shuffle: True num_workers: 0 + memmap_workers: null pin_memory: True max_seq_length: 2048 min_seq_length: 1 @@ -143,6 +144,7 @@ model: micro_batch_size: ${model.micro_batch_size} shuffle: False num_workers: 0 + memmap_workers: ${model.data.train_ds.memmap_workers} pin_memory: True max_seq_length: 2048 min_seq_length: 1 @@ -170,6 +172,7 @@ model: micro_batch_size: ${model.micro_batch_size} shuffle: False num_workers: 4 + memmap_workers: ${model.data.train_ds.memmap_workers} pin_memory: True max_seq_length: 2048 min_seq_length: 1 diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml index 0e3f0d712dd6..f15138c99264 100644 --- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml +++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_sft.yaml @@ -82,6 +82,7 @@ model: micro_batch_size: ${model.micro_batch_size} shuffle: True num_workers: 4 + memmap_workers: null pin_memory: True max_seq_length: 2048 min_seq_length: 1 @@ -109,6 +110,7 @@ model: micro_batch_size: ${model.micro_batch_size} shuffle: True num_workers: 4 + memmap_workers: ${model.data.train_ds.memmap_workers} pin_memory: True max_seq_length: 2048 min_seq_length: 1 @@ -137,6 +139,7 @@ model: micro_batch_size: ${model.micro_batch_size} shuffle: True num_workers: 4 + memmap_workers: ${model.data.train_ds.memmap_workers} pin_memory: True max_seq_length: 2048 min_seq_length: 1 diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py index 756494f2f315..da3d03199c2e 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + import numpy as np import torch @@ -40,12 +42,13 @@ def __init__( label_key: str = "answer", separate_prompt_and_response_with_newline: bool = False, answer_only_loss: bool = True, - truncation_field: str = "answer", + truncation_field: str = "context", pad_to_max_length: bool = False, # (@adithyare) allows for much faster training especially in PEFT settings. index_mapping_dir: str = None, prompt_template: str = None, virtual_tokens: int = 0, tokens_to_generate: int = 0, + memmap_workers: Optional[int] = None, ): """ file_path: Path to a JSONL GPT supervised fine-tuning dataset. Data is formatted as multiple JSON lines with each line formatted as follows. {'input': 'John von Neumann\nVon Neumann made fundamental contributions .... 
Q: What did the math of artificial viscosity do?', 'output': 'smoothed the shock transition without sacrificing basic physics'} @@ -94,7 +97,11 @@ def __init__( assert self.truncation_field in ["answer", "context"] self.indexed_dataset = JSONLMemMapDataset( - dataset_paths=[file_path], tokenizer=None, header_lines=0, index_mapping_dir=index_mapping_dir + dataset_paths=[file_path], + tokenizer=None, + header_lines=0, + index_mapping_dir=index_mapping_dir, + workers=memmap_workers, ) # Will be None after this call if `max_num_samples` is None diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py index c390a8c440bf..95108e90f087 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py @@ -266,6 +266,9 @@ def _build_dataset(self, data_cfg, is_train=True): tokens_to_generate=data_cfg.get( 'tokens_to_generate', 0 ), # used at inference time to allocate tensor positions for tokens that will be generated by inf procedure. + memmap_workers=data_cfg.get( + 'memmap_workers', None + ), # used to set num. of workers to create the memmap index files ) datasets.append(dataset)
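
As a usage sketch for the `memmap_workers` option introduced in the last patch (an illustration, not part of the patch itself): the key is set on the training dataset config, and the validation/test configs interpolate it from `model.data.train_ds.memmap_workers`, so a single Hydra override such as `model.data.train_ds.memmap_workers=4` on the tuning script's command line propagates to all three splits. The value 4 below is an assumed example, not a default from the patch.

    # excerpt of megatron_gpt_peft_tuning_config.yaml with the new key filled in
    model:
      data:
        train_ds:
          memmap_workers: 4   # num. of workers used to create the memmap index files; null keeps the dataset default
        validation_ds:
          memmap_workers: ${model.data.train_ds.memmap_workers}
        test_ds:
          memmap_workers: ${model.data.train_ds.memmap_workers}

Internally, `_build_dataset` forwards `data_cfg.get('memmap_workers', None)` to `GPTSFTDataset`, which passes it as `workers` to `JSONLMemMapDataset`, so leaving the key at null defers to that dataset's default worker count.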