From e9b670b62d0462f9e0c1dc06af6ef087edeb9610 Mon Sep 17 00:00:00 2001
From: Evelina <10428420+ekmb@users.noreply.github.com>
Date: Fri, 30 Sep 2022 08:43:08 -0700
Subject: [PATCH] Bug fixes for parallel mp3 to wav conversion, PC notebook, update Readme for TN requirements (#5047)

* bug fixes segmentation, pc

Signed-off-by: ekmb

* bug fixes segmentation, pc

Signed-off-by: ekmb

* install pynini

Signed-off-by: ekmb

* add requirements install back

Signed-off-by: ekmb

* update sox requirements

Signed-off-by: ekmb

Signed-off-by: ekmb
---
 Jenkinsfile                                   |  2 +-
 README.rst                                    |  8 ++
 .../text_normalization/normalize.py           |  4 +-
 tools/ctc_segmentation/README.md              |  6 ++
 tools/ctc_segmentation/run_filter.sh          |  7 +-
 tools/ctc_segmentation/run_segmentation.sh    |  4 +-
 tools/ctc_segmentation/scripts/cut_audio.py   | 10 +++
 .../scripts/get_metrics_and_filter.py         | 13 +++-
 .../ctc_segmentation/scripts/prepare_data.py  | 73 +++++++++++-------
 .../scripts/run_ctc_segmentation.py           | 15 +++-
 .../export_grammars.sh                        |  2 +-
 tutorials/AudioTranslationSample.ipynb        |  6 +-
 tutorials/VoiceSwapSample.ipynb               |  6 +-
 .../nlp/Punctuation_and_Capitalization.ipynb  | 75 ++++++++++---------
 .../Text_(Inverse)_Normalization.ipynb        |  6 +-
 tutorials/text_processing/WFST_Tutorial.ipynb |  6 +-
 16 files changed, 166 insertions(+), 77 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index a9c1459bfe01..0b07679b7457 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -760,7 +760,7 @@ pipeline {
       steps {
         sh 'cd tools/ctc_segmentation && \
             pip install -r requirements.txt && \
-            DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg'
+            apt-get update && apt-get install libsox-fmt-all -y'
       }
     }

diff --git a/README.rst b/README.rst
index 34b5a125380a..2ef2da5bd93f 100644
--- a/README.rst
+++ b/README.rst
@@ -207,6 +207,14 @@ Megatron GPT training requires NVIDIA Apex to be installed.
     git checkout nm_v1.11.0
     pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" --global-option="--distributed_adam" --global-option="--deprecated_fused_adam" ./

+NeMo Text Processing
+~~~~~~~~~~~~~~~~~~~~
+NeMo Text Processing, specifically (Inverse) Text Normalization, requires `Pynini <https://pypi.org/project/pynini/>`_ to be installed.
+
+.. code-block:: bash
+
+    bash NeMo/nemo_text_processing/install_pynini.sh
+
 Docker containers:
 ~~~~~~~~~~~~~~~~~~
 To build a nemo container with Dockerfile from a branch, please run

diff --git a/nemo_text_processing/text_normalization/normalize.py b/nemo_text_processing/text_normalization/normalize.py
index 9a55300bcc95..6235b88e429c 100644
--- a/nemo_text_processing/text_normalization/normalize.py
+++ b/nemo_text_processing/text_normalization/normalize.py
@@ -156,7 +156,7 @@ def normalize_list(

         try:
             normalized_texts = Parallel(n_jobs=n_jobs)(
-                delayed(self.__process_batch)(texts[i : i + batch], verbose, punct_pre_process, punct_post_process)
+                delayed(self.process_batch)(texts[i : i + batch], verbose, punct_pre_process, punct_post_process)
                 for i in range(0, len(texts), batch)
             )
         except BaseException as e:
@@ -165,7 +165,7 @@ def normalize_list(
         normalized_texts = list(itertools.chain(*normalized_texts))
         return normalized_texts

-    def __process_batch(self, batch, verbose, punct_pre_process, punct_post_process):
+    def process_batch(self, batch, verbose, punct_pre_process, punct_post_process):
         """
         Normalizes batch of text sequences
         Args:
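Editor's note on the hunk above: `__process_batch` is renamed to `process_batch` most likely because Python name-mangles double-underscore methods, so joblib's multiprocessing workers cannot pickle the bound method and `normalize_list` fails in parallel mode. A minimal usage sketch of the parallel path follows; the `Normalizer` constructor arguments are assumptions for illustration, and only the keyword flags forwarded to `process_batch` are confirmed by the hunk itself:

```python
from nemo_text_processing.text_normalization.normalize import Normalizer

# Constructor args are illustrative; this patch touches only
# normalize_list/process_batch, not __init__.
normalizer = Normalizer(input_case="cased", lang="en")

texts = ["It costs $5.", "Meet me at 10:30 am."]
# verbose / punct_pre_process / punct_post_process are the flags the hunk
# above forwards to each process_batch call; batching and n_jobs keep
# their defaults here.
normalized = normalizer.normalize_list(
    texts, verbose=False, punct_pre_process=False, punct_post_process=False
)
print(normalized)
```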
diff --git a/tools/ctc_segmentation/README.md b/tools/ctc_segmentation/README.md
index bcfe5e7bd96a..c2f8c37466f5 100644
--- a/tools/ctc_segmentation/README.md
+++ b/tools/ctc_segmentation/README.md
@@ -29,3 +29,9 @@ abstract="Recent end-to-end Automatic Speech Recognition (ASR) systems demonstra
 isbn="978-3-030-60276-5"
 }
 ```
+Requirements
+------------
+The tool requires:
+- packages listed in requirements.txt
+- NeMo ASR
+- see pysox’s documentation (https://pysox.readthedocs.io/en/latest/) if you want support for mp3, flac and ogg files

diff --git a/tools/ctc_segmentation/run_filter.sh b/tools/ctc_segmentation/run_filter.sh
index 7291419e97b2..9220f4c2f0dc 100644
--- a/tools/ctc_segmentation/run_filter.sh
+++ b/tools/ctc_segmentation/run_filter.sh
@@ -12,6 +12,8 @@ CER_THRESHOLD=30
 WER_THRESHOLD=75
 CER_EDGE_THRESHOLD=60
 LEN_DIFF_RATIO_THRESHOLD=0.3
+MIN_DURATION=1 # in seconds
+MAX_DURATION=20 # in seconds

 for ARG in "$@"
 do
@@ -46,6 +48,7 @@ python ${SCRIPTS_DIR}/../../../examples/asr/transcribe_speech.py \
 $ARG_MODEL=$MODEL_NAME_OR_PATH \
 dataset_manifest=$MANIFEST \
 output_filename=${OUT_MANIFEST} \
+batch_size=${BATCH_SIZE} \
 num_workers=0 || exit

 echo "--- Calculating metrics and filtering out samples based on thresholds ---"
@@ -60,4 +63,6 @@ python ${SCRIPTS_DIR}/get_metrics_and_filter.py \
 --max_cer=${CER_THRESHOLD} \
 --max_wer=${WER_THRESHOLD} \
 --max_len_diff_ratio=${LEN_DIFF_RATIO_THRESHOLD} \
---max_edge_cer=${CER_EDGE_THRESHOLD}
+--max_edge_cer=${CER_EDGE_THRESHOLD} \
+--min_duration=${MIN_DURATION} \
+--max_duration=${MAX_DURATION}

diff --git a/tools/ctc_segmentation/run_segmentation.sh b/tools/ctc_segmentation/run_segmentation.sh
index a87f4d1e782b..913c09aed4e5 100644
--- a/tools/ctc_segmentation/run_segmentation.sh
+++ b/tools/ctc_segmentation/run_segmentation.sh
@@ -11,6 +11,7 @@ ADDITIONAL_SPLIT_SYMBOLS=":|;"
 USE_NEMO_NORMALIZATION='True'
 NUM_JOBS=-2 # The maximum number of concurrently running jobs, `-2` - all CPUs but one are used
 SAMPLE_RATE=16000 # Target sample rate (default for ASR data - 16000 Hz)
+MAX_DURATION=20 # Maximum audio segment duration, in seconds. Samples that are longer will be dropped.
 
for ARG in "$@" do @@ -103,4 +104,5 @@ python $SCRIPTS_DIR/cut_audio.py \ --alignment=$OUTPUT_DIR/verified_segments \ --threshold=$MIN_SCORE \ --offset=$OFFSET \ ---sample_rate=$SAMPLE_RATE || exit +--sample_rate=$SAMPLE_RATE \ +--max_duration=$MAX_DURATION || exit diff --git a/tools/ctc_segmentation/scripts/cut_audio.py b/tools/ctc_segmentation/scripts/cut_audio.py index 5698733c4803..040b0d96dd09 100644 --- a/tools/ctc_segmentation/scripts/cut_audio.py +++ b/tools/ctc_segmentation/scripts/cut_audio.py @@ -39,6 +39,12 @@ default=0.05, ) parser.add_argument("--sample_rate", type=int, help="Sample rate, Hz", default=16000) +parser.add_argument( + "--max_duration", + type=int, + help="Maximum audio duration (seconds). Samples that are longer will be dropped", + default=60, +) def process_alignment(alignment_file: str, manifest: str, clips_dir: str, args): @@ -60,6 +66,7 @@ def process_alignment(alignment_file: str, manifest: str, clips_dir: str, args): segments = [] ref_text_processed = [] ref_text_no_preprocessing = [] + ref_text_normalized = [] with open(alignment_file, "r") as f: for line in f: line = line.split("|") @@ -69,6 +76,7 @@ def process_alignment(alignment_file: str, manifest: str, clips_dir: str, args): continue ref_text_processed.append(line[1].strip()) ref_text_no_preprocessing.append(line[2].strip()) + ref_text_normalized.append(line[3].strip()) line = line[0].split() segments.append((float(line[0]) + args.offset / 1000, float(line[1]) + args.offset / 1000, float(line[2]))) @@ -86,6 +94,7 @@ def process_alignment(alignment_file: str, manifest: str, clips_dir: str, args): if duration > 0: text_processed = ref_text_processed[i].strip() text_no_preprocessing = ref_text_no_preprocessing[i].strip() + text_normalized = ref_text_normalized[i].strip() if score >= args.threshold: high_score_dur += duration audio_filepath = os.path.join(clips_dir, f"{base_name}_{i:04}.wav") @@ -98,6 +107,7 @@ def process_alignment(alignment_file: str, manifest: str, clips_dir: str, args): "duration": duration, "text": text_processed, "text_no_preprocessing": text_no_preprocessing, + "text_normalized": text_normalized, "score": round(score, 2), "start_abs": float(np.mean(np.abs(segment[:num_samples]))), "end_abs": float(np.mean(np.abs(segment[-num_samples:]))), diff --git a/tools/ctc_segmentation/scripts/get_metrics_and_filter.py b/tools/ctc_segmentation/scripts/get_metrics_and_filter.py index ead54e795980..60b658e01af1 100644 --- a/tools/ctc_segmentation/scripts/get_metrics_and_filter.py +++ b/tools/ctc_segmentation/scripts/get_metrics_and_filter.py @@ -45,6 +45,7 @@ ) parser.add_argument("--max_edge_cer", type=int, help="Threshold edge CER value, %", default=60) parser.add_argument("--max_duration", type=int, help="Max duration of a segment, seconds", default=-1) +parser.add_argument("--min_duration", type=int, help="Min duration of a segment, seconds", default=1) parser.add_argument( "--num_jobs", default=-2, @@ -108,7 +109,15 @@ def get_metrics(manifest, manifest_out): def _apply_filters( - manifest, manifest_out, max_cer, max_wer, max_edge_cer, max_len_diff_ratio, max_dur=-1, original_duration=0 + manifest, + manifest_out, + max_cer, + max_wer, + max_edge_cer, + max_len_diff_ratio, + max_dur=-1, + min_dur=1, + original_duration=0, ): """ Filters out samples that do not satisfy specified threshold values and saves remaining samples to manifest_out""" remaining_duration = 0 @@ -128,6 +137,7 @@ def _apply_filters( and item["end_CER"] <= max_edge_cer and item["start_CER"] <= max_edge_cer and (max_dur 
== -1 or (max_dur > -1 and duration < max_dur)) + and duration > min_dur ): remaining_duration += duration f_out.write(json.dumps(item) + "\n") @@ -180,6 +190,7 @@ def filter(manifest): max_edge_cer=args.max_edge_cer, max_len_diff_ratio=args.max_len_diff_ratio, max_dur=args.max_duration, + min_dur=args.min_duration, original_duration=original_duration, ) diff --git a/tools/ctc_segmentation/scripts/prepare_data.py b/tools/ctc_segmentation/scripts/prepare_data.py index 2231c50820c3..147f337f4aa1 100644 --- a/tools/ctc_segmentation/scripts/prepare_data.py +++ b/tools/ctc_segmentation/scripts/prepare_data.py @@ -15,15 +15,14 @@ import argparse import os import re -from pathlib import Path -from typing import List +from glob import glob +from typing import List, Optional import regex from joblib import Parallel, delayed from normalization_helpers import LATIN_TO_RU, RU_ABBREVIATIONS from num2words import num2words -from pydub import AudioSegment -from pydub.utils import mediainfo +from sox import Transformer from tqdm import tqdm from nemo.collections.asr.models import ASRModel @@ -42,6 +41,7 @@ parser.add_argument("--output_dir", type=str, required=True, help="Path to output directory") parser.add_argument("--audio_dir", type=str, help="Path to folder with .mp3 or .wav audio files") parser.add_argument("--sample_rate", type=int, default=16000, help="Sampling rate used during ASR model training, Hz") +parser.add_argument("--bit_depth", type=int, default=16, help="Bit depth to use for processed audio files") parser.add_argument("--n_jobs", default=-2, type=int, help="The maximum number of concurrently running jobs") parser.add_argument( "--language", @@ -65,16 +65,21 @@ default="", help="Additional symbols to use for \ sentence split if eos sentence split resulted in sequence longer than --max_length. " - "Use '|' as a separator between symbols, for example: ';|:' ", + "Use '|' as a separator between symbols, for example: ';|:'. 
Use '\s' to split by space.",
 )
 parser.add_argument(
     "--use_nemo_normalization",
     action="store_true",
     help="Set to True to use NeMo Normalization tool to convert numbers from written to spoken format.",
 )
+parser.add_argument(
+    "--batch_size", type=int, default=100, help="Batch size for NeMo Normalization tool.",
+)
 
 
-def process_audio(in_file: str, wav_file: str = None, cut_prefix: int = 0, sample_rate: int = 16000):
+def process_audio(
+    in_file: str, wav_file: str = None, cut_prefix: int = 0, sample_rate: int = 16000, bit_depth: int = 16
+):
     """Process audio file: .mp3 to .wav conversion and cut a few seconds from the beginning of the audio
 
     Args:
@@ -82,15 +87,15 @@ def process_audio(in_file: str, wav_file: str = None, cut_prefix: int = 0, sampl
         wav_file: path to the output .wav file
         cut_prefix: number of seconds to cut from the beginning of the audio file
         sample_rate: target sampling rate
+        bit_depth: target bit_depth
     """
     try:
-        info = mediainfo(in_file)
-        sound = AudioSegment.from_file(in_file, start_second=cut_prefix)
-        if info["sample_rate"] != str(sample_rate):
-            sound = sound.set_frame_rate(sample_rate)
-        if info["channels"] != 1:
-            sound = sound.set_channels(1)
-        sound.export(wav_file, format="wav")
+        if not os.path.exists(in_file):
+            raise ValueError(f'{in_file} not found')
+        tfm = Transformer()
+        tfm.convert(samplerate=sample_rate, n_channels=1, bitdepth=bit_depth)
+        tfm.trim(cut_prefix)
+        tfm.build(input_filepath=in_file, output_filepath=wav_file)
     except Exception as e:
         print(f'{in_file} skipped - {e}')
 
 
@@ -100,11 +105,13 @@ def split_text(
     out_file: str,
     vocabulary: List[str],
     language="en",
-    remove_brackets=True,
-    do_lower_case=True,
-    max_length=100,
-    additional_split_symbols=None,
-    use_nemo_normalization=False,
+    remove_brackets: bool = True,
+    do_lower_case: bool = True,
+    max_length: int = 100,
+    additional_split_symbols: Optional[str] = None,
+    use_nemo_normalization: bool = False,
+    n_jobs: Optional[int] = 1,
+    batch_size: Optional[int] = 1,
 ):
     """
     Breaks down the in_file roughly into sentences. Each sentence will be on a separate line.
@@ -124,6 +131,10 @@ def split_text(
         use_nemo_normalization: Set to True to use NeMo normalization tool to convert numbers from written
             to spoken format. Normalization using num2words will be applied afterwards to make sure there are no
             numbers present in the text, otherwise they will be replaced with a space and that could deteriorate
            segmentation results.
+        n_jobs (if use_nemo_normalization=True): the maximum number of concurrently running jobs. If -1 all CPUs are used. If 1 is given,
+            no parallel computing code is used at all, which is useful for debugging. For n_jobs below -1,
+            (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used.
+        batch_size (if use_nemo_normalization=True): Number of examples for each process
     """
     print(f"Splitting text in {in_file} into sentences.")
     with open(in_file, "r") as f:
@@ -140,7 +151,6 @@ def split_text(
         )
 
     # remove extra space
     transcript = re.sub(r" +", " ", transcript)
-    transcript = re.sub(r"(\.+)", ". ", transcript)
 
     if remove_brackets:
         transcript = re.sub(r'(\[.*?\])', ' ', transcript)
@@ -175,7 +185,7 @@ def split_text(
     sentences = [s.strip() for s in sentences if s.strip()]
 
     # Read and split transcript by utterance (roughly, sentences)
-    split_pattern = f"(?
0: start_time = time.time() - index_duration = len(signal) / log_probs.shape[0] / sample_rate + normalized_lines = Parallel(n_jobs=args.num_jobs)( delayed(get_segments)( all_log_probs[i], diff --git a/tools/text_processing_deployment/export_grammars.sh b/tools/text_processing_deployment/export_grammars.sh index a5960accf6a8..981caadadab5 100644 --- a/tools/text_processing_deployment/export_grammars.sh +++ b/tools/text_processing_deployment/export_grammars.sh @@ -32,7 +32,7 @@ GRAMMARS="itn_grammars" # tn_grammars INPUT_CASE="cased" # lower_cased, only for tn_grammars -LANGUAGE="en" # language, {'en', 'es', 'de','zh'} supports both TN and ITN, {'pt', 'ru', 'fr'} supports ITN only +LANGUAGE="en" # language, {'en', 'es', 'de','zh'} supports both TN and ITN, {'pt', 'ru', 'fr', 'vi'} supports ITN only MODE="export" OVERWRITE_CACHE="True" # Set to False to re-use .far files FORCE_REBUILD="False" # Set to True to re-build docker file diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index 0f12e73abf4d..958c6a9fe637 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -39,7 +39,11 @@ "outputs": [], "source": [ "BRANCH = 'r1.12.0'\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" + "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", + "\n", + "# install Pynini for text normalization\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/nemo_text_processing/install_pynini.sh\n", + "! bash install_pynini.sh" ] }, { diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index a298eabe81d5..4fc7f9ae6f6c 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -40,7 +40,11 @@ "outputs": [], "source": [ "BRANCH = 'r1.12.0'\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" + "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", + "\n", + "# install Pynini for text normalization\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/nemo_text_processing/install_pynini.sh\n", + "! 
bash install_pynini.sh"
   ]
  },
  {
diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb
index c4751336f4bc..400b71b8cb16 100644
--- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb
+++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb
@@ -219,15 +219,15 @@
    "id": "UOeeeCGqI-9c"
   },
   "outputs": [],
-   "source": [
-    "## download get_tatoeba_data.py script to download and preprocess the Tatoeba data\n",
-    "os.makedirs(WORK_DIR, exist_ok=True)\n",
-    "if not os.path.exists(WORK_DIR + '/get_tatoeba_data.py'):\n",
-    "    print('Downloading get_tatoeba_data.py...')\n",
-    "    wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/token_classification/data/get_tatoeba_data.py', WORK_DIR)\n",
-    "else:\n",
-    "    print ('get_tatoeba_data.py is already exists')"
-   ]
+   "source": [
+    "## download get_tatoeba_data.py script to download and preprocess the Tatoeba data\n",
+    "os.makedirs(WORK_DIR, exist_ok=True)\n",
+    "if not os.path.exists(f\"{WORK_DIR}/get_tatoeba_data.py\"):\n",
+    "    print('Downloading get_tatoeba_data.py...')\n",
+    "    wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/token_classification/data/get_tatoeba_data.py', WORK_DIR)\n",
+    "else:\n",
+    "    print ('get_tatoeba_data.py already exists')"
+   ]
  },
  {
   "cell_type": "code",
@@ -293,21 +293,15 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "As you see, `get_tatoeba_data.py` script provides not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cwd = os.getcwd()\n",
-    "NEMO_ROOT = \"~/NeMo\"\n",
-    "!python $NEMO_ROOT/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py \\\n",
-    "    --source_file $DATA_DIR/text_train.txt \\\n",
-    "    --output_dir $DATA_DIR/my_train_preprocessed"
-   ]
+    "As you can see, the `get_tatoeba_data.py` script not only downloads Tatoeba but also creates labels. 
If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", + "\n", + "```\n", + "NEMO_ROOT = \"\"\n", + "!python $NEMO_ROOT/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py \\\n", + " --source_file $DATA_DIR/text_train.txt \\\n", + " --output_dir $DATA_DIR/my_train_preprocessed\n", + "```" + ] }, { "cell_type": "code", @@ -345,18 +339,25 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "NEMO_ROOT = \"~/NeMo\"\n", - "!python $NEMO_ROOT/examples/nlp/token_classification/data/create_punctuation_capitalization_tarred_dataset.py \\\n", - " --text $DATA_DIR/text_train.txt \\\n", - " --labels $DATA_DIR/labels_train.txt \\\n", - " --output_dir $DATA_DIR/train_tarred \\\n", - " --num_batches_per_tarfile 5 \\\n", - " --tokens_in_batch 1024 \\\n", - " --lines_per_dataset_fragment 4000 \\\n", - " --tokenizer_name bert-base-uncased \\\n", - " --n_jobs 2" - ] + "source": [ + "## download script to prepare tarred dataset\n", + "os.makedirs(WORK_DIR, exist_ok=True)\n", + "if not os.path.exists(f\"{WORK_DIR}/create_punctuation_capitalization_tarred_dataset.py\"):\n", + " print('Downloading create_punctuation_capitalization_tarred_dataset.py...')\n", + " wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/token_classification/data/create_punctuation_capitalization_tarred_dataset.py', WORK_DIR)\n", + "else:\n", + " print (\"create_punctuation_capitalization_tarred_dataset.py script already exists\")\n", + "\n", + "!python $WORK_DIR/create_punctuation_capitalization_tarred_dataset.py \\\n", + " --text $DATA_DIR/text_train.txt \\\n", + " --labels $DATA_DIR/labels_train.txt \\\n", + " --output_dir $DATA_DIR/train_tarred \\\n", + " --num_batches_per_tarfile 5 \\\n", + " --tokens_in_batch 1024 \\\n", + " --lines_per_dataset_fragment 4000 \\\n", + " --tokenizer_name bert-base-uncased \\\n", + " --n_jobs 2" + ] }, { "cell_type": "code", @@ -440,7 +441,7 @@ " print('Downloading config file...')\n", " wget.download(f'https://raw.githubusercontent.com/NVIDIA/NeMo/{BRANCH}/examples/nlp/token_classification/conf/' + MODEL_CONFIG, config_dir)\n", "else:\n", - " print ('config file is already exists')" + " print ('config file already exists')" ] }, { diff --git a/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb b/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb index 40da331ba343..5bcf620ef451 100644 --- a/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb +++ b/tutorials/text_processing/Text_(Inverse)_Normalization.ipynb @@ -61,7 +61,11 @@ "source": [ "## Install NeMo, which installs both nemo and nemo_text_processing package\n", "BRANCH = 'r1.12.0'\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" + "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", + "\n", + "# install Pynini for text normalization\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/nemo_text_processing/install_pynini.sh\n", + "! 
bash install_pynini.sh" ] }, { diff --git a/tutorials/text_processing/WFST_Tutorial.ipynb b/tutorials/text_processing/WFST_Tutorial.ipynb index 51000bb33f26..b7fb6d24af9a 100644 --- a/tutorials/text_processing/WFST_Tutorial.ipynb +++ b/tutorials/text_processing/WFST_Tutorial.ipynb @@ -40,7 +40,11 @@ "source": [ "## Install NeMo, which installs both nemo and nemo_text_processing package\n", "BRANCH = 'r1.12.0'\n", - "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nemo_text_processing]" + "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nemo_text_processing]\n", + "\n", + "# install Pynini for text normalization\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/nemo_text_processing/install_pynini.sh\n", + "! bash install_pynini.sh" ] }, {
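Editor's note: every tutorial touched by this patch now bootstraps Pynini with `install_pynini.sh` before importing `nemo_text_processing`. A quick smoke test for the resulting environment follows; it is a sketch rather than part of the patch, and assumes only that `pynini` imports once the install script succeeds:

```python
import pynini

# A one-arc grammar that transduces "2" -> "two". If "two" prints, the
# OpenFst bindings installed by install_pynini.sh are working.
rule = pynini.cross("2", "two")
lattice = pynini.compose("2", rule)  # plain strings are coerced to acceptors
print(pynini.shortestpath(lattice).string())  # -> "two"
```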