Skip to content

Commit

Permalink
Merge branch 'main' into fix_full_bucket_hang
Browse files Browse the repository at this point in the history
  • Loading branch information
titu1994 authored Jun 8, 2022
2 parents 674a2aa + 0bc4607 commit 7d41f7f
Show file tree
Hide file tree
Showing 363 changed files with 137,032 additions and 4,028 deletions.
18 changes: 9 additions & 9 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -138,18 +138,18 @@ pipeline {
parallel {
stage('En TN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --text="1" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3'
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --text="1" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8'
}
}
stage('En ITN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en --text="twenty" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3'
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en --text="twenty" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8'
}
}
stage('Test En non-deterministic TN & Run all En TN/ITN tests (restore grammars from cache)') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text "\$.01" --n_tagged 2 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3'
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3'
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text "\$.01" --n_tagged 2 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8'
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8'
}
}
}
Expand All @@ -166,7 +166,7 @@ pipeline {
parallel {
stage('L2: Eng TN') {
steps {
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_norm/output/ --grammars=tn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3 --language=en && ls -R /home/TestData/nlp/text_norm/output/ && echo ".far files created "|| exit 1'
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_norm/output/ --grammars=tn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8 --language=en && ls -R /home/TestData/nlp/text_norm/output/ && echo ".far files created "|| exit 1'
sh 'cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/TestData/nlp/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=/home/TestData/nlp/text_norm/output/test.pynini.txt --verbose'
sh 'cat /home/TestData/nlp/text_norm/output/test.pynini.txt'
sh 'cmp --silent /home/TestData/nlp/text_norm/output/test.pynini.txt /home/TestData/nlp/text_norm/ci/test_goal_py_05-25.txt || exit 1'
Expand All @@ -176,7 +176,7 @@ pipeline {

stage('L2: Eng ITN export') {
steps {
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_denorm/output/ --grammars=itn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3 --language=en && ls -R /home/TestData/nlp/text_denorm/output/ && echo ".far files created "|| exit 1'
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_denorm/output/ --grammars=itn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8 --language=en && ls -R /home/TestData/nlp/text_denorm/output/ && echo ".far files created "|| exit 1'
sh 'cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/TestData/nlp/text_denorm/ci/test.txt --language=en --output_file=/home/TestData/nlp/text_denorm/output/test.pynini.txt --verbose'
sh 'cmp --silent /home/TestData/nlp/text_denorm/output/test.pynini.txt /home/TestData/nlp/text_denorm/ci/test_goal_py.txt || exit 1'
sh 'rm -rf /home/TestData/nlp/text_denorm/output/*'
Expand All @@ -185,23 +185,23 @@ pipeline {
stage('L2: TN with Audio (audio and raw text)') {
steps {
sh 'cd nemo_text_processing/text_normalization && \
python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3 --text "The total amounts to \\$4.76." \
python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8 --text "The total amounts to \\$4.76." \
--audio_data /home/TestData/nlp/text_norm/audio_based/audio.wav | tail -n2 | head -n1 > /tmp/out_raw.txt 2>&1 && \
cmp --silent /tmp/out_raw.txt /home/TestData/nlp/text_norm/audio_based/result.txt || exit 1'
}
}
stage('L2: TN with Audio (audio and text file)') {
steps {
sh 'cd nemo_text_processing/text_normalization && \
python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3 --text /home/TestData/nlp/text_norm/audio_based/text.txt \
python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8 --text /home/TestData/nlp/text_norm/audio_based/text.txt \
--audio_data /home/TestData/nlp/text_norm/audio_based/audio.wav | tail -n2 | head -n1 > /tmp/out_file.txt 2>&1 && \
cmp --silent /tmp/out_file.txt /home/TestData/nlp/text_norm/audio_based/result.txt || exit 1'
}
}
stage('L2: TN with Audio (manifest)') {
steps {
sh 'cd nemo_text_processing/text_normalization && \
python normalize_with_audio.py --language=en --audio_data /home/TestData/nlp/text_norm/audio_based/manifest.json --n_tagged=120 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-3'
python normalize_with_audio.py --language=en --audio_data /home/TestData/nlp/text_norm/audio_based/manifest.json --n_tagged=120 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/6-8'
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@
from typing import List

from helpers import DECODER_MODEL, TAGGER_MODEL, instantiate_model_and_trainer
from nemo_text_processing.text_normalization.data_loader_utils import post_process_punct
from nn_wfst.en.electronic.normalize import ElectronicNormalizer
from nn_wfst.en.whitelist.normalize import WhitelistNormalizer
from omegaconf import DictConfig, OmegaConf

from nemo.collections.nlp.data.text_normalization import constants
from nemo.collections.nlp.models import DuplexTextNormalizationModel
from nemo.collections.nlp.models.duplex_text_normalization import post_process_punct
from nemo.core.config import hydra_runner
from nemo.utils import logging

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import os

import pynini
from nemo_text_processing.text_normalization.en.graph_utils import (
NEMO_WHITE_SPACE,
GraphFst,
Expand All @@ -25,17 +26,10 @@
from nemo_text_processing.text_normalization.en.taggers.electronic import ElectronicFst
from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst
from nemo_text_processing.text_normalization.en.taggers.word import WordFst
from pynini.lib import pynutil

from nemo.utils import logging

try:
import pynini
from pynini.lib import pynutil

PYNINI_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
PYNINI_AVAILABLE = False


class ClassifyFst(GraphFst):
"""
Expand Down Expand Up @@ -66,9 +60,10 @@ def __init__(
else:
logging.info(f"Creating ClassifyFst grammars.")

word_graph = WordFst(deterministic=deterministic).fst
punctuation = PunctuationFst(deterministic=deterministic)
punct_graph = punctuation.fst
word_graph = WordFst(deterministic=deterministic, punctuation=punctuation).fst
electonic_graph = ElectronicFst(deterministic=deterministic).fst
punct_graph = PunctuationFst(deterministic=deterministic).fst

classify = pynutil.add_weight(electonic_graph, 1.1) | pynutil.add_weight(word_graph, 100)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,11 @@
# limitations under the License.


import pynini
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, delete_extra_space, delete_space
from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst
from nn_wfst.en.electronic.verbalize import VerbalizeFst

try:
import pynini
from pynini.lib import pynutil

PYNINI_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
PYNINI_AVAILABLE = False
from pynini.lib import pynutil


class VerbalizeFinalFst(GraphFst):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import os

import pynini
from nemo_text_processing.text_normalization.en.graph_utils import (
NEMO_WHITE_SPACE,
GraphFst,
Expand All @@ -25,17 +26,10 @@
from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst
from nemo_text_processing.text_normalization.en.taggers.whitelist import WhiteListFst
from nemo_text_processing.text_normalization.en.taggers.word import WordFst
from pynini.lib import pynutil

from nemo.utils import logging

try:
import pynini
from pynini.lib import pynutil

PYNINI_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
PYNINI_AVAILABLE = False


class ClassifyFst(GraphFst):
"""
Expand Down Expand Up @@ -75,9 +69,10 @@ def __init__(
else:
logging.info(f"Creating ClassifyFst grammars.")

word_graph = WordFst(deterministic=deterministic).fst
punctuation = PunctuationFst(deterministic=deterministic)
punct_graph = punctuation.fst
word_graph = WordFst(deterministic=deterministic, punctuation=punctuation).fst
whitelist_graph = WhiteListFst(input_case=input_case, deterministic=deterministic).fst
punct_graph = PunctuationFst(deterministic=deterministic).fst

classify = pynutil.add_weight(whitelist_graph, 1) | pynutil.add_weight(word_graph, 100)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,11 @@
# limitations under the License.


import pynini
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, delete_extra_space, delete_space
from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst
from nn_wfst.en.electronic.verbalize import VerbalizeFst

try:
import pynini
from pynini.lib import pynutil

PYNINI_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
PYNINI_AVAILABLE = False
from pynini.lib import pynutil


class VerbalizeFinalFst(GraphFst):
Expand Down
Loading

0 comments on commit 7d41f7f

Please sign in to comment.