Skip to content

Commit

Permalink
Introduce pre-commit with pre-commit suggestions
Browse files Browse the repository at this point in the history
Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
  • Loading branch information
Sean Narenthiran authored and SeanNaren committed Sep 21, 2022
1 parent 5d5c0fb commit 81f2d2e
Show file tree
Hide file tree
Showing 620 changed files with 4,794 additions and 2,714 deletions.
44 changes: 44 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pre-commit configuration: declares the hooks that run against changed files.

# Interpreter used for hooks that do not pin their own language version.
default_language_version:
  python: python3

# Settings for the pre-commit.ci service (auto-fix PRs, scheduled hook updates).
ci:
  autofix_prs: true
  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
  autoupdate_schedule: quarterly

repos:
  # Generic repository hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: check-yaml
      - id: check-case-conflict
      - id: detect-private-key

  # Import sorting; --line-width matches the --line-length passed to black below.
  - repo: https://github.com/PyCQA/isort
    rev: 4.3.21
    hooks:
      - id: isort
        name: Format imports
        args: [--multi-line=3, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=119, -rc, -ws]

  # Code formatting; existing string quote style is left untouched.
  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black
        name: Format code
        args: [--skip-string-normalization, --line-length=119]
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os
import re
import sys
import glob

import sphinx_book_theme
from package_info import __version__

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
Expand All @@ -30,7 +31,6 @@
sys.path.insert(0, os.path.abspath("../../nemo"))
sys.path.insert(0, os.path.abspath("../../nemo_text_processing"))

from package_info import __version__

templates_path = ["_templates"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ def get_wer_feat(mfst, asr, frame_len, tokens_per_chunk, delay, preprocessor_cfg
def main():
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, required=True, help="Path to asr model .nemo file",
"--asr_model",
type=str,
required=True,
help="Path to asr model .nemo file",
)
parser.add_argument("--test_manifest", type=str, required=True, help="path to evaluation data")
parser.add_argument("--batch_size", type=int, default=32)
Expand Down Expand Up @@ -128,7 +131,10 @@ def main():
print(tokens_per_chunk, mid_delay)

frame_asr = FrameBatchASR(
asr_model=asr_model, frame_len=chunk_len, total_buffer=args.total_buffer_in_secs, batch_size=args.batch_size,
asr_model=asr_model,
frame_len=chunk_len,
total_buffer=args.total_buffer_in_secs,
batch_size=args.batch_size,
)

hyps, refs, wer = get_wer_feat(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@
# Common Arguments
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, required=True, help="Path to asr model .nemo file",
"--asr_model",
type=str,
required=True,
help="Path to asr model .nemo file",
)
parser.add_argument("--test_manifest", type=str, required=True, help="path to evaluation data")
parser.add_argument("--batch_size", type=int, default=32)
Expand Down
9 changes: 6 additions & 3 deletions examples/asr/asr_streaming/speech_to_text_streaming_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@

def extract_transcriptions(hyps):
"""
The transcribed_texts returned by CTC and RNNT models are different.
This method would extract and return the text section of the hypothesis.
The transcribed_texts returned by CTC and RNNT models are different.
This method would extract and return the text section of the hypothesis.
"""
if isinstance(hyps[0], Hypothesis):
transcriptions = []
Expand Down Expand Up @@ -190,7 +190,10 @@ def perform_streaming(asr_model, streaming_buffer, compare_vs_offline=False, deb
def main():
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, required=True, help="Path to an ASR model .nemo file or name of a pretrained model.",
"--asr_model",
type=str,
required=True,
help="Path to an ASR model .nemo file or name of a pretrained model.",
)
parser.add_argument(
"--device", type=str, help="The device to load the model onto and perform the streaming", default="cuda"
Expand Down
20 changes: 16 additions & 4 deletions examples/asr/experimental/k2/make_token_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ def main():
description="""Create token LM for input manifest and tokenizer.""",
)
parser.add_argument(
"--manifest", required=True, type=str, help="Comma separated list of manifest files",
"--manifest",
required=True,
type=str,
help="Comma separated list of manifest files",
)
parser.add_argument(
"--tokenizer_dir",
Expand All @@ -54,13 +57,22 @@ def main():
),
)
parser.add_argument(
"--ngram_order", type=int, default=2, choices=[2, 3, 4, 5], help="Order of n-gram to use",
"--ngram_order",
type=int,
default=2,
choices=[2, 3, 4, 5],
help="Order of n-gram to use",
)
parser.add_argument(
"--output_file", required=True, type=str, help="The path to store the token LM",
"--output_file",
required=True,
type=str,
help="The path to store the token LM",
)
parser.add_argument(
"--do_lowercase", action="store_true", help="Whether to apply lower case conversion on the text",
"--do_lowercase",
action="store_true",
help="Whether to apply lower case conversion on the text",
)
args = parser.parse_args()

Expand Down
6 changes: 5 additions & 1 deletion examples/asr/experimental/sclite/speech_to_text_sclite.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,11 @@ def get_utt_info(manifest_path):
def main():
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, default="QuartzNet15x5Base-En", required=False, help="Pass: 'QuartzNet15x5Base-En'",
"--asr_model",
type=str,
default="QuartzNet15x5Base-En",
required=False,
help="Pass: 'QuartzNet15x5Base-En'",
)
parser.add_argument("--dataset", type=str, required=True, help="path to evaluation data")
parser.add_argument("--batch_size", type=int, default=4)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,13 @@
),
# ... repeat 14 more times
nemo_asr.modules.conv_asr.JasperEncoderConfig(
filters=1024, repeat=1, kernel=[1], stride=[1], dilation=[1], dropout=cfg.model.dropout, residual=False,
filters=1024,
repeat=1,
kernel=[1],
stride=[1],
dilation=[1],
dropout=cfg.model.dropout,
residual=False,
),
]

Expand Down
6 changes: 5 additions & 1 deletion examples/asr/export/transducer/infer_transducer_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@
def parse_arguments():
parser = ArgumentParser()
parser.add_argument(
"--nemo_model", type=str, default=None, required=True, help="Path to .nemo file",
"--nemo_model",
type=str,
default=None,
required=True,
help="Path to .nemo file",
)
parser.add_argument('--onnx_encoder', type=str, default=None, required=False, help="Path to onnx encoder model")
parser.add_argument(
Expand Down
6 changes: 5 additions & 1 deletion examples/asr/quantization/speech_to_text_calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ def autocast(enabled=None):
def main():
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, default="QuartzNet15x5Base-En", required=True, help="Pass: 'QuartzNet15x5Base-En'",
"--asr_model",
type=str,
default="QuartzNet15x5Base-En",
required=True,
help="Pass: 'QuartzNet15x5Base-En'",
)
parser.add_argument("--dataset", type=str, required=True, help="path to evaluation data")
parser.add_argument("--batch_size", type=int, default=256)
Expand Down
6 changes: 5 additions & 1 deletion examples/asr/quantization/speech_to_text_quant_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ def autocast(enabled=None):
def main():
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, default="QuartzNet15x5Base-En", required=True, help="Pass: 'QuartzNet15x5Base-En'",
"--asr_model",
type=str,
default="QuartzNet15x5Base-En",
required=True,
help="Pass: 'QuartzNet15x5Base-En'",
)
parser.add_argument("--dataset", type=str, required=True, help="path to evaluation data")
parser.add_argument("--wer_target", type=float, default=None, help="used by test")
Expand Down
6 changes: 5 additions & 1 deletion examples/asr/quantization/speech_to_text_quant_infer_trt.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ def autocast(enabled=None):
def main():
parser = ArgumentParser()
parser.add_argument(
"--asr_model", type=str, default="QuartzNet15x5Base-En", required=True, help="Pass: 'QuartzNet15x5Base-En'",
"--asr_model",
type=str,
default="QuartzNet15x5Base-En",
required=True,
help="Pass: 'QuartzNet15x5Base-En'",
)
parser.add_argument(
"--asr_onnx",
Expand Down
4 changes: 3 additions & 1 deletion examples/asr/speech_classification/vad_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def main(cfg):
'vad_stream': True,
'sample_rate': 16000,
'manifest_filepath': manifest_vad_input,
'labels': ['infer',],
'labels': [
'infer',
],
'num_workers': cfg.num_workers,
'shuffle': False,
'window_length_in_sec': cfg.vad.parameters.window_length_in_sec,
Expand Down
8 changes: 6 additions & 2 deletions examples/asr/transcribe_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,15 @@ def autocast():
"RNNT models do not support transcribe partial audio for now. Transcribing full audio."
)
transcriptions = asr_model.transcribe(
paths2audio_files=filepaths, batch_size=cfg.batch_size, num_workers=cfg.num_workers,
paths2audio_files=filepaths,
batch_size=cfg.batch_size,
num_workers=cfg.num_workers,
)
else:
transcriptions = asr_model.transcribe(
paths2audio_files=filepaths, batch_size=cfg.batch_size, num_workers=cfg.num_workers,
paths2audio_files=filepaths,
batch_size=cfg.batch_size,
num_workers=cfg.num_workers,
)

logging.info(f"Finished transcribing {len(filepaths)} files !")
Expand Down
2 changes: 1 addition & 1 deletion examples/nlp/duplex_text_normalization/analyze_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# Longest Common Subsequence
def lcs(X, Y):
""" Function for finding the longest common subsequence between two lists.
"""Function for finding the longest common subsequence between two lists.
In this script, this function is particularly used for aligning between the
ground-truth output string and the predicted string (for visualization purpose).
Args:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def process_url(o):
"""

def flatten(l):
""" flatten a list of lists """
"""flatten a list of lists"""
return [item for sublist in l for item in sublist]

if o != '<self>' and '_letter' in o:
Expand Down Expand Up @@ -288,7 +288,7 @@ def convert(example):
def ignore(example):
"""
This function makes sure specific class types like 'PLAIN', 'ELECTRONIC' etc. are left unchanged.
Args:
example: data example
"""
Expand All @@ -300,7 +300,7 @@ def ignore(example):


def process_file(fp):
""" Reads the raw data from a file of NeMo format and preprocesses it. Writes it out to the output directory.
"""Reads the raw data from a file of NeMo format and preprocesses it. Writes it out to the output directory.
For more info about the data format, refer to the
`text_normalization doc <https://github.com/NVIDIA/NeMo/blob/main/docs/source/nlp/text_normalization.rst>`.
Expand Down
8 changes: 4 additions & 4 deletions examples/nlp/duplex_text_normalization/data/en/upsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@

def include_sentence(sentence_patterns) -> bool:
"""
Determines whether to use a sentence for upsampling whose patterns are provided as input. This will check the global pattern tables
Determines whether to use a sentence for upsampling whose patterns are provided as input. This will check the global pattern tables
if this sentence includes any patterns that are still needed.
Args:
Expand Down Expand Up @@ -143,7 +143,7 @@ def include_sentence(sentence_patterns) -> bool:


def read_data_file(fp: str, upsample_file: bool = False):
""" Reading the raw data from a file of NeMo format
"""Reading the raw data from a file of NeMo format
For more info about the data format, refer to the
`text_normalization doc <https://github.com/NVIDIA/NeMo/blob/main/docs/source/nlp/text_normalization.rst>`.
Expand Down Expand Up @@ -204,7 +204,7 @@ def update_patterns(patterns: dict, new_patterns: dict):
Args:
patterns: main table
new_patterns: new table to update the main table with
new_patterns: new table to update the main table with
"""
for k, v in new_patterns.items():
patterns[k] += v
Expand Down Expand Up @@ -254,7 +254,7 @@ def lookup_patterns(cls: str, input_str: str) -> dict:

def create_pattern(templates: List[str], input_str: str, pretty: bool = False):
"""
create all patterns based on list of input templates using the input string.
create all patterns based on list of input templates using the input string.
Args:
templates: list of templates/stencils
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,17 @@ def main(cfg: DictConfig) -> None:
]

def _get_predictions(lines: List[str], mode: str, batch_size: int, text_file: str):
""" Runs inference on a batch of data without labels and saves predictions to a file. """
"""Runs inference on a batch of data without labels and saves predictions to a file."""
assert mode in ['tn', 'itn']
file_name, extension = os.path.splitext(text_file)
batch, all_preds = [], []
for i, line in enumerate(lines):
batch.append(line.strip())
if len(batch) == batch_size or i == len(lines) - 1:
outputs = tn_model._infer(batch, [constants.DIRECTIONS_TO_MODE[mode]] * len(batch),)
outputs = tn_model._infer(
batch,
[constants.DIRECTIONS_TO_MODE[mode]] * len(batch),
)
all_preds.extend([x for x in outputs[-1]])
batch = []
assert len(all_preds) == len(lines)
Expand Down
2 changes: 1 addition & 1 deletion examples/nlp/duplex_text_normalization/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@


def instantiate_model_and_trainer(cfg: DictConfig, model_name: str, do_training: bool):
""" Function for instantiating a model and a trainer
"""Function for instantiating a model and a trainer
Args:
cfg: The config used to instantiate the model and the trainer.
model_name: A str indicates whether the model to be instantiated is a tagger or a decoder (i.e., model_name should be either TAGGER_MODEL or DECODER_MODEL).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class ElectronicNormalizer(Normalizer):
"""
Normalizer for ELECTRONIC.
Args:
input_case: accepting either "lower_cased" or "cased" input.
lang: language
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
class ClassifyFst(GraphFst):
"""
Final class that composes all other classification grammars. This class can process an entire sentence including punctuation.
For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
More details to deployment at NeMo/tools/text_processing_deployment.
Args:
input_case: accepting either "lower_cased" or "cased" input.
deterministic: if True will provide a single transduction option,
Expand Down
Loading

0 comments on commit 81f2d2e

Please sign in to comment.