Skip to content

Commit

Permalink
Remove last old manifest usage.
Browse files (browse the repository at this point in the history)
Signed-off-by: Stanislav Beliaev <stasbelyaev96@gmail.com>
  • Loading branch information
stasbel committed Jan 22, 2020
1 parent b3a8cd4 commit 00a9d59
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 17 deletions.
28 changes: 13 additions & 15 deletions collections/nemo_asr/nemo_asr/parts/manifest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,15 +9,16 @@

class ManifestBase():
def __init__(self,
manifest_paths,
labels,
max_duration=None,
min_duration=None,
sort_by_duration=False,
max_utts=0,
blank_index=-1,
unk_index=-1,
normalize=True):
manifest_paths,
labels,
max_duration=None,
min_duration=None,
sort_by_duration=False,
max_utts=0,
blank_index=-1,
unk_index=-1,
normalize=True,
):
self.min_duration = min_duration
self.max_duration = max_duration
self.sort_by_duration = sort_by_duration
Expand Down Expand Up @@ -61,7 +62,8 @@ def __init__(self,

# tokenize transcript text
item["tokens"] = self.tokenize_transcript(
text, self.labels_map, self.unk_index, self.blank_index)
text, self.labels_map, self.unk_index, self.blank_index
)

# support files using audio_filename
if 'audio_filename' in item and 'audio_filepath' not in item:
Expand Down Expand Up @@ -155,11 +157,7 @@ def normalize_text(text, labels):
# Punctuation to remove
punctuation = string.punctuation
# Define punctuation that will be handled by text cleaner
punctuation_to_replace = {
"+": "plus",
"&": "and",
"%": "percent"
}
punctuation_to_replace = {"+": "plus", "&": "and", "%": "percent"}
for char in punctuation_to_replace:
punctuation = punctuation.replace(char, "")
# We might also want to consider:
Expand Down
10 changes: 8 additions & 2 deletions collections/nemo_asr/nemo_asr/parts/perturb.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,8 @@
import random

import librosa
from nemo_asr.parts import char_parsers
from nemo_asr.parts import manifests
from scipy import signal

from .segment import AudioSegment
Expand Down Expand Up @@ -47,7 +49,9 @@ def perturb(self, data):

class ImpulsePerturbation(Perturbation):
def __init__(self, manifest_path=None, rng=None):
self._manifest = ManifestEN(manifest_path)
self._manifest = manifests.ASRAudioText(
manifest_path, parser=char_parsers.make_parser([])
)
self._rng = random.Random() if rng is None else rng

def perturb(self, data):
Expand Down Expand Up @@ -91,7 +95,9 @@ def __init__(
max_gain_db=300.0,
rng=None,
):
self._manifest = ManifestEN(manifest_path)
self._manifest = manifests.ASRAudioText(
manifest_path, parser=char_parsers.make_parser([])
)
self._rng = random.Random() if rng is None else rng
self._min_snr_db = min_snr_db
self._max_snr_db = max_snr_db
Expand Down

0 comments on commit 00a9d59

Please sign in to comment.