Skip to content

Commit

Permalink
[TTS] refactor aligner.encode into separate g2p process and encode_from_g2p process to fix aligner bug. (NVIDIA#4992) (NVIDIA#5000)
Browse files Browse the repository at this point in the history

Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Signed-off-by: Hainan Xu <hainanx@nvidia.com>
  • Loading branch information
github-actions[bot] authored and Hainan Xu committed Nov 29, 2022
1 parent ed8ff1f commit 290e4c6
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,23 @@ def __init__(
self.g2p = g2p

def encode(self, text):
"""See base class."""
ps, space, tokens = [], self.tokens[self.space], set(self.tokens)
"""See base class for more information."""

text = self.text_preprocessing_func(text)
g2p_text = self.g2p(text) # TODO: handle infer
return self.encode_from_g2p(g2p_text, text)

def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None):
"""
Encodes text that has already been run through G2P.
Called for encoding to tokens after text preprocessing and G2P.
Args:
g2p_text: G2P's output, could be a mixture of phonemes and graphemes,
e.g. "see OOV" -> ['S', 'IY1', ' ', 'O', 'O', 'V']
raw_text: original raw input
"""
ps, space, tokens = [], self.tokens[self.space], set(self.tokens)
for p in g2p_text: # noqa
# Remove stress
if p.isalnum() and len(p) == 3 and not self.stresses:
Expand All @@ -355,9 +366,10 @@ def encode(self, text):
ps.append(p)
# Warn about unknown char/phoneme
elif p != space:
logging.warning(
f"Text: [{''.join(g2p_text)}] contains unknown char/phoneme: [{p}]. Original text: [{text}]. Symbol will be skipped."
)
message = f"Text: [{''.join(g2p_text)}] contains unknown char/phoneme: [{p}]."
if raw_text is not None:
message += f"Original text: [{raw_text}]. Symbol will be skipped."
logging.warning(message)

# Remove trailing spaces
if ps:
Expand Down
4 changes: 2 additions & 2 deletions tutorials/tts/Aligner_Inference_Examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,8 @@
"# Construct our two candidate sentences by replacing \"t\" \"h\" \"a\" \"t\" with two phonemic possibilities\n",
"that1 = [\"DH\", \"AE1\", \"T\"]\n",
"that2 = [\"DH\", \"AH0\", \"T\"]\n",
"pron1_g2p = that1 + text_g2p[4:-1] # Chop off trailing space, the tokenizer will add it\n",
"pron2_g2p = that2 + text_g2p[4:-1] # Ditto.\n",
"pron1_g2p = that1 + text_g2p[5:-1] # Chop off trailing space, the tokenizer will add it\n",
"pron2_g2p = that2 + text_g2p[5:-1] # Ditto.\n",
"print(\"=== Text ===\")\n",
"print(pron1_g2p)\n",
"print(pron2_g2p)\n",
Expand Down

0 comments on commit 290e4c6

Please sign in to comment.