Skip to content

Commit

Permalink
Activate User Normalization Dictionary
Browse files Browse the repository at this point in the history
Activate the user normalization dictionary and add it to the
prenormalization step of the frontend pipeline.

This needs to be done conditionally. We normally want the dictionary
activated for general TTS, but we need to deactivate it for the play
buttons in the NormDictInfo activity.

Signed-off-by: Daniel Schnell <dschnell@grammatek.com>
  • Loading branch information
lumpidu committed Mar 20, 2024
1 parent 54e88d3 commit e1724ab
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 29 deletions.
18 changes: 16 additions & 2 deletions app/src/main/java/com/grammatek/simaromur/AppRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,15 @@ public LiveData<List<NormDictEntry>> getNormDictEntries() {
return mNormDictDao.getSortedEntries();
}

/**
* Gets all current normalization dictionary entries as a plain list by delegating to
* mNormDictDao.getEntries(). Unlike getNormDictEntries(), the result is NOT wrapped in
* LiveData, so callers receive the entries directly instead of observing them.
*
* NOTE(review): presumably this is a blocking database read - confirm which threads are
* allowed to call it. The ordering of the returned list is whatever the DAO delivers.
*
* @return List of all current normalization dictionary entries
*/
public List<NormDictEntry> getNormDictEntriesDirect() {
return mNormDictDao.getEntries();
}

/**
* Creates or updates the given entry inside the Db.
*/
Expand Down Expand Up @@ -953,15 +962,20 @@ public String getLoadedVoiceName() {
*
* @param text Raw text as received by the TTS service
* @param item cache item to save into the speech audio cache
* @param voice voice to use for normalization and G2P
* @param doIgnoreUserDict true to ignore user dictionary, false otherwise
* @return updated cache item
*/
synchronized
public CacheItem executeFrontendAndSaveIntoCache(String text, CacheItem item, com.grammatek.simaromur.db.Voice voice) {
public CacheItem executeFrontendAndSaveIntoCache(String text,
CacheItem item,
com.grammatek.simaromur.db.Voice voice,
boolean doIgnoreUserDict) {
String phonemes = "";
if (item.getUtterance().getNormalized().isEmpty()) {
// we always need to normalize the text, but it doesn't hurt, if we always do G2P as well
// for network voices, this is currently all that is needed.
String normalizedText = mFrontend.getNormalizationManager().process(text);
String normalizedText = mFrontend.getNormalizationManager().process(text, doIgnoreUserDict);
phonemes = mFrontend.transcribe(normalizedText, voice.type, voice.version);
Log.v(LOG_TAG, "executeFrontendAndSaveIntoCache: original (\"" + text + "\"), normalized (\"" + normalizedText + "\"), phonemes (\"" + phonemes + "\")");
if (!phonemes.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public void onPlayCancelClicked(View v) {
// TODO: do we need a special mode for the frontend to bypass any user dictionary ?
if (mIsPlaying1 || mIsPlaying2) {
CacheItem item = appRepo.getUtteranceCache().addUtterance(text);
item = appRepo.executeFrontendAndSaveIntoCache(text, item, appRepo.getCurrentVoice());
item = appRepo.executeFrontendAndSaveIntoCache(text, item, appRepo.getCurrentVoice(), true);
if ((item.getUtterance().getPhonemesCount() == 0) ||
item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) {
Log.w(LOG_TAG, "onPlayCancelClicked: Nothing to speak ?!");
Expand Down
2 changes: 1 addition & 1 deletion app/src/main/java/com/grammatek/simaromur/TTSService.java
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ protected void onSynthesizeText(SynthesisRequest request,
// item and save it into cache, then test one-by-one availability of every single
// requested utterance component and eventually add the missing pieces
CacheItem item = mRepository.getUtteranceCache().addUtterance(text);
item = mRepository.executeFrontendAndSaveIntoCache(text, item, voice);
item = mRepository.executeFrontendAndSaveIntoCache(text, item, voice, false);
if ((item.getUtterance().getPhonemesCount() == 0) ||
item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) {
Log.w(LOG_TAG, "onSynthesizeText: No phonemes to speak");
Expand Down
2 changes: 1 addition & 1 deletion app/src/main/java/com/grammatek/simaromur/VoiceInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ public void onPlayClicked(View v) {

// execute frontend
CacheItem item = appRepo.getUtteranceCache().addUtterance(text);
item = appRepo.executeFrontendAndSaveIntoCache(text, item, mVoice);
item = appRepo.executeFrontendAndSaveIntoCache(text, item, mVoice, false);
if ((item.getUtterance().getPhonemesCount() == 0) ||
item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) {
Log.w(LOG_TAG, "onPlayClicked: No phonemes to speak");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,6 @@ public static String getVersion() {
return "1.0";
}

/**
* Processes text for input into a TTS engine. This includes unicode cleaning, tokenizing, and
* normalizing the text, and then converting it into an X-SAMPA transcription.
*
* @param text raw input text
* @return an X-SAMPA transcription of @text
*/
public String process(String text) {
final String normalized = mNormalizationManager.process(text);
return transcribe(normalized, IGNORE_TYPE, IGNORE_VERSION);
}

/**
* Transcribe text to IPA symbols. Punctuation is kept as is, which conforms to the kind of
* IPA dialect encoded into the VITS model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,15 @@ public NormalizationManager(Context context, Map<String, PronDictEntry> pronDict
* Processes the input text according to the defined steps: unicode cleaning,
* tokenizing, normalizing
* @param text the input text
* @param doIgnoreUserDict if true, the user dictionary is ignored
* @return normalized version of 'text'
*/
public String process(final String text) {
public String process(final String text, boolean doIgnoreUserDict) {
Log.v(LOG_TAG, "process() called");
String cleaned = mUnicodeNormalizer.normalizeEncoding(text);

List<String> strings = mTokenizer.detectSentences(cleaned);
List<String> normalizedSentences = normalize(strings);
List<String> normalizedSentences = normalize(strings, doIgnoreUserDict);
List<String> cleanNormalized = mUnicodeNormalizer.normalizeAlphabet(normalizedSentences);
for (String sentence : cleanNormalized) {
Log.v(LOG_TAG, "normalized sentence: " + sentence);
Expand All @@ -66,12 +67,12 @@ public String process(final String text) {
}

// pre-normalization, tagging and final normalization of the sentences in 'tokenized'
private List<String> normalize(final List<String> strings) {
private List<String> normalize(final List<String> strings, boolean doIgnoreUserDict) {
String preNormalized;
List<String> normalized = new ArrayList<>();

for (String sentence : strings) {
preNormalized = mTTSNormalizer.preNormalize(sentence);
preNormalized = mTTSNormalizer.preNormalize(sentence, doIgnoreUserDict);
String[] tags = tagText(preNormalized);
// preNormalized is tokenized as string, so we know splitting on whitespace will give
// us the correct tokens according to the tokenizer
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package com.grammatek.simaromur.frontend;

import android.util.Log;

import androidx.annotation.NonNull;

import com.grammatek.simaromur.App;
import com.grammatek.simaromur.db.NormDictEntry;
import com.grammatek.simaromur.device.SymbolsLvLIs;

import java.util.*;
Expand All @@ -21,6 +25,7 @@
*/

public class TTSNormalizer {
private static final String LOG_TAG = "Simaromur_" + TTSNormalizer.class.getSimpleName();

private final List<CategoryTuple> BigCardinalFilledTupleList = Stream.of(CardinalOnesTuples.getTuples(), CardinalThousandTuples.getTuples(), CardinalMillionTuples.getTuples(),
CardinalBigTuples.getTuples())
Expand Down Expand Up @@ -80,9 +85,13 @@ public TTSNormalizer() {
* @param text input text, unicode-normalized; splitting it on whitespace yields the array of tokens
* @return pre-normalized text, i.e. some common abbreviations expanded
*/
public String preNormalize(String text) {
public String preNormalize(String text, boolean doIgnoreUserDict) {
String normalized = text;
String domain = ""; //we will need to determine this from "text" in real life!
String domain = ""; // we will need to determine this from "text" in real life!

if (!doIgnoreUserDict) {
normalized = replaceFromNormDict(normalized);
}

// some pre-processing and formatting of digits
if (DIGITS_PTRN.matcher(normalized).matches()) {
Expand Down Expand Up @@ -132,6 +141,39 @@ public String preNormalize(String text) {
return normalized;
}

/**
 * Replaces every occurrence of a user normalization dictionary term in the given sentence
 * with its replacement. The entries are fetched via
 * App.getAppRepository().getNormDictEntriesDirect().
 *
 * Terms are matched literally (regex metacharacters inside a term have no special meaning),
 * case-insensitively, and only as whole words: a match must not be directly preceded or
 * followed by a letter, digit or underscore. Replacements are inserted literally as well,
 * i.e. '$' and '\' inside a replacement are not interpreted as group references.
 * Entries with a missing or blank term, or a missing replacement, are skipped.
 *
 * @param sentence input sentence
 * @return sentence with all matching dictionary terms replaced; the unchanged input if the
 *         dictionary is unavailable or empty
 */
private String replaceFromNormDict(String sentence) {
    List<NormDictEntry> entries = App.getAppRepository().getNormDictEntriesDirect();
    if (entries == null || entries.isEmpty()) {
        return sentence;
    }

    // Work on a filtered copy: the list handed out by the repository is not reordered, and
    // unusable entries are dropped. A blank term would otherwise produce a pattern that
    // matches at every word boundary and scatter the replacement all over the sentence.
    List<NormDictEntry> candidates = new ArrayList<>(entries.size());
    for (NormDictEntry entry : entries) {
        if (entry != null && entry.term != null && entry.replacement != null
                && !entry.term.strip().isEmpty()) {
            candidates.add(entry);
        }
    }

    // Longest terms first, so that e.g. "Donald Trump" is replaced before "Trump".
    candidates.sort((o1, o2) -> Integer.compare(o2.term.length(), o1.term.length()));

    String normalized = sentence;
    for (NormDictEntry entry : candidates) {
        Pattern regex = buildTermPattern(entry.term.strip());
        // quoteReplacement() keeps '$' and '\' in the user supplied replacement literal
        normalized = regex.matcher(normalized)
                .replaceAll(java.util.regex.Matcher.quoteReplacement(entry.replacement));
    }

    if (!normalized.equals(sentence)) {
        Log.v(LOG_TAG, "replaceFromNormDict() replaced: " + sentence + " with: " + normalized);
    }
    return normalized;
}

/**
 * Builds a case-insensitive whole-word pattern for a literal dictionary term.
 *
 * Lookarounds are used instead of "\\b" so that terms starting or ending with punctuation
 * (e.g. abbreviations ending in '.') still match when surrounded by whitespace.
 *
 * @param term literal search term, already stripped and not empty
 * @return compiled pattern matching the term as a whole word
 */
private static Pattern buildTermPattern(String term) {
    return Pattern.compile(
            "(?<![\\p{L}\\p{N}_])" + Pattern.quote(term) + "(?![\\p{L}\\p{N}_])",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
}

/**
* Performs normalizing of text partly based on POS-tags. For number normalization the algorithm looks at
* the POS-tags at the next token position, to determine the correct form of the normalization (case, gender, etc.)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class NormalizationManagerTest {
public void processTest() {
String input = "síma 421-6368";
NormalizationManager manager = new NormalizationManager(context, pronDict);
String processed = manager.process(input);
String processed = manager.process(input, false);
System.out.println(processed);

assertEquals("síma fjórir tveir einn - sex þrír sex átta .",
Expand All @@ -45,7 +45,7 @@ public void processTest() {
public void processDigitsTest() {
NormalizationManager manager = new NormalizationManager(context, pronDict);
for (String sent : getDigits().keySet()) {
String processed = manager.process(sent);
String processed = manager.process(sent, false);
assertEquals(getDigits().get(sent), processed);
}
}
Expand All @@ -54,7 +54,7 @@ public void processDigitsTest() {
public void processSymbolsTest() {
NormalizationManager manager = new NormalizationManager(context, pronDict);
for (String sent : getSymbols().keySet()) {
String processed = manager.process(sent);
String processed = manager.process(sent, false);
assertEquals(getSymbols().get(sent), processed);
}
}
Expand All @@ -63,7 +63,7 @@ public void processSymbolsTest() {
public void processNewIssuesTest() {
NormalizationManager manager = new NormalizationManager(context, pronDict);
for (String sent : getNewTestSentences().keySet()) {
String processed = manager.process(sent);
String processed = manager.process(sent, false);
assertEquals(getNewTestSentences().get(sent), processed);
}
}
Expand All @@ -72,7 +72,7 @@ public void processNewIssuesTest() {
public void processV14IssuesTest() {
NormalizationManager manager = new NormalizationManager(context, pronDict);
for (String sent : getV14TestSentences().keySet()) {
String processed = manager.process(sent);
String processed = manager.process(sent, false);
assertEquals(getV14TestSentences().get(sent), processed);
}
}
Expand All @@ -81,7 +81,7 @@ public void processV14IssuesTest() {
public void processListTest() {
NormalizationManager manager = new NormalizationManager(context, pronDict);
for (String sent : getTestSentences().keySet()) {
String processed = manager.process(sent);
String processed = manager.process(sent, false);
assertEquals(getTestSentences().get(sent), processed);
}
}
Expand Down

0 comments on commit e1724ab

Please sign in to comment.