From e1724aba564f5f32a3c8f90ff633a421ccf6853a Mon Sep 17 00:00:00 2001 From: Daniel Schnell Date: Wed, 20 Mar 2024 11:22:10 +0000 Subject: [PATCH] Activate User Normalization Dictionary Activate the user normalization dictionary and add it to the prenormalization step of the frontend pipeline. This needs to be done conditionally. We normally want the dictionary activated for general TTS, but we need to deactivate it for the play buttons in the NormDictInfo activity. Signed-off-by: Daniel Schnell --- .../grammatek/simaromur/AppRepository.java | 18 +++++++- .../com/grammatek/simaromur/NormDictInfo.java | 2 +- .../com/grammatek/simaromur/TTSService.java | 2 +- .../com/grammatek/simaromur/VoiceInfo.java | 2 +- .../simaromur/frontend/FrontendManager.java | 12 ----- .../frontend/NormalizationManager.java | 9 ++-- .../simaromur/frontend/TTSNormalizer.java | 46 ++++++++++++++++++- .../simaromur/NormalizationManagerTest.java | 12 ++--- 8 files changed, 74 insertions(+), 29 deletions(-) diff --git a/app/src/main/java/com/grammatek/simaromur/AppRepository.java b/app/src/main/java/com/grammatek/simaromur/AppRepository.java index 579c4d4..81f8637 100644 --- a/app/src/main/java/com/grammatek/simaromur/AppRepository.java +++ b/app/src/main/java/com/grammatek/simaromur/AppRepository.java @@ -349,6 +349,15 @@ public LiveData> getNormDictEntries() { return mNormDictDao.getSortedEntries(); } + /** + * Get all current normalization dictionary entries directly as a plain list instead of LiveData. + * + * @return List of all current normalization dictionary entries + */ + public List getNormDictEntriesDirect() { + return mNormDictDao.getEntries(); + } + /** * Creates or updates the given entry inside the Db. 
*/ @@ -953,15 +962,20 @@ public String getLoadedVoiceName() { * * @param text Raw text as received by the TTS service * @param item cache item to save into the speech audio cache + * @param voice voice to use for normalization and G2P + * @param doIgnoreUserDict true to ignore user dictionary, false otherwise * @return updated cache item */ synchronized - public CacheItem executeFrontendAndSaveIntoCache(String text, CacheItem item, com.grammatek.simaromur.db.Voice voice) { + public CacheItem executeFrontendAndSaveIntoCache(String text, + CacheItem item, + com.grammatek.simaromur.db.Voice voice, + boolean doIgnoreUserDict) { String phonemes = ""; if (item.getUtterance().getNormalized().isEmpty()) { // we always need to normalize the text, but it doesn't hurt, if we always do G2P as well // for network voices, this is currently all that is needed. - String normalizedText = mFrontend.getNormalizationManager().process(text); + String normalizedText = mFrontend.getNormalizationManager().process(text, doIgnoreUserDict); phonemes = mFrontend.transcribe(normalizedText, voice.type, voice.version); Log.v(LOG_TAG, "executeFrontendAndSaveIntoCache: original (\"" + text + "\"), normalized (\"" + normalizedText + "\"), phonemes (\"" + phonemes + "\")"); if (!phonemes.isEmpty()) { diff --git a/app/src/main/java/com/grammatek/simaromur/NormDictInfo.java b/app/src/main/java/com/grammatek/simaromur/NormDictInfo.java index 9deb8a5..151d0f1 100644 --- a/app/src/main/java/com/grammatek/simaromur/NormDictInfo.java +++ b/app/src/main/java/com/grammatek/simaromur/NormDictInfo.java @@ -229,7 +229,7 @@ public void onPlayCancelClicked(View v) { // TODO: do we need a special mode for the frontend to bypass any user dictionary ? 
if (mIsPlaying1 || mIsPlaying2) { CacheItem item = appRepo.getUtteranceCache().addUtterance(text); - item = appRepo.executeFrontendAndSaveIntoCache(text, item, appRepo.getCurrentVoice()); + item = appRepo.executeFrontendAndSaveIntoCache(text, item, appRepo.getCurrentVoice(), true); if ((item.getUtterance().getPhonemesCount() == 0) || item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) { Log.w(LOG_TAG, "onPlayCancelClicked: Nothing to speak ?!"); diff --git a/app/src/main/java/com/grammatek/simaromur/TTSService.java b/app/src/main/java/com/grammatek/simaromur/TTSService.java index 7acbd5f..fba6f8e 100644 --- a/app/src/main/java/com/grammatek/simaromur/TTSService.java +++ b/app/src/main/java/com/grammatek/simaromur/TTSService.java @@ -197,7 +197,7 @@ protected void onSynthesizeText(SynthesisRequest request, // item and save it into cache, then test one-by-one availability of every single // requested utterance component and eventually add the missing pieces CacheItem item = mRepository.getUtteranceCache().addUtterance(text); - item = mRepository.executeFrontendAndSaveIntoCache(text, item, voice); + item = mRepository.executeFrontendAndSaveIntoCache(text, item, voice, false); if ((item.getUtterance().getPhonemesCount() == 0) || item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) { Log.w(LOG_TAG, "onSynthesizeText: No phonemes to speak"); diff --git a/app/src/main/java/com/grammatek/simaromur/VoiceInfo.java b/app/src/main/java/com/grammatek/simaromur/VoiceInfo.java index 83c87ae..f37b0b8 100644 --- a/app/src/main/java/com/grammatek/simaromur/VoiceInfo.java +++ b/app/src/main/java/com/grammatek/simaromur/VoiceInfo.java @@ -379,7 +379,7 @@ public void onPlayClicked(View v) { // execute frontend CacheItem item = appRepo.getUtteranceCache().addUtterance(text); - item = appRepo.executeFrontendAndSaveIntoCache(text, item, mVoice); + item = appRepo.executeFrontendAndSaveIntoCache(text, item, mVoice, false); if 
((item.getUtterance().getPhonemesCount() == 0) || item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) { Log.w(LOG_TAG, "onPlayClicked: No phonemes to speak"); diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/FrontendManager.java b/app/src/main/java/com/grammatek/simaromur/frontend/FrontendManager.java index d850f81..e893bf4 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/FrontendManager.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/FrontendManager.java @@ -40,18 +40,6 @@ public static String getVersion() { return "1.0"; } - /** - * Processes text for input into a TTS engine. This includes unicode cleaning, tokenizing, and - * normalizing the the text, and then to convert it into an X-SAMPA transcription. - * - * @param text raw input text - * @return an X-SAMPA transcription of @text - */ - public String process(String text) { - final String normalized = mNormalizationManager.process(text); - return transcribe(normalized, IGNORE_TYPE, IGNORE_VERSION); - } - /** * Transcribe text to IPA symbols. Punctuation is kept as is, which conforms to the kind of * IPA dialect encoded into the VITS model. 
diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/NormalizationManager.java b/app/src/main/java/com/grammatek/simaromur/frontend/NormalizationManager.java index a0c853d..96e5bea 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/NormalizationManager.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/NormalizationManager.java @@ -50,14 +50,15 @@ public NormalizationManager(Context context, Map pronDict * Processes the input text according to the defined steps: unicode cleaning, * tokenizing, normalizing * @param text the input text + * @param doIgnoreUserDict if true, the user dictionary is ignored * @return normalized version of 'text' */ - public String process(final String text) { + public String process(final String text, boolean doIgnoreUserDict) { Log.v(LOG_TAG, "process() called"); String cleaned = mUnicodeNormalizer.normalizeEncoding(text); List strings = mTokenizer.detectSentences(cleaned); - List normalizedSentences = normalize(strings); + List normalizedSentences = normalize(strings, doIgnoreUserDict); List cleanNormalized = mUnicodeNormalizer.normalizeAlphabet(normalizedSentences); for (String sentence : cleanNormalized) { Log.v(LOG_TAG, "normalized sentence: " + sentence); @@ -66,12 +67,12 @@ public String process(final String text) { } // pre-normalization, tagging and final normalization of the sentences in 'tokenized' - private List normalize(final List strings) { + private List normalize(final List strings, boolean doIgnoreUserDict) { String preNormalized; List normalized = new ArrayList<>(); for (String sentence : strings) { - preNormalized = mTTSNormalizer.preNormalize(sentence); + preNormalized = mTTSNormalizer.preNormalize(sentence, doIgnoreUserDict); String[] tags = tagText(preNormalized); // preNormalized is tokenized as string, so we know splitting on whitespace will give // us the correct tokens according to the tokenizer diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java 
b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java index 485866c..2646894 100644 --- a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java +++ b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java @@ -1,7 +1,11 @@ package com.grammatek.simaromur.frontend; +import android.util.Log; + import androidx.annotation.NonNull; +import com.grammatek.simaromur.App; +import com.grammatek.simaromur.db.NormDictEntry; import com.grammatek.simaromur.device.SymbolsLvLIs; import java.util.*; @@ -21,6 +25,7 @@ */ public class TTSNormalizer { + private static final String LOG_TAG = "Simaromur_" + TTSNormalizer.class.getSimpleName(); private final List BigCardinalFilledTupleList = Stream.of(CardinalOnesTuples.getTuples(), CardinalThousandTuples.getTuples(), CardinalMillionTuples.getTuples(), CardinalBigTuples.getTuples()) @@ -80,9 +85,13 @@ public TTSNormalizer() { * @param text input text, unicode-normalized and if splitted on whitespace we have an array of tokens * @return pre-normalized text, i.e. some common abbreviations expanded */ - public String preNormalize(String text) { + public String preNormalize(String text, boolean doIgnoreUserDict) { String normalized = text; - String domain = ""; //we will need to determine this from "text" in real life! + String domain = ""; // we will need to determine this from "text" in real life! + + if (!doIgnoreUserDict) { + normalized = replaceFromNormDict(normalized); + } // some pre-processing and formatting of digits if (DIGITS_PTRN.matcher(normalized).matches()) { @@ -132,6 +141,39 @@ public String preNormalize(String text) { return normalized; } + /** + * Replace abbreviations and other patterns from the normalization dictionary via the + * NormDictEntryDao. 
+ * + * @param sentence input sentence + * @return normalized sentence with search terms replaced + */ + private String replaceFromNormDict(String sentence) { + // replace abbreviations and other patterns from the normalization dictionary via the + // NormDictEntryDao + String normalized = sentence; + List entries = App.getAppRepository().getNormDictEntriesDirect(); + + if (entries != null) { + // sort entries by descending term length so that longer terms are matched first. This matters for + // overlapping terms, e.g. "Donald Trump" should be replaced before "Trump" + entries.sort((o1, o2) -> o2.term.length() - o1.term.length()); + for (NormDictEntry entry : entries) { + // build, for every entry.term, a case-insensitive regular expression that matches only at + // word boundaries; NOTE(review): the term is inserted unquoted, so regex metacharacters in it are interpreted + Pattern regex = Pattern.compile("\\b(?i)" + entry.term.strip().toLowerCase() + "\\b"); + if (regex.matcher(normalized).find()) { + //Log.v(LOG_TAG, "replaceFromNormDict() - replacing: " + regex + " with: " + entry.replacement); + normalized = regex.matcher(normalized).replaceAll(entry.replacement); + } + } + } + if (!normalized.equals(sentence)) { + Log.v(LOG_TAG, "replaceFromNormDict() replaced: " + sentence + " with: " + normalized); + } + return normalized; + } + /** * Performs normalizing of text partly based on POS-tags. For number normalization the algorithm looks at * the POS-tags at the next token position, to determine the correct form of the normalization (case, gender, etc.) 
diff --git a/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java b/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java index 2ee47af..2775bd1 100644 --- a/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java +++ b/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java @@ -34,7 +34,7 @@ public class NormalizationManagerTest { public void processTest() { String input = "síma 421-6368"; NormalizationManager manager = new NormalizationManager(context, pronDict); - String processed = manager.process(input); + String processed = manager.process(input, false); System.out.println(processed); assertEquals("síma fjórir tveir einn - sex þrír sex átta .", @@ -45,7 +45,7 @@ public void processTest() { public void processDigitsTest() { NormalizationManager manager = new NormalizationManager(context, pronDict); for (String sent : getDigits().keySet()) { - String processed = manager.process(sent); + String processed = manager.process(sent, false); assertEquals(getDigits().get(sent), processed); } } @@ -54,7 +54,7 @@ public void processDigitsTest() { public void processSymbolsTest() { NormalizationManager manager = new NormalizationManager(context, pronDict); for (String sent : getSymbols().keySet()) { - String processed = manager.process(sent); + String processed = manager.process(sent, false); assertEquals(getSymbols().get(sent), processed); } } @@ -63,7 +63,7 @@ public void processSymbolsTest() { public void processNewIssuesTest() { NormalizationManager manager = new NormalizationManager(context, pronDict); for (String sent : getNewTestSentences().keySet()) { - String processed = manager.process(sent); + String processed = manager.process(sent, false); assertEquals(getNewTestSentences().get(sent), processed); } } @@ -72,7 +72,7 @@ public void processNewIssuesTest() { public void processV14IssuesTest() { NormalizationManager manager = new NormalizationManager(context, pronDict); for (String sent : 
getV14TestSentences().keySet()) { - String processed = manager.process(sent); + String processed = manager.process(sent, false); assertEquals(getV14TestSentences().get(sent), processed); } } @@ -81,7 +81,7 @@ public void processV14IssuesTest() { public void processListTest() { NormalizationManager manager = new NormalizationManager(context, pronDict); for (String sent : getTestSentences().keySet()) { - String processed = manager.process(sent); + String processed = manager.process(sent, false); assertEquals(getTestSentences().get(sent), processed); } }