Activate User Normalization Dictionary

Activate the user normalization dictionary and add it to the prenormalization step of the frontend pipeline. This needs to be done conditionally. We normally want the dictionary activated for general TTS, but we need to deactivate it for the play buttons in the NormDictInfo activity. Signed-off-by: Daniel Schnell <dschnell@grammatek.com>
grammatek · Mar 20, 2024 · e1724ab · e1724ab
1 parent 54e88d3
commit e1724ab
Show file tree

Hide file tree

Showing 8 changed files with 74 additions and 29 deletions.
diff --git a/app/src/main/java/com/grammatek/simaromur/AppRepository.java b/app/src/main/java/com/grammatek/simaromur/AppRepository.java
@@ -349,6 +349,15 @@ public LiveData<List<NormDictEntry>> getNormDictEntries() {
         return mNormDictDao.getSortedEntries();
     }
 
+    /**
+     * Get a plain list of all current normalization dictionary entries (not wrapped in LiveData).
+     *
+     * @return List of all current normalization dictionary entries
+     */
+    public List<NormDictEntry> getNormDictEntriesDirect() {
+        return mNormDictDao.getEntries();
+    }
+
     /**
      * Creates or updates the given entry inside the Db.
      */
@@ -953,15 +962,20 @@ public String getLoadedVoiceName() {
      *
      * @param text Raw text as received by the TTS service
      * @param item cache item to save into the speech audio cache
+     * @param voice voice to use for normalization and G2P
+     * @param doIgnoreUserDict true to ignore user dictionary, false otherwise
      * @return updated cache item
      */
     synchronized
-    public CacheItem executeFrontendAndSaveIntoCache(String text, CacheItem item, com.grammatek.simaromur.db.Voice voice) {
+    public CacheItem executeFrontendAndSaveIntoCache(String text,
+                                                     CacheItem item,
+                                                     com.grammatek.simaromur.db.Voice voice,
+                                                     boolean doIgnoreUserDict) {
         String phonemes = "";
         if (item.getUtterance().getNormalized().isEmpty()) {
             // we always need to normalize the text, but it doesn't hurt, if we always do G2P as well
             // for network voices, this is currently all that is needed.
-            String normalizedText = mFrontend.getNormalizationManager().process(text);
+            String normalizedText = mFrontend.getNormalizationManager().process(text, doIgnoreUserDict);
             phonemes = mFrontend.transcribe(normalizedText, voice.type, voice.version);
             Log.v(LOG_TAG, "executeFrontendAndSaveIntoCache: original (\"" + text + "\"), normalized (\"" + normalizedText + "\"), phonemes (\"" + phonemes + "\")");
             if (!phonemes.isEmpty()) {

diff --git a/app/src/main/java/com/grammatek/simaromur/NormDictInfo.java b/app/src/main/java/com/grammatek/simaromur/NormDictInfo.java
@@ -229,7 +229,7 @@ public void onPlayCancelClicked(View v) {
         // TODO: do we need a special mode for the frontend to bypass any user dictionary ?
         if (mIsPlaying1 || mIsPlaying2) {
             CacheItem item = appRepo.getUtteranceCache().addUtterance(text);
-            item = appRepo.executeFrontendAndSaveIntoCache(text, item, appRepo.getCurrentVoice());
+            item = appRepo.executeFrontendAndSaveIntoCache(text, item, appRepo.getCurrentVoice(), true);
             if ((item.getUtterance().getPhonemesCount() == 0) ||
                     item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) {
                 Log.w(LOG_TAG, "onPlayCancelClicked: Nothing to speak ?!");

diff --git a/app/src/main/java/com/grammatek/simaromur/TTSService.java b/app/src/main/java/com/grammatek/simaromur/TTSService.java
@@ -197,7 +197,7 @@ protected void onSynthesizeText(SynthesisRequest request,
         // item and save it into cache, then test one-by-one availability of every single
         // requested utterance component and eventually add the missing pieces
         CacheItem item = mRepository.getUtteranceCache().addUtterance(text);
-        item = mRepository.executeFrontendAndSaveIntoCache(text, item, voice);
+        item = mRepository.executeFrontendAndSaveIntoCache(text, item, voice, false);
         if ((item.getUtterance().getPhonemesCount() == 0) ||
                 item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) {
             Log.w(LOG_TAG, "onSynthesizeText: No phonemes to speak");

diff --git a/app/src/main/java/com/grammatek/simaromur/VoiceInfo.java b/app/src/main/java/com/grammatek/simaromur/VoiceInfo.java
@@ -379,7 +379,7 @@ public void onPlayClicked(View v) {
 
         // execute frontend
         CacheItem item = appRepo.getUtteranceCache().addUtterance(text);
-        item = appRepo.executeFrontendAndSaveIntoCache(text, item, mVoice);
+        item = appRepo.executeFrontendAndSaveIntoCache(text, item, mVoice, false);
         if ((item.getUtterance().getPhonemesCount() == 0) ||
                 item.getUtterance().getPhonemesList().get(0).getSymbols().isEmpty()) {
             Log.w(LOG_TAG, "onPlayClicked: No phonemes to speak");

diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/FrontendManager.java b/app/src/main/java/com/grammatek/simaromur/frontend/FrontendManager.java
@@ -40,18 +40,6 @@ public static String getVersion() {
         return "1.0";
     }
 
-    /**
-     * Processes text for input into a TTS engine. This includes unicode cleaning, tokenizing, and
-     * normalizing the the text, and then to convert it into an X-SAMPA transcription.
-     *
-     * @param text raw input text
-     * @return an X-SAMPA transcription of @text
-     */
-    public String process(String text) {
-        final String normalized = mNormalizationManager.process(text);
-        return transcribe(normalized, IGNORE_TYPE, IGNORE_VERSION);
-    }
-
     /**
      * Transcribe text to IPA symbols. Punctuation is kept as is, which conforms to the kind of
      * IPA dialect encoded into the VITS model.

diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/NormalizationManager.java b/app/src/main/java/com/grammatek/simaromur/frontend/NormalizationManager.java
@@ -50,14 +50,15 @@ public NormalizationManager(Context context, Map<String, PronDictEntry> pronDict
      * Processes the input text according to the defined steps: unicode cleaning,
      * tokenizing, normalizing
      * @param text the input text
+     * @param doIgnoreUserDict if true, the user dictionary is ignored
      * @return normalized version of 'text'
      */
-    public String process(final String text) {
+    public String process(final String text, boolean doIgnoreUserDict) {
         Log.v(LOG_TAG, "process() called");
         String cleaned = mUnicodeNormalizer.normalizeEncoding(text);
 
         List<String> strings = mTokenizer.detectSentences(cleaned);
-        List<String> normalizedSentences = normalize(strings);
+        List<String> normalizedSentences = normalize(strings, doIgnoreUserDict);
         List<String> cleanNormalized = mUnicodeNormalizer.normalizeAlphabet(normalizedSentences);
         for (String sentence : cleanNormalized) {
             Log.v(LOG_TAG, "normalized sentence: " + sentence);
@@ -66,12 +67,12 @@ public String process(final String text) {
     }
 
     // pre-normalization, tagging and final normalization of the sentences in 'tokenized'
-    private List<String> normalize(final List<String> strings) {
+    private List<String> normalize(final List<String> strings, boolean doIgnoreUserDict) {
         String preNormalized;
         List<String> normalized = new ArrayList<>();
 
         for (String sentence : strings) {
-            preNormalized = mTTSNormalizer.preNormalize(sentence);
+            preNormalized = mTTSNormalizer.preNormalize(sentence, doIgnoreUserDict);
             String[] tags = tagText(preNormalized);
             // preNormalized is tokenized as string, so we know splitting on whitespace will give
             // us the correct tokens according to the tokenizer

diff --git a/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java b/app/src/main/java/com/grammatek/simaromur/frontend/TTSNormalizer.java
@@ -1,7 +1,11 @@
 package com.grammatek.simaromur.frontend;
 
+import android.util.Log;
+
 import androidx.annotation.NonNull;
 
+import com.grammatek.simaromur.App;
+import com.grammatek.simaromur.db.NormDictEntry;
 import com.grammatek.simaromur.device.SymbolsLvLIs;
 
 import java.util.*;
@@ -21,6 +25,7 @@
  */
 
 public class TTSNormalizer {
+    private static final String LOG_TAG = "Simaromur_" + TTSNormalizer.class.getSimpleName();
 
     private final List<CategoryTuple> BigCardinalFilledTupleList = Stream.of(CardinalOnesTuples.getTuples(), CardinalThousandTuples.getTuples(), CardinalMillionTuples.getTuples(),
                     CardinalBigTuples.getTuples())
@@ -80,9 +85,13 @@ public TTSNormalizer() {
      * @param text input text, unicode-normalized and if splitted on whitespace we have an array of tokens
      * @return pre-normalized text, i.e. some common abbreviations expanded
      */
-    public String preNormalize(String text) {
+    public String preNormalize(String text, boolean doIgnoreUserDict) {
         String normalized = text;
-        String domain = ""; //we will need to determine this from "text" in real life!
+        String domain = ""; // we will need to determine this from "text" in real life!
+
+        if (!doIgnoreUserDict) {
+            normalized = replaceFromNormDict(normalized);
+        }
 
         // some pre-processing and formatting of digits
         if (DIGITS_PTRN.matcher(normalized).matches()) {
@@ -132,6 +141,39 @@ public String preNormalize(String text) {
         return normalized;
     }
 
+    /**
+     * Replaces every whole-word, case-insensitive occurrence of a user normalization dictionary
+     * term in the given sentence by its replacement. Terms and replacements are taken literally.
+     *
+     * @param sentence  input sentence
+     * @return         sentence with all matching dictionary terms replaced
+     */
+    private String replaceFromNormDict(String sentence) {
+        List<NormDictEntry> entries = App.getAppRepository().getNormDictEntriesDirect();
+        if (entries == null || entries.isEmpty()) {
+            return sentence;
+        }
+        // work on a copy so the list handed out by the repository is not reordered; unusable
+        // entries are dropped, a blank term would otherwise match at every word boundary
+        List<NormDictEntry> sorted = new ArrayList<>(entries);
+        sorted.removeIf(e -> e.term == null || e.term.strip().isEmpty() || e.replacement == null);
+        // longest term first, e.g. "Donald Trump" has to be replaced before "Trump"
+        sorted.sort((o1, o2) -> Integer.compare(o2.term.length(), o1.term.length()));
+        String normalized = sentence;
+        for (NormDictEntry entry : sorted) {
+            // quote term and replacement: user text like "e.g.", "(" or "$1" must neither be
+            // interpreted as regex syntax nor abort normalization with an exception
+            Pattern regex = Pattern.compile("\\b" + Pattern.quote(entry.term.strip()) + "\\b",
+                    Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+            normalized = regex.matcher(normalized)
+                    .replaceAll(java.util.regex.Matcher.quoteReplacement(entry.replacement));
+        }
+        if (!normalized.equals(sentence)) {
+            Log.v(LOG_TAG, "replaceFromNormDict() replaced: " + sentence + " with: " + normalized);
+        }
+        return normalized;
+    }
+
     /**
      * Performs normalizing of text partly based on POS-tags. For number normalization the algorithm looks at
      * the POS-tags at the next token position, to determine the correct form of the normalization (case, gender, etc.)

diff --git a/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java b/app/src/test/java/com/grammatek/simaromur/NormalizationManagerTest.java
@@ -34,7 +34,7 @@ public class NormalizationManagerTest {
     public void processTest() {
         String input = "síma 421-6368";
         NormalizationManager manager = new NormalizationManager(context, pronDict);
-        String processed = manager.process(input);
+        String processed = manager.process(input, false);
         System.out.println(processed);
 
         assertEquals("síma fjórir tveir einn - sex þrír sex átta .",
@@ -45,7 +45,7 @@ public void processTest() {
     public void processDigitsTest() {
         NormalizationManager manager = new NormalizationManager(context, pronDict);
         for (String sent : getDigits().keySet()) {
-            String processed = manager.process(sent);
+            String processed = manager.process(sent, false);
             assertEquals(getDigits().get(sent), processed);
         }
     }
@@ -54,7 +54,7 @@ public void processDigitsTest() {
     public void processSymbolsTest() {
         NormalizationManager manager = new NormalizationManager(context, pronDict);
         for (String sent : getSymbols().keySet()) {
-            String processed = manager.process(sent);
+            String processed = manager.process(sent, false);
             assertEquals(getSymbols().get(sent), processed);
         }
     }
@@ -63,7 +63,7 @@ public void processSymbolsTest() {
     public void processNewIssuesTest() {
         NormalizationManager manager = new NormalizationManager(context, pronDict);
         for (String sent : getNewTestSentences().keySet()) {
-            String processed = manager.process(sent);
+            String processed = manager.process(sent, false);
             assertEquals(getNewTestSentences().get(sent), processed);
         }
     }
@@ -72,7 +72,7 @@ public void processNewIssuesTest() {
     public void processV14IssuesTest() {
         NormalizationManager manager = new NormalizationManager(context, pronDict);
         for (String sent : getV14TestSentences().keySet()) {
-            String processed = manager.process(sent);
+            String processed = manager.process(sent, false);
             assertEquals(getV14TestSentences().get(sent), processed);
         }
     }
@@ -81,7 +81,7 @@ public void processV14IssuesTest() {
     public void processListTest() {
         NormalizationManager manager = new NormalizationManager(context, pronDict);
         for (String sent : getTestSentences().keySet()) {
-            String processed = manager.process(sent);
+            String processed = manager.process(sent, false);
             assertEquals(getTestSentences().get(sent), processed);
         }
     }