diff --git a/ovos_classifiers/opm.py b/ovos_classifiers/opm.py
new file mode 100644
index 0000000..4ccf2ea
--- /dev/null
+++ b/ovos_classifiers/opm.py
@@ -0,0 +1,58 @@
+from typing import List, Optional, Tuple
+
+from ovos_plugin_manager.templates.transformers import UtteranceTransformer
+
+from ovos_classifiers.heuristics.normalize import Normalizer, CatalanNormalizer, CzechNormalizer, \
+    PortugueseNormalizer, AzerbaijaniNormalizer, RussianNormalizer, EnglishNormalizer, UkrainianNormalizer
+
+
+class UtteranceNormalizer(UtteranceTransformer):
+    """Utterance transformer that adds normalized variants of each utterance."""
+
+    def __init__(self, name="ovos-utterance-normalizer", priority=1):
+        super().__init__(name, priority)
+
+    @staticmethod
+    def get_normalizer(lang: str):
+        """Return a normalizer instance for the given language code.
+
+        Matching is by lowercase prefix, so "en", "en-us" and "en-US" all
+        select the English normalizer. Missing or unrecognized codes fall
+        back to the base ``Normalizer``.
+        """
+        lang = (lang or "").lower()
+        if lang.startswith("en"):
+            return EnglishNormalizer()
+        elif lang.startswith("pt"):
+            return PortugueseNormalizer()
+        elif lang.startswith("uk"):
+            return UkrainianNormalizer()
+        elif lang.startswith("ca"):
+            return CatalanNormalizer()
+        elif lang.startswith(("cs", "cz")):
+            # "cs" is the ISO 639-1 code for Czech; "cz" (the country code)
+            # is still accepted so existing configurations keep working
+            return CzechNormalizer()
+        elif lang.startswith("az"):
+            return AzerbaijaniNormalizer()
+        elif lang.startswith("ru"):
+            return RussianNormalizer()
+        return Normalizer()
+
+    def transform(self, utterances: List[str],
+                  context: Optional[dict] = None) -> Tuple[list, dict]:
+        """Add a normalized variant of every utterance.
+
+        The language is read from ``context["lang"]``, then from the plugin
+        config, defaulting to "en-us".
+
+        Returns the normalized utterances followed by the originals, with
+        duplicates removed, together with the context dict.
+        """
+        context = context or {}
+        lang = context.get("lang") or self.config.get("lang", "en-us")
+        normalizer = self.get_normalizer(lang)
+        norm = [normalizer.normalize(u) for u in utterances] + utterances
+        # dict.fromkeys drops duplicates but keeps first-seen order; set()
+        # would return the utterances in an arbitrary order
+        return list(dict.fromkeys(norm)), context
diff --git a/setup.py b/setup.py
index fb2413a..9034c7b 100644
--- a/setup.py
+++ b/setup.py
@@ -80,5 +80,8 @@ def required(requirements_file):
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
-    ]
+    ],
+    entry_points={
+        'neon.plugin.text': 'ovos-utterance-normalizer=ovos_classifiers.opm:UtteranceNormalizer'
+    }
 )