diff --git a/README.md b/README.md index 33c10f3..4f324c0 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ the usual configuration files are loaded, some new params are exposed under the ## Install -Non streaming STT plugins are wrapped into a `FlacStreamingPlugin`(adapted from default dinkum STT), this uses `flac` via subprocess, if using a StreamingSTT directly the `flac` dependency is not needed - using [ovos-vad-plugin-silero](https://github.com/OpenVoiceOS/ovos-vad-plugin-silero) is strongly recommended instead of the default webrtcvad plugin ## mycroft-dinkum vs ovos-dinkum-listener @@ -18,6 +16,7 @@ using [ovos-vad-plugin-silero](https://github.com/OpenVoiceOS/ovos-vad-plugin-si ovos exclusive features: +- non-streaming STT support - continuous listening (no wakeword, VAD only) - hybrid listening (no wakeword for follow up commands) - multiple wakewords @@ -34,40 +33,38 @@ ovos exclusive features: ``` /home/miro/.venvs/ovos-core/bin/python /home/miro/PycharmProjects/mycroft-dinkum-listener/ovos_dinkum_listener/__main__.py -2023-04-10 14:23:26.044 - OVOS - ovos_utils.process_utils:PIDLock:301 - INFO - Create PIDLock in: None -2023-04-10 14:23:26.123 - OVOS - ovos_config.models:load_local:96 - DEBUG - Configuration /home/miro/PycharmProjects/ovos-core/mycroft/configuration/mycroft.conf loaded -2023-04-10 14:23:26.144 - OVOS - ovos_config.models:load_local:102 - DEBUG - Configuration '/etc/mycroft/mycroft.conf' not defined, skipping -2023-04-10 14:23:26.163 - OVOS - ovos_config.models:load_local:96 - DEBUG - Configuration /home/miro/.config/mycroft/web_cache.json loaded -2023-04-10 14:23:26.184 - OVOS - ovos_config.models:load_local:102 - DEBUG - Configuration '/home/miro/.config/mycroft/mycroft.conf' not defined, skipping -2023-04-10 14:23:26.203 - OVOS - ovos_config.models:load_local:102 - DEBUG - Configuration '/etc/xdg/mycroft/mycroft.conf' not defined, skipping -2023-04-10 14:23:26.239 - OVOS - ovos_config.models:load_local:102 - DEBUG - Configuration '/home/miro/.config/kdedefaults/mycroft/mycroft.conf' not defined, skipping -2023-04-10 14:23:26.266 - OVOS - ovos_config.models:load_local:102 - DEBUG - Configuration '/home/miro/.mycroft/mycroft.conf' not defined, skipping -2023-04-10 14:23:26.446 - OVOS - ovos_utils.messagebus::281 - WARNING - ovos-bus-client not installed -2023-04-10 14:23:26.548 - OVOS - ovos_utils.intents.layers::5 - ERROR - This module is deprecated, import from `ovos_workshop.skills.layers -2023-04-10 14:23:26.597 - OVOS - ovos_utils.configuration::52 - WARNING - configuration moved to the `ovos_config` package. This submodule will be removed in ovos_utils 0.1.0 -2023-04-10 14:23:26.620 - OVOS - __main__:before_start:133 - INFO - Starting service... -2023-04-10 14:23:26.620 - OVOS - ovos_bus_client.conf:load_message_bus_config:19 - INFO - Loading message bus configs -2023-04-10 14:23:26.624 - OVOS - ovos_bus_client.client.client:on_open:88 - INFO - Connected -2023-04-10 14:23:26.625 - OVOS - ovos_bus_client.session:reset_default_session:171 - INFO - New Default Session Start: 32073cb1-e92c-4f2d-a77c-4c7102ebd36a -2023-04-10 14:23:26.626 - OVOS - __main__:_connect_to_bus:249 - INFO - Connected to Mycroft Core message bus -2023-04-10 14:23:26.632 - OVOS - ovos_dinkum_listener.voice_loop.microphone:_run:91 - DEBUG - Opening microphone (device=default, rate=16000, width=2, channels=1) -2023-04-10 14:23:26.643 - OVOS - ovos_dinkum_listener.voice_loop.hotwords:load_hotword_engines:63 - INFO - creating hotword engines -2023-04-10 14:23:26.644 - OVOS - ovos_plugin_manager.wakewords:load_module:110 - INFO - Loading "hey_mycroft" wake word via ovos-ww-plugin-precise-lite -2023-04-10 14:23:26.808419: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. -2023-04-10 14:23:26.810269: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. -2023-04-10 14:23:26.852304: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. -2023-04-10 14:23:26.852766: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +2023-04-23 00:57:58.713 - OVOS - ovos_config.models:load_local:105 - DEBUG - Configuration /home/miro/PycharmProjects/ovos-core/mycroft/configuration/mycroft.conf loaded +2023-04-23 00:57:58.753 - OVOS - ovos_config.models:load_local:111 - DEBUG - Configuration '/etc/mycroft/mycroft.conf' not defined, skipping +2023-04-23 00:57:58.793 - OVOS - ovos_config.models:load_local:111 - DEBUG - Configuration '/home/miro/.config/mycroft/web_cache.json' not defined, skipping +2023-04-23 00:57:58.834 - OVOS - ovos_config.models:load_local:111 - DEBUG - Configuration '/home/miro/.config/mycroft/mycroft.conf' not defined, skipping +2023-04-23 00:57:58.872 - OVOS - ovos_config.models:load_local:111 - DEBUG - Configuration '/etc/xdg/mycroft/mycroft.conf' not defined, skipping +2023-04-23 00:57:58.919 - OVOS - ovos_config.models:load_local:111 - DEBUG - Configuration '/home/miro/.config/kdedefaults/mycroft/mycroft.conf' not defined, skipping +2023-04-23 00:57:58.968 - OVOS - ovos_config.models:load_local:111 - DEBUG - Configuration '/home/miro/.mycroft/mycroft.conf' not defined, skipping +2023-04-23 00:57:59.023 - OVOS - ovos_utils.configuration:get_xdg_config_save_path:141 - WARNING - configuration moved to the `ovos_config` package. This submodule will be removed in ovos_utils 0.1.0 +2023-04-23 00:57:59.042 - OVOS - ovos_utils.configuration:get_xdg_base:76 - WARNING - configuration moved to the `ovos_config` package. This submodule will be removed in ovos_utils 0.1.0 +2023-04-23 00:57:59.062 - OVOS - __main__:before_start:141 - INFO - Starting service... +2023-04-23 00:57:59.062 - OVOS - ovos_bus_client.conf:load_message_bus_config:19 - INFO - Loading message bus configs +2023-04-23 00:57:59.065 - OVOS - ovos_bus_client.client.client:on_open:85 - INFO - Connected +2023-04-23 00:57:59.066 - OVOS - ovos_bus_client.session:reset_default_session:171 - INFO - New Default Session Start: f1ec40cd-a5b5-40aa-ab6c-2a9d90a77d88 +2023-04-23 00:57:59.066 - OVOS - __main__:_connect_to_bus:261 - INFO - Connected to Mycroft Core message bus +2023-04-23 00:57:59.070 - OVOS - ovos_dinkum_listener.voice_loop.microphone:_run:91 - DEBUG - Opening microphone (device=default, rate=16000, width=2, channels=1) +2023-04-23 00:57:59.159 - OVOS - ovos_dinkum_listener.voice_loop.hotwords:load_hotword_engines:64 - INFO - creating hotword engines +2023-04-23 00:57:59.160 - OVOS - ovos_plugin_manager.wakewords:load_module:110 - INFO - Loading "hey_mycroft" wake word via ovos-ww-plugin-precise-lite +2023-04-23 00:57:59.388057: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2023-04-23 00:57:59.410033: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. +2023-04-23 00:57:59.516648: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. +2023-04-23 00:57:59.517355: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. -2023-04-10 14:23:27.361300: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT -2023-04-10 14:23:27.931 - OVOS - ovos_plugin_manager.wakewords:load_module:117 - INFO - Loaded the Wake Word plugin ovos-ww-plugin-precise-lite -2023-04-10 14:23:27.933 - OVOS - ovos_plugin_manager.wakewords:load_module:110 - INFO - Loading "wake_up" wake word via ovos-ww-plugin-pocketsphinx +2023-04-23 00:58:00.110092: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +2023-04-23 00:58:00.855 - OVOS - ovos_plugin_manager.wakewords:load_module:117 - INFO - Loaded the Wake Word plugin ovos-ww-plugin-precise-lite +2023-04-23 00:58:00.862 - OVOS - ovos_plugin_manager.wakewords:load_module:110 - INFO - Loading "wake_up" wake word via ovos-ww-plugin-pocketsphinx +2023-04-23 00:58:00.870 - OVOS - ovos_plugin_manager.wakewords:load_module:117 - INFO - Loaded the Wake Word plugin ovos-ww-plugin-pocketsphinx INFO: Created TensorFlow Lite XNNPACK delegate for CPU. -2023-04-10 14:23:27.940 - OVOS - ovos_plugin_manager.wakewords:load_module:117 - INFO - Loaded the Wake Word plugin ovos-ww-plugin-pocketsphinx -2023-04-10 14:23:28.047 - OVOS - ovos_dinkum_listener.plugins:load_stt_module:116 - WARNING - dinkum only supports streaming STTs -2023-04-10 14:23:28.048 - OVOS - ovos_dinkum_listener.plugins:load_stt_module:117 - INFO - Using FlacStreamingSTT wrapper -> ovos-backend-client.api.STTApi(backend_type=BackendType.OFFLINE) -2023-04-10 14:23:34.234 - OVOS - __main__:_record_being:271 - DEBUG - Record begin -2023-04-10 14:23:38.672 - OVOS - __main__:_stt_text:351 - DEBUG - Record end -2023-04-10 14:23:38.689 - OVOS - __main__:_stt_text:366 - DEBUG - STT: hello +2023-04-23 00:58:00.934 - OVOS - ovos_dinkum_listener.plugins:load_stt_module:61 - DEBUG - Using FakeStreamingSTT wrapper +2023-04-23 00:58:27.211 - OVOS - __main__:_record_begin:283 - DEBUG - Record begin +2023-04-23 00:58:29.892 - OVOS - ovos_dinkum_listener.voice_loop.voice_loop:_after_cmd:431 - DEBUG - transformers metadata: {'client_name': 'ovos_dinkum_listener', 'source': 'audio', 'destination': ['skills']} +2023-04-23 00:58:30.089 - OVOS - __main__:_stt_text:408 - DEBUG - Record end +2023-04-23 00:58:30.091 - OVOS - __main__:_stt_text:420 - DEBUG - STT: thank you ``` ## Credits diff --git a/ovos_dinkum_listener/plugins.py b/ovos_dinkum_listener/plugins.py index c9ec098..b7c0acc 100644 --- a/ovos_dinkum_listener/plugins.py +++ b/ovos_dinkum_listener/plugins.py @@ -1,56 +1,37 @@ -# Copyright 2022 Mycroft AI Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -""" 'plugins' according to dinkum are not really pluggable """ - -import subprocess -import tempfile -from queue import Queue from typing import Any, Dict -from ovos_backend_client.api import STTApi, BackendType from ovos_bus_client import MessageBusClient from ovos_plugin_manager.stt import OVOSSTTFactory from ovos_plugin_manager.templates.stt import StreamingSTT, StreamThread +from ovos_plugin_manager.utils import ReadWriteStream +from ovos_config import Configuration from ovos_utils.log import LOG +from speech_recognition import AudioData -class FlacStreamThread(StreamThread): +class FakeStreamThread(StreamThread): - def __init__(self, queue, language): + def __init__(self, queue, language, engine, sample_rate, sample_width): super().__init__(queue, language) - self._flac_proc = None - self._start_flac() + self.lang = language + self.buffer = ReadWriteStream() + self.engine = engine + self.sample_rate = sample_rate + self.sample_width = sample_width def finalize(self): """ return final transcription """ try: - assert self._flac_proc is not None - assert self._flac_file is not None - - # Read contents of encoded file. - # - # A file is needed here so the encoder can seek back and write the - # length. - self._flac_proc.communicate() - self._flac_file.seek(0) - flac = self._flac_file.read() - self._stop_flac() - - return STTApi(backend_type=BackendType.OFFLINE).stt(flac, "en-US", 1) + # plugins expect AudioData objects + audio = AudioData(self.buffer.read(), + sample_rate=self.sample_rate, + sample_width=self.sample_width) + transcript = self.engine.execute(audio, self.lang) + + self.buffer.clear() + return transcript except Exception: - LOG.exception("Error in STTApi") + LOG.exception(f"Error in STT plugin: {self.engine.__class__.__name__}") return None def handle_audio_stream(self, audio, language): @@ -58,62 +39,25 @@ def handle_audio_stream(self, audio, language): self.update(chunk) def update(self, chunk: bytes): - # Stream chunks into FLAC encoder - assert self._flac_proc is not None - assert self._flac_proc.stdin is not None - - self._flac_proc.stdin.write(chunk) - - def _start_flac(self): - self._stop_flac() - - # pylint: disable=consider-using-with - self._flac_file = tempfile.NamedTemporaryFile(suffix=".flac", mode="wb+") - - # Encode raw audio into temporary file - self._flac_proc = subprocess.Popen( - [ - "flac", - "--totally-silent", - "--best", - "--endian=little", - "--channels=1", - "--bps=16", - "--sample-rate=16000", - "--sign=signed", - "-f", - "-o", - self._flac_file.name, - "-", - ], - stdin=subprocess.PIPE, - ) - - def _stop_flac(self): - if self._flac_proc is not None: - # Try to gracefully terminate - self._flac_proc.terminate() - self._flac_proc.wait(0.5) - try: - self._flac_proc.communicate() - except subprocess.TimeoutExpired: - self._flac_proc.kill() - - self._flac_proc = None + self.buffer.write(chunk) -class FlacStreamingSTT(StreamingSTT): +class FakeStreamingSTT(StreamingSTT): + def __init__(self, config=None): + super().__init__(config) + self.engine = OVOSSTTFactory.create() def create_streaming_thread(self): - self.queue = Queue() - return FlacStreamThread(self.queue, self.lang) + listener = Configuration().get("listener", {}) + sample_rate = listener.get("sample_rate", 16000) + sample_width = listener.get("sample_width", 2) + return FakeStreamThread(self.queue, self.lang, self.engine, sample_rate, sample_width) def load_stt_module(config: Dict[str, Any], bus: MessageBusClient) -> StreamingSTT: stt_config = config["stt"] plug = OVOSSTTFactory.create(stt_config) if not isinstance(plug, StreamingSTT): - LOG.warning("dinkum only supports streaming STTs") - LOG.info("Using FlacStreamingSTT wrapper -> ovos-backend-client.api.STTApi(backend_type=BackendType.OFFLINE)") - return FlacStreamingSTT(config) + LOG.debug("Using FakeStreamingSTT wrapper") + return FakeStreamingSTT(config) return plug diff --git a/ovos_dinkum_listener/transformers.py b/ovos_dinkum_listener/transformers.py index 0036400..16580fc 100644 --- a/ovos_dinkum_listener/transformers.py +++ b/ovos_dinkum_listener/transformers.py @@ -40,7 +40,7 @@ def __init__(self, bus, config=None): self.bus = bus # to activate a plugin, just add an entry to mycroft.conf for it self.config = self.config_core.get("audio_transformers") or { - # "ovos_audio_transformer_xxx_plugin": {} + # "ovos-audio-classifier-gender": {} } self.load_plugins() @@ -77,16 +77,21 @@ def shutdown(self): pass def feed_audio(self, chunk): + # print("...feeding audio", len(chunk)) for module in self.modules: module.feed_audio_chunk(chunk) def feed_hotword(self, chunk): + # print("....feeding ww", len(chunk)) for module in self.modules: module.feed_hotword_chunk(chunk) def feed_speech(self, chunk): - for module in self.modules: - module.feed_speech_chunk(chunk) + try: + for module in self.modules: + module.feed_speech_chunk(chunk) + except Exception as e: + LOG.exception(e) def transform(self, chunk): context = {'client_name': 'ovos_dinkum_listener', @@ -98,6 +103,6 @@ def transform(self, chunk): chunk, data = module.transform(chunk) LOG.debug(f"{module.name}: {data}") context = merge_dict(context, data) - except: - pass + except Exception as e: + LOG.exception(e) return chunk, context diff --git a/requirements.txt b/requirements.txt index 757a9fc..a517996 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ -ovos-plugin-manager +ovos-plugin-manager>=0.0.23a9 ovos-utils ovos-bus-client sdnotify -pyalsaaudio \ No newline at end of file +pyalsaaudio +SpeechRecognition