Skip to content

Commit

Permalink
feat/fallback_stt (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl authored Apr 23, 2023
1 parent 6d13c82 commit ca4f88e
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 8 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ using [ovos-vad-plugin-silero](https://github.com/OpenVoiceOS/ovos-vad-plugin-si

ovos exclusive features:

- fallback STT
- non-streaming STT support
- compatible with all existing wake-word/STT plugins
- continuous listening (no wakeword, VAD only)
- hybrid listening (no wakeword for follow up commands)
- multiple wakewords
Expand Down
6 changes: 4 additions & 2 deletions ovos_dinkum_listener/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from ovos_utils.log import LOG
from ovos_utils.sound import play_audio

from ovos_dinkum_listener.plugins import load_stt_module
from ovos_dinkum_listener.plugins import load_stt_module, load_fallback_stt
from ovos_dinkum_listener.voice_loop import AlsaMicrophone, DinkumVoiceLoop, ListeningMode, ListeningState
from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer

Expand Down Expand Up @@ -163,14 +163,16 @@ def start(self):
hotwords.load_hotword_engines()

vad = OVOSVADFactory.create()
stt = load_stt_module(self.config, self.bus)
stt = load_stt_module()
fallback_stt = load_fallback_stt()

transformers = AudioTransformersService(self.bus, self.config)

self.voice_loop = DinkumVoiceLoop(
mic=mic,
hotwords=hotwords,
stt=stt,
fallback_stt=fallback_stt,
vad=vad,
transformers=transformers,
#
Expand Down
26 changes: 21 additions & 5 deletions ovos_dinkum_listener/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def update(self, chunk: bytes):


class FakeStreamingSTT(StreamingSTT):
def __init__(self, config=None):
def __init__(self, engine, config=None):
super().__init__(config)
self.engine = OVOSSTTFactory.create()
self.engine = engine

def create_streaming_thread(self):
listener = Configuration().get("listener", {})
Expand All @@ -54,10 +54,26 @@ def create_streaming_thread(self):
return FakeStreamThread(self.queue, self.lang, self.engine, sample_rate, sample_width)


def load_stt_module(config: Dict[str, Any], bus: MessageBusClient) -> StreamingSTT:
stt_config = config["stt"]
def load_stt_module(config: Dict[str, Any] = None) -> StreamingSTT:
stt_config = config or Configuration()["stt"]
plug = OVOSSTTFactory.create(stt_config)
if not isinstance(plug, StreamingSTT):
LOG.debug("Using FakeStreamingSTT wrapper")
return FakeStreamingSTT(config)
return FakeStreamingSTT(plug, config)
return plug


def load_fallback_stt(cfg: Dict[str, Any] = None) -> StreamingSTT:
    """Load the optional fallback STT plugin.

    Args:
        cfg: the "stt" configuration section. When omitted (or empty) the
            "stt" section of the global ``Configuration()`` is used.

    Returns:
        The plugin named by ``cfg["fallback_module"]``, wrapped in
        ``FakeStreamingSTT`` when it is not already a ``StreamingSTT``.
        Returns ``None`` when no fallback module is configured or when the
        plugin fails to load (the failure is logged, never raised).
    """
    cfg = cfg or Configuration().get("stt", {})
    fbm = cfg.get("fallback_module")
    if not fbm:
        # No fallback configured: the caller is expected to handle None.
        return None

    # Plugin-specific settings live under a key named after the module.
    config = cfg.get(fbm, {})
    try:
        plug = OVOSSTTFactory.create({"stt": {"module": fbm, fbm: config}})
        if not isinstance(plug, StreamingSTT):
            LOG.debug("Using FakeStreamingSTT wrapper")
            return FakeStreamingSTT(plug, config)
        return plug
    except Exception:
        # A broken fallback must not prevent the listener from starting, but
        # catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the process can still be stopped.
        LOG.exception("Failed to load fallback STT")
    return None
18 changes: 17 additions & 1 deletion ovos_dinkum_listener/voice_loop/voice_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class VoiceLoop:
mic: Microphone
hotwords: HotwordContainer
stt: StreamingSTT
fallback_stt: StreamingSTT
vad: VADEngine
transformers: AudioTransformersService

Expand Down Expand Up @@ -331,6 +332,8 @@ def _detect_ww(self, chunk):
self.timeout_seconds_left = self.timeout_seconds
self.stt_audio_bytes = bytes()
self.stt.stream_start()
if self.fallback_stt is not None:
self.fallback_stt.stream_start()

# Reset the VAD internal state to avoid the model getting
# into a degenerative state where it always reports silence.
Expand Down Expand Up @@ -370,6 +373,8 @@ def _before_cmd(self, chunk):
while self.stt_chunks:
stt_chunk = self.stt_chunks.popleft()
self.stt.stream_data(stt_chunk)
if self.fallback_stt is not None:
self.fallback_stt.stream_data(stt_chunk)

self.timeout_seconds_left -= self.mic.seconds_per_chunk
if self.timeout_seconds_left <= 0:
Expand Down Expand Up @@ -404,6 +409,8 @@ def _in_cmd(self, chunk):
stt_chunk = self.stt_chunks.popleft()

self.stt.stream_data(stt_chunk)
if self.fallback_stt is not None:
self.fallback_stt.stream_data(stt_chunk)

self.timeout_seconds_left -= self.mic.seconds_per_chunk
if self.timeout_seconds_left <= 0:
Expand Down Expand Up @@ -431,7 +438,16 @@ def _after_cmd(self, chunk):
LOG.debug(f"transformers metadata: {stt_context}")

# get text and trigger callback
text = self.stt.stream_stop() or ""
try:
text = self.stt.stream_stop() or ""
except:
LOG.exception("STT failed")
text = ""

if not text and self.fallback_stt is not None:
LOG.info("Attempting fallback STT plugin")
text = self.fallback_stt.stream_stop() or ""

# TODO - some plugins return list of transcripts some just text
# standardize support for this
if isinstance(text, list):
Expand Down

0 comments on commit ca4f88e

Please sign in to comment.