From 74215796ad8a9e04d2afabeaaddd31af5c5e6917 Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:10:19 +0000 Subject: [PATCH 1/3] feat:binary handlers (#1) * feat:binary handlers companion to https://github.com/JarbasHiveMind/hivemind-websocket-client/pull/33 and https://github.com/JarbasHiveMind/HiveMind-core/pull/100 * requirements.txt --- hivemind_listener/__init__.py | 75 +++++++++++++++++++++++++++++------ requirements.txt | 6 +-- 2 files changed, 66 insertions(+), 15 deletions(-) diff --git a/hivemind_listener/__init__.py b/hivemind_listener/__init__.py index 83d89b9..05e7b71 100644 --- a/hivemind_listener/__init__.py +++ b/hivemind_listener/__init__.py @@ -12,8 +12,7 @@ from ovos_bus_client import MessageBusClient from ovos_bus_client.message import Message -from hivemind_bus_client.message import HiveMessage, HiveMessageType -from hivemind_bus_client.serialization import HiveMindBinaryPayloadType +from hivemind_bus_client.message import HiveMessage, HiveMessageType, HiveMindBinaryPayloadType from hivemind_core.protocol import HiveMindListenerProtocol, HiveMindClientConnection from hivemind_core.service import HiveMindService from ovos_plugin_manager.stt import OVOSSTTFactory @@ -153,12 +152,17 @@ def handle_client_disconnected(self, client: HiveMindClientConnection): self.stop_listener(client) @classmethod - def get_b64_tts(cls, message: Message = None) -> str: + def get_tts(cls, message: Message = None) -> str: utterance = message.data['utterance'] ctxt = cls.tts._get_ctxt({"message": message}) wav, _ = cls.tts.synth(utterance, ctxt) + return str(wav) + + @classmethod + def get_b64_tts(cls, message: Message = None) -> str: + wav = cls.get_tts(message) # cast to str() to get a path, as it is a AudioFile object from tts cache - with open(str(wav), "rb") as f: + with open(wav, "rb") as f: audio = f.read() return base64.b64encode(audio).decode("utf-8") @@ -171,20 +175,67 @@ def transcribe_b64_audio(cls, message: Message = None) -> List[Tuple[str, float] utterances = cls.stt.transcribe(audio, lang) return utterances - def handle_binary_message(self, message: HiveMessage, client: HiveMindClientConnection): - assert message.msg_type == HiveMessageType.BINARY - if message.bin_type == HiveMindBinaryPayloadType.RAW_AUDIO: - bin_data = message.payload - if client.peer in self.listeners: - # LOG.debug(f"Got {len(bin_data)} bytes of audio data from {client.peer}") - m: FakeMicrophone = self.listeners[client.peer].mic + def handle_microphone_input(self, bin_data: bytes, + sample_rate: int, + sample_width: int, + client: HiveMindClientConnection): + if client.peer in self.listeners: + m: FakeMicrophone = self.listeners[client.peer].mic + if m.sample_rate != sample_rate or m.sample_width != sample_width: + LOG.debug(f"Got {len(bin_data)} bytes of audio data from {client.peer}") + LOG.error(f"sample_rate/sample_width mismatch! " + f"got: ({sample_rate}, {sample_width}) " + f"expected: ({m.sample_rate}, {m.sample_width})") + # TODO - convert sample_rate if needed + else: m.queue.put(bin_data) + def handle_stt_transcribe_request(self, bin_data: bytes, + sample_rate: int, + sample_width: int, + lang: str, + client: HiveMindClientConnection): + LOG.debug(f"Received binary STT input: {len(bin_data)} bytes") + audio = sr.AudioData(bin_data, sample_rate, sample_width) + tx = self.stt.transcribe(audio, lang) + m = Message("recognizer_loop:transcribe.response", {"transcriptions": tx, "lang": lang}) + client.send(HiveMessage(HiveMessageType.BUS, payload=m)) + + def handle_stt_handle_request(self, bin_data: bytes, + sample_rate: int, + sample_width: int, + lang: str, + client: HiveMindClientConnection): + LOG.debug(f"Received binary STT input: {len(bin_data)} bytes") + audio = sr.AudioData(bin_data, sample_rate, sample_width) + tx = self.stt.transcribe(audio, lang) + if tx: + utts = [t[0].rstrip(" '\"").lstrip(" '\"") for t in tx] + m = Message("recognizer_loop:utterance", + {"utterances": utts, "lang": lang}) + self.handle_inject_mycroft_msg(m, client) + else: + LOG.info(f"STT transcription error for client: {client.peer}") + m = Message("recognizer_loop:speech.recognition.unknown") + client.send(HiveMessage(HiveMessageType.BUS, payload=m)) + def handle_inject_mycroft_msg(self, message: Message, client: HiveMindClientConnection): """ message (Message): mycroft bus message object """ - if message.msg_type == "speak:b64_audio": + if message.msg_type == "speak:synth": + wav = self.get_tts(message) + with open(wav, "rb") as f: + bin_data = f.read() + payload = HiveMessage(HiveMessageType.BINARY, + payload=bin_data, + metadata={"lang": message.data["lang"], + "file_name": wav.split("/")[-1], + "utterance": message.data["utterance"]}, + bin_type=HiveMindBinaryPayloadType.TTS_AUDIO) + client.send(payload) + return + elif message.msg_type == "speak:b64_audio": msg: Message = message.reply("speak:b64_audio.response", message.data) msg.data["audio"] = self.get_b64_tts(message) if msg.context.get("destination") is None: diff --git a/requirements.txt b/requirements.txt index fe76d8e..036babb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ ovos-simple-listener -hivemind_bus_client -ovos-plugin-manager -jarbas_hive_mind \ No newline at end of file +hivemind_bus_client>=0.1.0,<1.0.0 +ovos-plugin-manager<1.0.0 +jarbas_hive_mind>=0.14.0,<1.0.0 \ No newline at end of file From 414ed62b12ffe357f449e144febadd7f396f5ed7 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Wed, 30 Oct 2024 14:10:36 +0000 Subject: [PATCH 2/3] Increment Version to 0.2.0a1 --- hivemind_listener/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hivemind_listener/version.py b/hivemind_listener/version.py index cbb1e3f..d446f48 100644 --- a/hivemind_listener/version.py +++ b/hivemind_listener/version.py @@ -1,6 +1,6 @@ # START_VERSION_BLOCK VERSION_MAJOR = 0 -VERSION_MINOR = 1 +VERSION_MINOR = 2 VERSION_BUILD = 0 -VERSION_ALPHA = 0 +VERSION_ALPHA = 1 # END_VERSION_BLOCK From 84b0f0fbd84200967d5deb42fd30ff27329432c6 Mon Sep 17 00:00:00 2001 From: JarbasAl Date: Wed, 30 Oct 2024 14:11:04 +0000 Subject: [PATCH 3/3] Update Changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..503166f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,13 @@ +# Changelog + +## [0.2.0a1](https://github.com/JarbasHiveMind/hivemind-listener/tree/0.2.0a1) (2024-10-30) + +[Full Changelog](https://github.com/JarbasHiveMind/hivemind-listener/compare/0.1.0...0.2.0a1) + +**Merged pull requests:** + +- feat:binary handlers [\#1](https://github.com/JarbasHiveMind/hivemind-listener/pull/1) ([JarbasAl](https://github.com/JarbasAl)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)*