Skip to content

Commit

Permalink
Merge pull request #268 from bolna-ai/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
marmikcfc authored Jun 20, 2024
2 parents 4c5c88c + 1fe5e95 commit 7b5fb16
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 35 deletions.
2 changes: 1 addition & 1 deletion bolna/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def validate_language(cls, value):

class Synthesizer(BaseModel):
provider: str
provider_config: Union[PollyConfig, XTTSConfig, ElevenLabsConfig, OpenAIConfig, FourieConfig, StylettsConfig, MeloConfig, DeepgramConfig] = Field(union_mode='left_to_right')
provider_config: Union[PollyConfig, XTTSConfig, ElevenLabsConfig, OpenAIConfig, FourieConfig, MeloConfig, StylettsConfig, DeepgramConfig] = Field(union_mode='smart')
stream: bool = False
buffer_size: Optional[int] = 40 # 40 characters in a buffer
audio_format: Optional[str] = "pcm"
Expand Down
1 change: 1 addition & 0 deletions bolna/output_handlers/telephony_providers/twilio.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ async def handle_interruption(self):

async def form_media_message(self, audio_data, audio_format="wav"):
if audio_format != "mulaw":
logger.info(f"Converting to mulaw")
audio_data = audioop.lin2ulaw(audio_data, 2)
base64_audio = base64.b64encode(audio_data).decode("utf-8")
message = {
Expand Down
105 changes: 71 additions & 34 deletions bolna/synthesizer/melo_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import os
from dotenv import load_dotenv
from bolna.helpers.logger_config import configure_logger
from bolna.helpers.utils import create_ws_data_packet
from bolna.helpers.utils import create_ws_data_packet, resample, wav_bytes_to_pcm
from bolna.memory.cache.inmemory_scalar_cache import InmemoryScalarCache
from .base_synthesizer import BaseSynthesizer
import json
import base64
Expand All @@ -14,7 +15,7 @@


class MeloSynthesizer(BaseSynthesizer):
def __init__(self, audio_format="pcm", sampling_rate="8000", stream=False, buffer_size=400,
def __init__(self, audio_format="pcm", sampling_rate="8000", stream=False, buffer_size=400, caching = True,
**kwargs):
super().__init__(stream, buffer_size)
self.format = "linear16" if audio_format == "pcm" else audio_format
Expand All @@ -38,31 +39,50 @@ def __init__(self, audio_format="pcm", sampling_rate="8000", stream=False, buffe
self.noise_scale=kwargs.get('noise_scale')
self.noise_scale_w = kwargs.get('noise_scale_w')
self.speed = kwargs.get('speed')
self.synthesized_characters = 0
self.caching = caching
if caching:
self.cache = InmemoryScalarCache()

async def __generate_http(self, text):
payload = {
"voice_id": self.voice,
"text": text,
"sr": self.sample_rate,
"sdp_ratio" : self.sdp_ratio,
"noise_scale" : self.noise_scale,
"noise_scale_w" : self.noise_scale_w,
"speed" : self.speed
}

headers = {
'Content-Type': 'application/json'
}
def get_synthesized_characters(self):
    """Return the current value of ``self.synthesized_characters``.

    The counter is incremented elsewhere in this class by ``len(text)``
    each time text is sent for synthesis rather than served from the cache.
    """
    total_characters = self.synthesized_characters
    return total_characters

async with aiohttp.ClientSession() as session:
if payload is not None:
async def __generate_http(self, text):
    """POST ``text`` to the synthesis endpoint and return the decoded audio.

    The JSON payload is built from the instance's voice and tuning
    attributes (``voice``, ``sample_rate``, ``sdp_ratio``, ``noise_scale``,
    ``noise_scale_w``, ``speed``) and sent to ``self.url``. On an HTTP 200
    response the ``"audio"`` field of the JSON body is base64-decoded.

    Args:
        text: The text to synthesize.

    Returns:
        The decoded audio bytes, or ``None`` when the server answers with a
        non-200 status or when any exception occurs (both cases are logged).
    """
    try:
        payload = {
            "voice_id": self.voice,
            "text": text,
            "sr": self.sample_rate,
            "sdp_ratio": self.sdp_ratio,
            "noise_scale": self.noise_scale,
            "noise_scale_w": self.noise_scale_w,
            "speed": self.speed,
        }
        headers = {
            'Content-Type': 'application/json'
        }

        async with aiohttp.ClientSession() as session:
            logger.info(f"Posting {self.url}")
            async with session.post(self.url, headers=headers, json=payload) as response:
                if response.status != 200:
                    # Surface the failure instead of silently returning None.
                    logger.error(f"Synthesis request failed with status {response.status}")
                    return None
                res_json: dict = json.loads(await response.text())
                # Callers await this coroutine, so the audio must be returned,
                # not yielded: a `yield` here would turn the function into an
                # async generator, where `return <value>` is a syntax error.
                return base64.b64decode(res_json["audio"])
    except Exception as e:
        # Best-effort: callers treat None as "no audio"; keep the cause in the log.
        logger.error(f"Could not synthesize: {e}")
        return None

async def synthesize(self, text):
    """Run a single one-off synthesis of ``text``.

    Intended for one-shot use cases such as voice lab and IVR. The audio is
    whatever ``__generate_http`` returns; if anything raises, the error is
    logged and ``None`` is returned.
    """
    try:
        logger.info(f"Synthesizeing")
        result = await self.__generate_http(text)
    except Exception as err:
        logger.error(f"Could not synthesize {err}")
        return None
    return result

async def open_connection(self):
    """Do nothing; this method is intentionally a no-op."""
    return None
Expand All @@ -71,21 +91,38 @@ async def generate(self):
while True:
message = await self.internal_queue.get()
logger.info(f"Generating TTS response for message: {message}")

meta_info, text = message.get("meta_info"), message.get("data")
async for message in self.__generate_http(text):
if not self.first_chunk_generated:
meta_info["is_first_chunk"] = True
self.first_chunk_generated = True

if self.caching:
logger.info(f"Caching is on")
if self.cache.get(text):
logger.info(f"Cache hit and hence returning quickly {text}")
audio = self.cache.get(text)
else:
meta_info["is_first_chunk"] = False
if "end_of_llm_stream" in meta_info and meta_info["end_of_llm_stream"]:
meta_info["end_of_synthesizer_stream"] = True
self.first_chunk_generated = False

meta_info['text'] = text
meta_info['format'] = self.format
yield create_ws_data_packet(message, meta_info)
logger.info(f"Not a cache hit {list(self.cache.data_dict)}")
self.synthesized_characters += len(text)
audio = await self.__generate_http(text)
self.cache.set(text, message)
else:
logger.info(f"No caching present")
self.synthesized_characters += len(text)
audio = await self.__generate_http(text)

if not self.first_chunk_generated:
meta_info["is_first_chunk"] = True
self.first_chunk_generated = True
else:
meta_info["is_first_chunk"] = False
if "end_of_llm_stream" in meta_info and meta_info["end_of_llm_stream"]:
meta_info["end_of_synthesizer_stream"] = True
self.first_chunk_generated = False

meta_info['text'] = text
meta_info['format'] = self.format
if self.sample_rate == 8000:
audio = wav_bytes_to_pcm(audio)
logger.info(f"Sending sample rate of {self.sample_rate}")
yield create_ws_data_packet(audio, meta_info)

async def push(self, message):
logger.info("Pushed message to internal queue")
Expand Down

0 comments on commit 7b5fb16

Please sign in to comment.