From fba0693a208a2818ffea1a874562fe67733071f4 Mon Sep 17 00:00:00 2001 From: Jerryuhoo Date: Fri, 29 Apr 2022 17:43:50 +0800 Subject: [PATCH] fix random speaker embedding bug, test=tts --- paddlespeech/t2s/exps/voice_cloning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddlespeech/t2s/exps/voice_cloning.py b/paddlespeech/t2s/exps/voice_cloning.py index 9257b07decd..2742cd06858 100644 --- a/paddlespeech/t2s/exps/voice_cloning.py +++ b/paddlespeech/t2s/exps/voice_cloning.py @@ -110,10 +110,10 @@ def voice_cloning(args): print(f"{utt_id} done!") # Randomly generate numbers of 0 ~ 0.2, 256 is the dim of spk_emb random_spk_emb = np.random.rand(256) * 0.2 - random_spk_emb = paddle.to_tensor(random_spk_emb) + random_spk_emb = paddle.to_tensor(random_spk_emb, dtype='float32') utt_id = "random_spk_emb" with paddle.no_grad(): - wav = voc_inference(am_inference(phone_ids, spk_emb=spk_emb)) + wav = voc_inference(am_inference(phone_ids, spk_emb=random_spk_emb)) sf.write( str(output_dir / (utt_id + ".wav")), wav.numpy(),