Merge pull request #1613 from yt605155624/restructure_expand
[tts]restructure expand in length_regulator.py for paddle2onnx, test=tts
yt605155624 authored Mar 30, 2022
2 parents 943d4ac + e52fc08 commit 318edec
Showing 2 changed files with 18 additions and 10 deletions.
4 changes: 3 additions & 1 deletion examples/csmsc/tts3/README.md
@@ -227,7 +227,9 @@ Pretrained FastSpeech2 model with no silence in the edge of audios:
 - [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)
 - [fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip)
 
-The static model can be downloaded here [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip).
+The static model can be downloaded here:
+- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)
+- [fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip)
 
 Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss
 :-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
24 changes: 15 additions & 9 deletions paddlespeech/t2s/modules/predictor/length_regulator.py
@@ -73,15 +73,21 @@ def expand(self, encodings: paddle.Tensor,
         batch_size, t_enc = paddle.shape(durations)
         slens = paddle.sum(durations, -1)
         t_dec = paddle.max(slens)
-        M = paddle.zeros([batch_size, t_dec, t_enc])
-        for i in range(batch_size):
-            k = 0
-            for j in range(t_enc):
-                d = durations[i, j]
-                # If the d == 0, slice action is meaningless and not supported in paddle
-                if d >= 1:
-                    M[i, k:k + d, j] = 1
-                k += d
+        t_dec_1 = t_dec + 1
+        flatten_duration = paddle.cumsum(
+            paddle.reshape(durations, [batch_size * t_enc])) + 1
+        init = paddle.zeros(t_dec_1)
+        m_batch = batch_size * t_enc
+        M = paddle.zeros([t_dec_1, m_batch])
+        for i in range(m_batch):
+            d = flatten_duration[i]
+            m = paddle.concat(
+                [paddle.ones(d), paddle.zeros(t_dec_1 - d)], axis=0)
+            M[:, i] = m - init
+            init = m
+        M = paddle.reshape(M, shape=[t_dec_1, batch_size, t_enc])
+        M = M[1:, :, :]
+        M = paddle.transpose(M, (1, 0, 2))
         encodings = paddle.matmul(M, encodings)
         return encodings
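The change replaces the per-phone slice assignment (a data-dependent Python loop that presumably does not export cleanly through paddle2onnx) with a cumulative-sum construction of the same expansion matrix `M`. Below is a minimal NumPy sketch (not PaddlePaddle; the function names are illustrative) showing that the two constructions agree. Note that the cumulative sum in the new code runs across the flattened batch, so the check uses a single utterance, the typical inference/export setting:

```python
import numpy as np

def expand_matrix_loop(durations: np.ndarray, t_dec: int) -> np.ndarray:
    """Original formulation: write each phone's run of ones with a slice."""
    batch_size, t_enc = durations.shape
    M = np.zeros((batch_size, t_dec, t_enc))
    for i in range(batch_size):
        k = 0
        for j in range(t_enc):
            d = durations[i, j]
            if d >= 1:  # a zero-length slice assignment is not supported in paddle
                M[i, k:k + d, j] = 1
            k += d
    return M

def expand_matrix_cumsum(durations: np.ndarray, t_dec: int) -> np.ndarray:
    """Restructured formulation: cumulative sums mark where each run ends,
    and differencing consecutive prefix masks recovers the runs. The +1
    offset (undone later by dropping row 0) keeps every ones() length
    positive, sidestepping the d == 0 case entirely."""
    batch_size, t_enc = durations.shape
    t_dec_1 = t_dec + 1
    flat = np.cumsum(durations.reshape(batch_size * t_enc)) + 1
    init = np.zeros(t_dec_1)
    M = np.zeros((t_dec_1, batch_size * t_enc))
    for i in range(batch_size * t_enc):
        d = flat[i]
        m = np.concatenate([np.ones(d), np.zeros(t_dec_1 - d)])
        M[:, i] = m - init  # keep only the newly covered rows
        init = m
    M = M.reshape(t_dec_1, batch_size, t_enc)[1:]  # drop the offset row
    return M.transpose(1, 0, 2)  # -> (batch, t_dec, t_enc)

# Single-utterance check (batch_size == 1), with a zero-duration phone.
durations = np.array([[2, 0, 3, 1]])       # frames per phone
t_dec = int(durations.sum(axis=1).max())   # total decoder frames: 6
assert np.array_equal(expand_matrix_loop(durations, t_dec),
                      expand_matrix_cumsum(durations, t_dec))
```

Multiplying either matrix by the encodings repeats the j-th encoder frame `durations[i, j]` times along the decoder axis, which is exactly what `paddle.matmul(M, encodings)` computes in the diff above.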

