Merge pull request #362 from kan-bayashi/minor_update

kan-bayashi · May 17, 2022 · d5d4792 · d5d4792
2 parents a2fc640 + 0ad70d5
commit d5d4792
Show file tree

Hide file tree

Showing 26 changed files with 250 additions and 133 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -14,11 +14,12 @@ jobs:
   linter_and_test:
     runs-on: ubuntu-20.04
     strategy:
-      max-parallel: 5
+      max-parallel: 8
       matrix:
-        python-version: [3.6]
+        python-version: [3.7]
         # 1.6 is failed on cpu: https://github.com/kan-bayashi/ParallelWaveGAN/issues/198
-        pytorch-version: [1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, 1.10.2]
+        # 1.11 does not support python 3.6
+        pytorch-version: [1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, 1.10.2, 1.11.0]
     steps:
       - uses: actions/checkout@master
       - uses: actions/setup-python@v2
@@ -46,7 +47,7 @@ jobs:
       - name: Black & Flake8
         run: |
           source tools/venv/bin/activate
-          black --diff parallel_wavegan
+          black --diff --check parallel_wavegan
           flake8 parallel_wavegan
           flake8 --extend-ignore=D test
       - name: Pytest
@@ -59,8 +60,8 @@ jobs:
     strategy:
       max-parallel: 10
       matrix:
-        python-version: [3.7]
-        pytorch-version: [1.10.2]
+        python-version: [3.9]
+        pytorch-version: [1.11.0]
         config:
           - "parallel_wavegan.v1.debug.yaml"
           - "melgan.v1.debug.yaml"

diff --git a/README.md b/README.md
@@ -62,7 +62,7 @@ This repository is tested on Ubuntu 20.04 with a GPU Titan V.
 - sox (you can install via `sudo apt install sox` in ubuntu)
 
 Different CUDA versions should work but are not explicitly tested.  
-All of the codes are tested on Pytorch 1.4, 1.5.1, 1.7.1, 1.8.1, 1.9 and 10.2.
+All of the codes are tested on Pytorch 1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, 1.10.2 and 1.11.0.
 
 Pytorch 1.6 works but there are some issues in cpu mode (See #198).
 

diff --git a/egs/kiritan/voc1/local/dataset_split.py b/egs/kiritan/voc1/local/dataset_split.py
@@ -44,9 +44,7 @@ def process_text_info(text):
     for line in info.readlines():
         line = line.strip().split()
         label_info.append(
-            "{} {} {}".format(
-                float(line[0]), float(line[1]), line[2].strip()
-            )
+            "{} {} {}".format(float(line[0]), float(line[1]), line[2].strip())
         )
         text_info.append(line[2].strip())
     return " ".join(label_info), " ".join(text_info)
@@ -67,10 +65,13 @@ def process_subset(src_data, subset, check_func, fs):
             continue
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(song_index))
 
-        cmd = f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16"
+            f" -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
+        )
         print(f"cmd: {cmd}")
         os.system(cmd)
-        
+
         wavscp.write(
             "{} {}\n".format(
                 utt_id, os.path.join(fixed_data, "{}_bits16.wav".format(song_index))

diff --git a/egs/kising/voc1/local/data_prep.py b/egs/kising/voc1/local/data_prep.py
@@ -63,7 +63,10 @@ def process_subset(args, set_name, check_func):
         utt = song.split("_")[0]
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(utt))
 
-        cmd = f"sox {os.path.join(src_wavdir, song)} -c 1 -t wavpcm -b 16 -r {args.sr} {os.path.join(args.wav_dumpdir, utt_id)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_wavdir, song)} -c 1 -t wavpcm -b 16 -r"
+            f" {args.sr} {os.path.join(args.wav_dumpdir, utt_id)}_bits16.wav"
+        )
         os.system(cmd)
 
         wavscp.write(

diff --git a/egs/natsume/voc1/local/dataset_split.py b/egs/natsume/voc1/local/dataset_split.py
@@ -40,9 +40,7 @@ def process_text_info(text):
     for line in info.readlines():
         line = line.strip().split()
         label_info.append(
-            "{} {} {}".format(
-                float(line[0]), float(line[1]), line[2].strip()
-            )
+            "{} {} {}".format(float(line[0]), float(line[1]), line[2].strip())
         )
         text_info.append(line[2].strip())
     return " ".join(label_info), " ".join(text_info)
@@ -63,10 +61,13 @@ def process_subset(src_data, subset, check_func, fs):
             continue
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(song_index))
 
-        cmd = f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16"
+            f" -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
+        )
         print(f"cmd: {cmd}")
         os.system(cmd)
-        
+
         wavscp.write(
             "{} {}\n".format(
                 utt_id, os.path.join(fixed_data, "{}_bits16.wav".format(song_index))

diff --git a/egs/no7singing/voc1/local/dataset_split.py b/egs/no7singing/voc1/local/dataset_split.py
@@ -45,7 +45,9 @@ def process_text_info(text):
         line = line.strip().split()
         label_info.append(
             "{} {} {}".format(
-                float(line[0])/1e7, float(line[1])/1e7, line[2].strip() # no7singing timings to seconds
+                float(line[0]) / 1e7,
+                float(line[1]) / 1e7,
+                line[2].strip(),  # no7singing timings to seconds
             )
         )
         text_info.append(line[2].strip())
@@ -67,10 +69,13 @@ def process_subset(src_data, subset, check_func, fs):
             continue
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(song_index))
 
-        cmd = f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16"
+            f" -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
+        )
         print(f"cmd: {cmd}")
         os.system(cmd)
-        
+
         wavscp.write(
             "{} {}\n".format(
                 utt_id, os.path.join(fixed_data, "{}_bits16.wav".format(song_index))

diff --git a/egs/ofuton_p_utagoe_db/voc1/local/dataset_split.py b/egs/ofuton_p_utagoe_db/voc1/local/dataset_split.py
@@ -4,8 +4,20 @@
 
 
 UTT_PREFIX = "ofuton"
-DEV_LIST = ["chatsumi", "my_grandfathers_clock_3_2", "haruyo_koi", "momiji", "tetsudou_shouka"]
-TEST_LIST = ["usagito_kame", "my_grandfathers_clock_1_2", "antagata_dokosa", "momotarou", "furusato"]
+DEV_LIST = [
+    "chatsumi",
+    "my_grandfathers_clock_3_2",
+    "haruyo_koi",
+    "momiji",
+    "tetsudou_shouka",
+]
+TEST_LIST = [
+    "usagito_kame",
+    "my_grandfathers_clock_1_2",
+    "antagata_dokosa",
+    "momotarou",
+    "furusato",
+]
 
 
 def train_check(song):
@@ -65,10 +77,13 @@ def process_subset(src_data, subset, check_func, fs):
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(folder))
 
         makedir(os.path.join(fixed_data, folder))
-        cmd = f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r"
+            f" {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
+        )
         print(f"cmd: {cmd}")
         os.system(cmd)
-        
+
         wavscp.write(
             "{} {}\n".format(
                 utt_id, os.path.join(fixed_data, folder, "{}_bits16.wav".format(folder))

diff --git a/egs/oniku_kurumi_utagoe_db/voc1/local/dataset_split.py b/egs/oniku_kurumi_utagoe_db/voc1/local/dataset_split.py
@@ -65,10 +65,13 @@ def process_subset(src_data, subset, check_func, fs):
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(folder))
 
         makedir(os.path.join(fixed_data, folder))
-        cmd = f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r"
+            f" {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
+        )
         print(f"cmd: {cmd}")
         os.system(cmd)
-        
+
         wavscp.write(
             "{} {}\n".format(
                 utt_id, os.path.join(fixed_data, folder, "{}_bits16.wav".format(folder))

diff --git a/egs/opencpop/voc1/local/data_prep.py b/egs/opencpop/voc1/local/data_prep.py
@@ -10,7 +10,10 @@ def process_utterance(wavscp, utt2spk, audio_dir, wav_dumpdir, segment, tgt_sr=2
     utt2spk.write("{} {}\n".format(uid, "opencpop"))
 
     # apply bit convert, there is a known issue in direct convert in format wavscp
-    cmd = f"sox {os.path.join(audio_dir, uid)}.wav -c 1 -t wavpcm -b 16 -r {tgt_sr} {os.path.join(wav_dumpdir, uid)}_bits16.wav"
+    cmd = (
+        f"sox {os.path.join(audio_dir, uid)}.wav -c 1 -t wavpcm -b 16 -r"
+        f" {tgt_sr} {os.path.join(wav_dumpdir, uid)}_bits16.wav"
+    )
     os.system(cmd)
 
     wavscp.write("{} {}_bits16.wav\n".format(uid, os.path.join(wav_dumpdir, uid)))

diff --git a/egs/pjs/voc1/local/dataset_split.py b/egs/pjs/voc1/local/dataset_split.py
@@ -4,8 +4,30 @@
 
 
 UTT_PREFIX = "pjs"
-DEV_LIST = ["pjs002", "pjs0012", "pjs022", "pjs032", "pjs042", "pjs052", "pjs062", "pjs072", "pjs082", "pjs092"]
-TEST_LIST = ["pjs007", "pjs017", "pjs027", "pjs037", "pjs047", "pjs057", "pjs067", "pjs077", "pjs087", "pjs097"]
+DEV_LIST = [
+    "pjs002",
+    "pjs0012",
+    "pjs022",
+    "pjs032",
+    "pjs042",
+    "pjs052",
+    "pjs062",
+    "pjs072",
+    "pjs082",
+    "pjs092",
+]
+TEST_LIST = [
+    "pjs007",
+    "pjs017",
+    "pjs027",
+    "pjs037",
+    "pjs047",
+    "pjs057",
+    "pjs067",
+    "pjs077",
+    "pjs087",
+    "pjs097",
+]
 
 
 def train_check(song):
@@ -62,15 +84,18 @@ def process_subset(src_data, subset, check_func, fs):
             continue
         if not check_func(folder):
             continue
-        if folder == 'background_noise':
+        if folder == "background_noise":
             continue
         utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(folder))
 
         makedir(os.path.join(fixed_data, folder))
-        cmd = f"sox {os.path.join(src_data, folder, folder)}_song.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
+        cmd = (
+            f"sox {os.path.join(src_data, folder, folder)}_song.wav -c 1 -t wavpcm -b"
+            f" 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
+        )
         print(f"cmd: {cmd}")
         os.system(cmd)
-        
+
         wavscp.write(
             "{} {}\n".format(
                 utt_id, os.path.join(fixed_data, folder, "{}_bits16.wav".format(folder))

diff --git a/parallel_wavegan/bin/compute_statistics.py b/parallel_wavegan/bin/compute_statistics.py
@@ -25,22 +25,28 @@
 def main():
     """Run preprocessing process."""
     parser = argparse.ArgumentParser(
-        description="Compute mean and variance of dumped raw features "
-        "(See detail in parallel_wavegan/bin/compute_statistics.py)."
+        description=(
+            "Compute mean and variance of dumped raw features "
+            "(See detail in parallel_wavegan/bin/compute_statistics.py)."
+        )
     )
     parser.add_argument(
         "--feats-scp",
         "--scp",
         default=None,
         type=str,
-        help="kaldi-style feats.scp file. "
-        "you need to specify either feats-scp or rootdir.",
+        help=(
+            "kaldi-style feats.scp file. "
+            "you need to specify either feats-scp or rootdir."
+        ),
     )
     parser.add_argument(
         "--rootdir",
         type=str,
-        help="directory including feature files. "
-        "you need to specify either feats-scp or rootdir.",
+        help=(
+            "directory including feature files. "
+            "you need to specify either feats-scp or rootdir."
+        ),
     )
     parser.add_argument(
         "--config",
@@ -53,8 +59,10 @@ def main():
         default=None,
         type=str,
         required=True,
-        help="directory to save statistics. if not provided, "
-        "stats will be saved in the above root directory. (default=None)",
+        help=(
+            "directory to save statistics. if not provided, "
+            "stats will be saved in the above root directory. (default=None)"
+        ),
     )
     parser.add_argument(
         "--verbose",

diff --git a/parallel_wavegan/bin/decode.py b/parallel_wavegan/bin/decode.py
@@ -27,23 +27,29 @@
 def main():
     """Run decoding process."""
     parser = argparse.ArgumentParser(
-        description="Decode dumped features with trained Parallel WaveGAN Generator "
-        "(See detail in parallel_wavegan/bin/decode.py)."
+        description=(
+            "Decode dumped features with trained Parallel WaveGAN Generator "
+            "(See detail in parallel_wavegan/bin/decode.py)."
+        )
     )
     parser.add_argument(
         "--feats-scp",
         "--scp",
         default=None,
         type=str,
-        help="kaldi-style feats.scp file. "
-        "you need to specify either feats-scp or dumpdir.",
+        help=(
+            "kaldi-style feats.scp file. "
+            "you need to specify either feats-scp or dumpdir."
+        ),
     )
     parser.add_argument(
         "--dumpdir",
         default=None,
         type=str,
-        help="directory including feature files. "
-        "you need to specify either feats-scp or dumpdir.",
+        help=(
+            "directory including feature files. "
+            "you need to specify either feats-scp or dumpdir."
+        ),
     )
     parser.add_argument(
         "--outdir",
@@ -61,16 +67,20 @@ def main():
         "--config",
         default=None,
         type=str,
-        help="yaml format configuration file. if not explicitly provided, "
-        "it will be searched in the checkpoint directory. (default=None)",
+        help=(
+            "yaml format configuration file. if not explicitly provided, "
+            "it will be searched in the checkpoint directory. (default=None)"
+        ),
     )
     parser.add_argument(
         "--normalize-before",
         default=False,
         action="store_true",
-        help="whether to perform feature normalization before input to the model. "
-        "if true, it assumes that the feature is de-normalized. this is useful when "
-        "text2mel model and vocoder use different feature statistics.",
+        help=(
+            "whether to perform feature normalization before input to the model. if"
+            " true, it assumes that the feature is de-normalized. this is useful when"
+            " text2mel model and vocoder use different feature statistics."
+        ),
     )
     parser.add_argument(
         "--verbose",