Skip to content

Commit

Permalink
Merge pull request #362 from kan-bayashi/minor_update
Browse files Browse the repository at this point in the history
  • Loading branch information
kan-bayashi authored May 17, 2022
2 parents a2fc640 + 0ad70d5 commit d5d4792
Show file tree
Hide file tree
Showing 26 changed files with 250 additions and 133 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ jobs:
linter_and_test:
runs-on: ubuntu-20.04
strategy:
max-parallel: 5
max-parallel: 8
matrix:
python-version: [3.6]
python-version: [3.7]
# 1.6 fails on cpu: https://github.com/kan-bayashi/ParallelWaveGAN/issues/198
pytorch-version: [1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, 1.10.2]
# 1.10 does not support python 3.6
pytorch-version: [1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, 1.10.2, 1.11.0]
steps:
- uses: actions/checkout@master
- uses: actions/setup-python@v2
Expand Down Expand Up @@ -46,7 +47,7 @@ jobs:
- name: Black & Flake8
run: |
source tools/venv/bin/activate
black --diff parallel_wavegan
black --diff --check parallel_wavegan
flake8 parallel_wavegan
flake8 --extend-ignore=D test
- name: Pytest
Expand All @@ -59,8 +60,8 @@ jobs:
strategy:
max-parallel: 10
matrix:
python-version: [3.7]
pytorch-version: [1.10.2]
python-version: [3.9]
pytorch-version: [1.11.0]
config:
- "parallel_wavegan.v1.debug.yaml"
- "melgan.v1.debug.yaml"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ This repository is tested on Ubuntu 20.04 with a GPU Titan V.
- sox (you can install via `sudo apt install sox` in ubuntu)

Different cuda version should be working but not explicitly tested.
All of the code is tested on PyTorch 1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, and 1.10.2.
All of the code is tested on PyTorch 1.4, 1.5.1, 1.7.1, 1.8.1, 1.9, 1.10.2, and 1.11.0.

Pytorch 1.6 works but there are some issues in cpu mode (See #198).

Expand Down
11 changes: 6 additions & 5 deletions egs/kiritan/voc1/local/dataset_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ def process_text_info(text):
for line in info.readlines():
line = line.strip().split()
label_info.append(
"{} {} {}".format(
float(line[0]), float(line[1]), line[2].strip()
)
"{} {} {}".format(float(line[0]), float(line[1]), line[2].strip())
)
text_info.append(line[2].strip())
return " ".join(label_info), " ".join(text_info)
Expand All @@ -67,10 +65,13 @@ def process_subset(src_data, subset, check_func, fs):
continue
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(song_index))

cmd = f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16"
f" -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
)
print(f"cmd: {cmd}")
os.system(cmd)

wavscp.write(
"{} {}\n".format(
utt_id, os.path.join(fixed_data, "{}_bits16.wav".format(song_index))
Expand Down
5 changes: 4 additions & 1 deletion egs/kising/voc1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def process_subset(args, set_name, check_func):
utt = song.split("_")[0]
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(utt))

cmd = f"sox {os.path.join(src_wavdir, song)} -c 1 -t wavpcm -b 16 -r {args.sr} {os.path.join(args.wav_dumpdir, utt_id)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_wavdir, song)} -c 1 -t wavpcm -b 16 -r"
f" {args.sr} {os.path.join(args.wav_dumpdir, utt_id)}_bits16.wav"
)
os.system(cmd)

wavscp.write(
Expand Down
11 changes: 6 additions & 5 deletions egs/natsume/voc1/local/dataset_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ def process_text_info(text):
for line in info.readlines():
line = line.strip().split()
label_info.append(
"{} {} {}".format(
float(line[0]), float(line[1]), line[2].strip()
)
"{} {} {}".format(float(line[0]), float(line[1]), line[2].strip())
)
text_info.append(line[2].strip())
return " ".join(label_info), " ".join(text_info)
Expand All @@ -63,10 +61,13 @@ def process_subset(src_data, subset, check_func, fs):
continue
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(song_index))

cmd = f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16"
f" -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
)
print(f"cmd: {cmd}")
os.system(cmd)

wavscp.write(
"{} {}\n".format(
utt_id, os.path.join(fixed_data, "{}_bits16.wav".format(song_index))
Expand Down
11 changes: 8 additions & 3 deletions egs/no7singing/voc1/local/dataset_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def process_text_info(text):
line = line.strip().split()
label_info.append(
"{} {} {}".format(
float(line[0])/1e7, float(line[1])/1e7, line[2].strip() # no7singing timings to seconds
float(line[0]) / 1e7,
float(line[1]) / 1e7,
line[2].strip(), # no7singing timings to seconds
)
)
text_info.append(line[2].strip())
Expand All @@ -67,10 +69,13 @@ def process_subset(src_data, subset, check_func, fs):
continue
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(song_index))

cmd = f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_data, 'wav', song_index)}.wav -c 1 -t wavpcm -b 16"
f" -r {fs} {os.path.join(fixed_data, song_index)}_bits16.wav"
)
print(f"cmd: {cmd}")
os.system(cmd)

wavscp.write(
"{} {}\n".format(
utt_id, os.path.join(fixed_data, "{}_bits16.wav".format(song_index))
Expand Down
23 changes: 19 additions & 4 deletions egs/ofuton_p_utagoe_db/voc1/local/dataset_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,20 @@


UTT_PREFIX = "ofuton"
DEV_LIST = ["chatsumi", "my_grandfathers_clock_3_2", "haruyo_koi", "momiji", "tetsudou_shouka"]
TEST_LIST = ["usagito_kame", "my_grandfathers_clock_1_2", "antagata_dokosa", "momotarou", "furusato"]
DEV_LIST = [
"chatsumi",
"my_grandfathers_clock_3_2",
"haruyo_koi",
"momiji",
"tetsudou_shouka",
]
TEST_LIST = [
"usagito_kame",
"my_grandfathers_clock_1_2",
"antagata_dokosa",
"momotarou",
"furusato",
]


def train_check(song):
Expand Down Expand Up @@ -65,10 +77,13 @@ def process_subset(src_data, subset, check_func, fs):
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(folder))

makedir(os.path.join(fixed_data, folder))
cmd = f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r"
f" {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
)
print(f"cmd: {cmd}")
os.system(cmd)

wavscp.write(
"{} {}\n".format(
utt_id, os.path.join(fixed_data, folder, "{}_bits16.wav".format(folder))
Expand Down
7 changes: 5 additions & 2 deletions egs/oniku_kurumi_utagoe_db/voc1/local/dataset_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,13 @@ def process_subset(src_data, subset, check_func, fs):
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(folder))

makedir(os.path.join(fixed_data, folder))
cmd = f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_data, folder, folder)}.wav -c 1 -t wavpcm -b 16 -r"
f" {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
)
print(f"cmd: {cmd}")
os.system(cmd)

wavscp.write(
"{} {}\n".format(
utt_id, os.path.join(fixed_data, folder, "{}_bits16.wav".format(folder))
Expand Down
5 changes: 4 additions & 1 deletion egs/opencpop/voc1/local/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ def process_utterance(wavscp, utt2spk, audio_dir, wav_dumpdir, segment, tgt_sr=2
utt2spk.write("{} {}\n".format(uid, "opencpop"))

# apply bit convert, there is a known issue in direct convert in format wavscp
cmd = f"sox {os.path.join(audio_dir, uid)}.wav -c 1 -t wavpcm -b 16 -r {tgt_sr} {os.path.join(wav_dumpdir, uid)}_bits16.wav"
cmd = (
f"sox {os.path.join(audio_dir, uid)}.wav -c 1 -t wavpcm -b 16 -r"
f" {tgt_sr} {os.path.join(wav_dumpdir, uid)}_bits16.wav"
)
os.system(cmd)

wavscp.write("{} {}_bits16.wav\n".format(uid, os.path.join(wav_dumpdir, uid)))
Expand Down
35 changes: 30 additions & 5 deletions egs/pjs/voc1/local/dataset_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,30 @@


UTT_PREFIX = "pjs"
DEV_LIST = ["pjs002", "pjs0012", "pjs022", "pjs032", "pjs042", "pjs052", "pjs062", "pjs072", "pjs082", "pjs092"]
TEST_LIST = ["pjs007", "pjs017", "pjs027", "pjs037", "pjs047", "pjs057", "pjs067", "pjs077", "pjs087", "pjs097"]
# Song identifiers listed for the development split of the PJS recipe.
# NOTE(review): presumably these are compared against the per-song folder
# names by the split check functions -- confirm against dev_check.
# The second entry used to read "pjs0012", which breaks the three-digit
# "pjsNNN" pattern shared by every other entry here and in TEST_LIST, so it
# could never equal a name of that form; corrected to "pjs012".
DEV_LIST = [
    "pjs002",
    "pjs012",
    "pjs022",
    "pjs032",
    "pjs042",
    "pjs052",
    "pjs062",
    "pjs072",
    "pjs082",
    "pjs092",
]
TEST_LIST = [
"pjs007",
"pjs017",
"pjs027",
"pjs037",
"pjs047",
"pjs057",
"pjs067",
"pjs077",
"pjs087",
"pjs097",
]


def train_check(song):
Expand Down Expand Up @@ -62,15 +84,18 @@ def process_subset(src_data, subset, check_func, fs):
continue
if not check_func(folder):
continue
if folder == 'background_noise':
if folder == "background_noise":
continue
utt_id = "{}_{}".format(UTT_PREFIX, pack_zero(folder))

makedir(os.path.join(fixed_data, folder))
cmd = f"sox {os.path.join(src_data, folder, folder)}_song.wav -c 1 -t wavpcm -b 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
cmd = (
f"sox {os.path.join(src_data, folder, folder)}_song.wav -c 1 -t wavpcm -b"
f" 16 -r {fs} {os.path.join(fixed_data, folder, folder)}_bits16.wav"
)
print(f"cmd: {cmd}")
os.system(cmd)

wavscp.write(
"{} {}\n".format(
utt_id, os.path.join(fixed_data, folder, "{}_bits16.wav".format(folder))
Expand Down
24 changes: 16 additions & 8 deletions parallel_wavegan/bin/compute_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,28 @@
def main():
"""Run preprocessing process."""
parser = argparse.ArgumentParser(
description="Compute mean and variance of dumped raw features "
"(See detail in parallel_wavegan/bin/compute_statistics.py)."
description=(
"Compute mean and variance of dumped raw features "
"(See detail in parallel_wavegan/bin/compute_statistics.py)."
)
)
parser.add_argument(
"--feats-scp",
"--scp",
default=None,
type=str,
help="kaldi-style feats.scp file. "
"you need to specify either feats-scp or rootdir.",
help=(
"kaldi-style feats.scp file. "
"you need to specify either feats-scp or rootdir."
),
)
parser.add_argument(
"--rootdir",
type=str,
help="directory including feature files. "
"you need to specify either feats-scp or rootdir.",
help=(
"directory including feature files. "
"you need to specify either feats-scp or rootdir."
),
)
parser.add_argument(
"--config",
Expand All @@ -53,8 +59,10 @@ def main():
default=None,
type=str,
required=True,
help="directory to save statistics. if not provided, "
"stats will be saved in the above root directory. (default=None)",
help=(
"directory to save statistics. if not provided, "
"stats will be saved in the above root directory. (default=None)"
),
)
parser.add_argument(
"--verbose",
Expand Down
32 changes: 21 additions & 11 deletions parallel_wavegan/bin/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,29 @@
def main():
"""Run decoding process."""
parser = argparse.ArgumentParser(
description="Decode dumped features with trained Parallel WaveGAN Generator "
"(See detail in parallel_wavegan/bin/decode.py)."
description=(
"Decode dumped features with trained Parallel WaveGAN Generator "
"(See detail in parallel_wavegan/bin/decode.py)."
)
)
parser.add_argument(
"--feats-scp",
"--scp",
default=None,
type=str,
help="kaldi-style feats.scp file. "
"you need to specify either feats-scp or dumpdir.",
help=(
"kaldi-style feats.scp file. "
"you need to specify either feats-scp or dumpdir."
),
)
parser.add_argument(
"--dumpdir",
default=None,
type=str,
help="directory including feature files. "
"you need to specify either feats-scp or dumpdir.",
help=(
"directory including feature files. "
"you need to specify either feats-scp or dumpdir."
),
)
parser.add_argument(
"--outdir",
Expand All @@ -61,16 +67,20 @@ def main():
"--config",
default=None,
type=str,
help="yaml format configuration file. if not explicitly provided, "
"it will be searched in the checkpoint directory. (default=None)",
help=(
"yaml format configuration file. if not explicitly provided, "
"it will be searched in the checkpoint directory. (default=None)"
),
)
parser.add_argument(
"--normalize-before",
default=False,
action="store_true",
help="whether to perform feature normalization before input to the model. "
"if true, it assumes that the feature is de-normalized. this is useful when "
"text2mel model and vocoder use different feature statistics.",
help=(
"whether to perform feature normalization before input to the model. if"
" true, it assumes that the feature is de-normalized. this is useful when"
" text2mel model and vocoder use different feature statistics."
),
)
parser.add_argument(
"--verbose",
Expand Down
Loading

0 comments on commit d5d4792

Please sign in to comment.