-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.sh
129 lines (112 loc) · 5.29 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
# Copyright 2020 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)
# Load cmd.sh and path.sh from the current directory; stop if either cannot be sourced.
source ./cmd.sh || exit 1
source ./path.sh || exit 1

# ---------------------------------------------------------------------------
# Basic settings
# ---------------------------------------------------------------------------
# first stage to run
stage=-1
# last stage to run
stop_stage=100
# verbosity level (a lower value prints less)
verbose=1
# number of workers used in training
world_size=1
# file path for init_process_group in distributed training
distributed_init=""
# number of parallel jobs in feature extraction
nj=16

# ---------------------------------------------------------------------------
# Configuration and feature settings
# ---------------------------------------------------------------------------
# NOTE(kan-bayashi): the variable is called conf (not config) to avoid a
# conflict in parse_options.sh
conf="conf/ctxv2w.v1.yaml"
# sampling frequency
sampling_rate=16000
# number of mel basis
num_mels=80
# number of shift points
hop_size=160
# window length
win_length=465

# ---------------------------------------------------------------------------
# Speaker setting
# ---------------------------------------------------------------------------
# "clean": use only clean data
# "all":   use clean + other data
part=all

# ---------------------------------------------------------------------------
# Directory paths
# ---------------------------------------------------------------------------
datadir="${PWD}/data"
featdir="${PWD}/feats"

# ---------------------------------------------------------------------------
# Training related settings
# ---------------------------------------------------------------------------
# tag for the directory in which the model is saved
tag=
# checkpoint path to resume training from
# (e.g. <path>/<to>/checkpoint-10000steps.pkl)
resume=

# ---------------------------------------------------------------------------
# Decoding related settings
# ---------------------------------------------------------------------------
# checkpoint path used for decoding; if not provided, the latest one is used
# (e.g. <path>/<to>/checkpoint-400000steps.pkl)
checkpoint=""

# Names of the data directories.
# NOTE(review): these are expanded here, before parse_options.sh is sourced, so
# presumably overriding only `part` on the command line does not change them -
# confirm against parse_options.sh.
# name of training data directory
train_set=train_${part}
# name of development data directory
dev_set=dev_${part}
# name of evaluation data directory
eval_set=eval_${part}

# shellcheck disable=SC1091
source ./utils/parse_options.sh || exit 1
# Abort on the first failing command, and let a pipeline fail when any of its
# stages fails.
set -eo pipefail

# Make sure the training and decoding entry points are executable.
chmod +x ctx_vec2wav/bin/train.py ctx_vec2wav/bin/decode.py

# Directory that holds the VQ codebook (codebook.npy is read from it in training).
# NOTE(review): this is relative to the current directory and is not derived
# from ${featdir}; the two only coincide while featdir keeps its default value
# - confirm that this is intended.
vqdir=feats/vqidx/

# Experiment directory: named after the tag when one is given, otherwise after
# the config file name without its .yaml suffix.
if [ -n "${tag}" ]; then
    expdir="exp/${train_set}_${tag}"
else
    expdir="exp/${train_set}_$(basename "${conf}" .yaml)"
fi

# Most recently modified checkpoint in the experiment directory; empty when
# there is none yet. `|| true` keeps the failing ls from aborting the script
# under `set -eo pipefail` in that case.
# shellcheck disable=SC2012
last_checkpoint="$(ls -dt "${expdir}"/*.pkl | head -1 || true)"

# Resume from the latest checkpoint unless a checkpoint was given explicitly.
# The expansion is quoted so that a path containing whitespace or glob
# characters is tested as a single word.
if [ -z "${resume}" ]; then
    resume="${last_checkpoint}"
fi
# Stage 2: network training.
# Reads the wav / vqidx / mel / aux scp files and utt2num_frames of the
# training and development sets, and writes into ${expdir}. The job is
# launched through ${cuda_cmd} with its log at ${expdir}/log/train.log.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    echo "Stage 2: Network training"
    [ -e "${expdir}" ] || mkdir -p "${expdir}"
    echo "Hostname: $(hostname)."
    echo "CUDA Devices: ${CUDA_VISIBLE_DEVICES:-}"
    # Point at the log file that is actually passed to ${cuda_cmd} below.
    echo "Training start. See the progress via ${expdir}/log/train.log."
    # ${cuda_cmd} is intentionally left unquoted so that a launcher defined
    # together with its own options is split into separate words.
    # shellcheck disable=SC2086
    ${cuda_cmd} --gpu 1 "${expdir}/log/train.log" \
        train.py \
            --config "${conf}" \
            --train-wav-scp "${datadir}/${train_set}/wav.scp" \
            --train-vqidx-scp "${featdir}/vqidx/${train_set}/feats.scp" \
            --train-mel-scp "${featdir}/normed_fbank/${train_set}/feats.scp" \
            --train-aux-scp "${featdir}/normed_ppe/${train_set}/feats.scp" \
            --train-num-frames "${datadir}/${train_set}/utt2num_frames" \
            --dev-wav-scp "${datadir}/${dev_set}/wav.scp" \
            --dev-vqidx-scp "${featdir}/vqidx/${dev_set}/feats.scp" \
            --dev-mel-scp "${featdir}/normed_fbank/${dev_set}/feats.scp" \
            --dev-aux-scp "${featdir}/normed_ppe/${dev_set}/feats.scp" \
            --dev-num-frames "${datadir}/${dev_set}/utt2num_frames" \
            --vq-codebook "${vqdir}/codebook.npy" \
            --outdir "${expdir}" \
            --resume "${resume}" \
            --sampling-rate "${sampling_rate}" \
            --hop-size "${hop_size}" \
            --num-mels "${num_mels}" \
            --win-length "${win_length}" \
            --verbose "${verbose}"
    echo "Successfully finished training."
fi
# Stage 3: network decoding.
# For each evaluation set: prepare the normed_fbank and vqidx feats.scp when
# their directories do not exist yet, regenerate utt2num_frames and prompt.scp,
# then run decode.py through ${cuda_cmd}. Output goes to
# ${expdir}/synthesis/<checkpoint name>/<set name>/.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "Stage 3: Network decoding"

    # Fall back to the most recently modified checkpoint of the experiment.
    if [ -z "${checkpoint}" ]; then
        # shellcheck disable=SC2012
        checkpoint="$(ls -dt "${expdir}"/*.pkl | head -1 || true)"
    fi
    # Without a checkpoint the output directory name would be empty and the
    # decoder would be started with an empty path, so stop here instead.
    if [ -z "${checkpoint}" ]; then
        echo "Error: no checkpoint was given and none was found in ${expdir}." >&2
        exit 1
    fi
    outdir="${expdir}/synthesis/$(basename "${checkpoint}" .pkl)"

    for name in "${eval_set}"; do
        set_dir="${datadir}/${name}"
        fbank_dir="${featdir}/normed_fbank/${name}"
        vqidx_dir="${featdir}/vqidx/${name}"

        [ -e "${outdir}/${name}" ] || mkdir -p "${outdir}/${name}"

        # Build the feature lists of this set from the dev_all and eval_all
        # lists, keeping only the utterances listed in the set's wav.scp.
        # The brace expansion has to stay outside the quotes to be expanded.
        if [ ! -e "${fbank_dir}" ]; then
            mkdir -p "${fbank_dir}"
            cat "${featdir}"/normed_fbank/{dev_all,eval_all}/feats.scp \
                | filter_scp.pl "${set_dir}/wav.scp" - \
                | uniq > "${fbank_dir}/feats.scp"
        fi
        if [ ! -e "${vqidx_dir}" ]; then
            mkdir -p "${vqidx_dir}"
            cat "${featdir}"/vqidx/{dev_all,eval_all}/feats.scp \
                | filter_scp.pl "${set_dir}/wav.scp" - \
                | uniq > "${vqidx_dir}/feats.scp"
        fi

        ./utils/feat-to-len.py "scp:${fbank_dir}/feats.scp" > "${set_dir}/utt2num_frames"
        # Read the file via stdin so that wc prints the count only, without
        # the file path in the middle of the message.
        echo "$(wc -l < "${fbank_dir}/feats.scp") utterances for decoding"

        # The trailing 300 is handed to the script unchanged; its meaning is
        # defined in local/build_prompt_feat.py.
        python local/build_prompt_feat.py \
            "${set_dir}/utt2num_frames" \
            "${set_dir}/utt2spk" \
            "${fbank_dir}/feats.scp" \
            300 > "${set_dir}/prompt.scp"

        echo "Decoding start. See the progress via ${outdir}/${name}/log/decode.log."
        # ${cuda_cmd} is intentionally left unquoted so that a launcher defined
        # together with its own options is split into separate words.
        # shellcheck disable=SC2086
        ${cuda_cmd} --gpu 1 "${outdir}/${name}/log/decode.log" \
            decode.py \
                --feats-scp "${vqidx_dir}/feats.scp" \
                --prompt-scp "${set_dir}/prompt.scp" \
                --num-frames "${set_dir}/utt2num_frames" \
                --checkpoint "${checkpoint}" \
                --outdir "${outdir}/${name}/wav" \
                --verbose "${verbose}"
        echo "Successfully finished decoding of ${name} set."
    done
    echo "Successfully finished decoding."
fi
echo "Finished."