diff --git a/egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl b/egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl index c037e094..6bea3737 100755 --- a/egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl +++ b/egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl @@ -58,7 +58,8 @@ foreach (@rec_dirs) { my $rec_id = $_; - my $file_list = "$out_dir/lists_cat/$rec_id.txt"; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ die "Error creating $file_list"; } @@ -66,7 +67,6 @@ if($fs == 8){ $wav = $wav." sox -t wav - -t wav -r 8k - |" } - my $utt_id = "$spkr_id-$rec_id"; print WAV "$utt_id", " $wav", "\n"; print SPKR "$utt_id", " $spkr_id", "\n"; } diff --git a/egs/sre18/v1.8k/local/make_voxceleb2cat.pl b/egs/sre18/v1.8k/local/make_voxceleb2cat.pl index c037e094..6bea3737 100755 --- a/egs/sre18/v1.8k/local/make_voxceleb2cat.pl +++ b/egs/sre18/v1.8k/local/make_voxceleb2cat.pl @@ -58,7 +58,8 @@ foreach (@rec_dirs) { my $rec_id = $_; - my $file_list = "$out_dir/lists_cat/$rec_id.txt"; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ die "Error creating $file_list"; } @@ -66,7 +67,6 @@ if($fs == 8){ $wav = $wav." 
sox -t wav - -t wav -r 8k - |" } - my $utt_id = "$spkr_id-$rec_id"; print WAV "$utt_id", " $wav", "\n"; print SPKR "$utt_id", " $spkr_id", "\n"; } diff --git a/egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl b/egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl index c037e094..6bea3737 100755 --- a/egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl +++ b/egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl @@ -58,7 +58,8 @@ foreach (@rec_dirs) { my $rec_id = $_; - my $file_list = "$out_dir/lists_cat/$rec_id.txt"; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ die "Error creating $file_list"; } @@ -66,7 +67,6 @@ if($fs == 8){ $wav = $wav." sox -t wav - -t wav -r 8k - |" } - my $utt_id = "$spkr_id-$rec_id"; print WAV "$utt_id", " $wav", "\n"; print SPKR "$utt_id", " $spkr_id", "\n"; } diff --git a/egs/voices_challenge/v1/local/make_voxceleb2cat.pl b/egs/voices_challenge/v1/local/make_voxceleb2cat.pl index c037e094..6bea3737 100755 --- a/egs/voices_challenge/v1/local/make_voxceleb2cat.pl +++ b/egs/voices_challenge/v1/local/make_voxceleb2cat.pl @@ -58,7 +58,8 @@ foreach (@rec_dirs) { my $rec_id = $_; - my $file_list = "$out_dir/lists_cat/$rec_id.txt"; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ die "Error creating $file_list"; } @@ -66,7 +67,6 @@ if($fs == 8){ $wav = $wav." 
sox -t wav - -t wav -r 8k - |" } - my $utt_id = "$spkr_id-$rec_id"; print WAV "$utt_id", " $wav", "\n"; print SPKR "$utt_id", " $spkr_id", "\n"; } diff --git a/egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl b/egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl index c037e094..6bea3737 100755 --- a/egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl +++ b/egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl @@ -58,7 +58,8 @@ foreach (@rec_dirs) { my $rec_id = $_; - my $file_list = "$out_dir/lists_cat/$rec_id.txt"; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ die "Error creating $file_list"; } @@ -66,7 +67,6 @@ if($fs == 8){ $wav = $wav." sox -t wav - -t wav -r 8k - |" } - my $utt_id = "$spkr_id-$rec_id"; print WAV "$utt_id", " $wav", "\n"; print SPKR "$utt_id", " $spkr_id", "\n"; } diff --git a/egs/voxceleb/v1.1/README.md b/egs/voxceleb/v1.1/README.md index d456a495..5b5b93e5 100644 --- a/egs/voxceleb/v1.1/README.md +++ b/egs/voxceleb/v1.1/README.md @@ -116,10 +116,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | | | | Cosine | 0.96 | 0.065 | 0.110 | | config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 | | | | | Cosine | 0.93 | 0.067 | 0.108 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 | +| | | | Cosine | 0.85 | 0.060 | 0.094 | | config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 | | | | | Cosine | 1.29 | 0.084 | 0.140 | | config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 | | config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | 
ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 | +| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 | ### VoxCeleb 1 Entire-Clean trial list @@ -153,10 +156,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | | | | Cosine | 1.05 | 0.069 | 0.121 | | config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 | | | | | Cosine | 0.98 | 0.063 | 0.110 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 | +| | | | Cosine | 0.94 | 0.061 | 0.107 | | config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 | | | | | Cosine | 1.27 | 0.079 | 0.142 | | config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 | | config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 | +| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 | ### VoxCeleb 1 Hard-Clean trial list @@ -190,8 +196,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr | | | | Cosine | 1.99 | 0.119 | 0.196 | | config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 | | | | | Cosine | 1.89 | 0.112 | 0.184 | +| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 | +| | | | Cosine | 1.84 | 0.110 | 0.186 | | config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 
0.252 | | | | | Cosine | 2.26 | 0.134 | 0.214 | | config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 | | config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 | - +| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 | diff --git a/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh new file mode 100644 index 00000000..c2191649 --- /dev/null +++ b/egs/voxceleb/v1.1/global_conf/config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh @@ -0,0 +1,56 @@ +# Res2Net50 w26s8 x-vector with mixed precision training and stochastic weight averaging (SWA) + +# acoustic features +feat_config=conf/fbank80_stmn_16k.yaml +feat_type=fbank80_stmn + +#vad +vad_config=conf/vad_16k.yaml + +# x-vector training +nnet_data=voxceleb2cat_train +nnet_num_augs=6 +aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml" + +batch_size_1gpu=24 +eff_batch_size=512 # effective batch size +ipe=$nnet_num_augs +min_chunk=4 +max_chunk=4 +lr=0.05 + +nnet_type=res2net50 +dropout=0 +embed_dim=256 +width_factor=3.25 +scale=8 +ws_tag=w26s8 + +s=30 +margin_warmup=20 +margin=0.3 + +nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale" + +opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 70 --swa-lr 1e-3 --swa-anneal-epochs 5" +lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 
--lrsched.update-lr-on-opt-step" + +nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1 +nnet_num_epochs=90 +nnet_dir=exp/xvector_nnets/$nnet_name +nnet=$nnet_dir/swa_model_ep0091.pth + + +# back-end +plda_aug_config=conf/reverb_noise_aug.yaml +plda_num_augs=6 +if [ $plda_num_augs -eq 0 ]; then + plda_data=voxceleb2cat_train +else + plda_data=voxceleb2cat_train_augx${plda_num_augs} +fi +plda_type=splda +lda_dim=200 +plda_y_dim=150 +plda_z_dim=200 + diff --git a/egs/voxceleb/v1/local/make_voxceleb2cat.pl b/egs/voxceleb/v1/local/make_voxceleb2cat.pl index c037e094..6bea3737 100755 --- a/egs/voxceleb/v1/local/make_voxceleb2cat.pl +++ b/egs/voxceleb/v1/local/make_voxceleb2cat.pl @@ -58,7 +58,8 @@ foreach (@rec_dirs) { my $rec_id = $_; - my $file_list = "$out_dir/lists_cat/$rec_id.txt"; + my $utt_id = "$spkr_id-$rec_id"; + my $file_list = "$out_dir/lists_cat/$utt_id.txt"; if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){ die "Error creating $file_list"; } @@ -66,7 +67,6 @@ if($fs == 8){ $wav = $wav." sox -t wav - -t wav -r 8k - |" } - my $utt_id = "$spkr_id-$rec_id"; print WAV "$utt_id", " $wav", "\n"; print SPKR "$utt_id", " $spkr_id", "\n"; } diff --git a/hyp_utils/xvectors/extract_xvectors_from_wav.sh b/hyp_utils/xvectors/extract_xvectors_from_wav.sh index c2447610..2aa0d460 100755 --- a/hyp_utils/xvectors/extract_xvectors_from_wav.sh +++ b/hyp_utils/xvectors/extract_xvectors_from_wav.sh @@ -90,7 +90,7 @@ if [ $stage -le 0 ];then --part-idx JOB --num-parts $nj \ --input $data_dir/wav.scp \ --model-path $nnet_file --chunk-length $chunk_length \ - --output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp || exit 1; + --output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp set -e fi