Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker: write an ffmpeg
# "concat" list of its .m4a files, then emit one line to WAV (utterance id
# followed by the piped decoding command) and one line to SPKR (utterance id
# followed by the speaker id).
foreach my $rec_id (@rec_dirs) {
    # The utterance id is built once, up front, because it names both the
    # list file and the output entries.
    # NOTE(review): presumably the list file is keyed by speaker+recording so
    # that equal recording ids under different speakers do not overwrite each
    # other's list -- confirm against the dataset layout.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # find prints one "file <path>" line per .m4a file into the list file;
    # the shell is required here for the output redirection.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # Piped command that concatenates the listed files into 16-bit PCM wav
    # on stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, append a sox stage that resamples to 8 kHz.
        $wav .= " sox -t wav - -t wav -r 8k - |";
    }

    print WAV $utt_id, " $wav", "\n";
    print SPKR $utt_id, " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/sre18/v1.8k/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker: write an ffmpeg
# "concat" list of its .m4a files, then emit one line to WAV (utterance id
# followed by the piped decoding command) and one line to SPKR (utterance id
# followed by the speaker id).
foreach my $rec_id (@rec_dirs) {
    # The utterance id is built once, up front, because it names both the
    # list file and the output entries.
    # NOTE(review): presumably the list file is keyed by speaker+recording so
    # that equal recording ids under different speakers do not overwrite each
    # other's list -- confirm against the dataset layout.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # find prints one "file <path>" line per .m4a file into the list file;
    # the shell is required here for the output redirection.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # Piped command that concatenates the listed files into 16-bit PCM wav
    # on stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, append a sox stage that resamples to 8 kHz.
        $wav .= " sox -t wav - -t wav -r 8k - |";
    }

    print WAV $utt_id, " $wav", "\n";
    print SPKR $utt_id, " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker: write an ffmpeg
# "concat" list of its .m4a files, then emit one line to WAV (utterance id
# followed by the piped decoding command) and one line to SPKR (utterance id
# followed by the speaker id).
foreach my $rec_id (@rec_dirs) {
    # The utterance id is built once, up front, because it names both the
    # list file and the output entries.
    # NOTE(review): presumably the list file is keyed by speaker+recording so
    # that equal recording ids under different speakers do not overwrite each
    # other's list -- confirm against the dataset layout.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # find prints one "file <path>" line per .m4a file into the list file;
    # the shell is required here for the output redirection.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # Piped command that concatenates the listed files into 16-bit PCM wav
    # on stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, append a sox stage that resamples to 8 kHz.
        $wav .= " sox -t wav - -t wav -r 8k - |";
    }

    print WAV $utt_id, " $wav", "\n";
    print SPKR $utt_id, " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/voices_challenge/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker: write an ffmpeg
# "concat" list of its .m4a files, then emit one line to WAV (utterance id
# followed by the piped decoding command) and one line to SPKR (utterance id
# followed by the speaker id).
foreach my $rec_id (@rec_dirs) {
    # The utterance id is built once, up front, because it names both the
    # list file and the output entries.
    # NOTE(review): presumably the list file is keyed by speaker+recording so
    # that equal recording ids under different speakers do not overwrite each
    # other's list -- confirm against the dataset layout.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # find prints one "file <path>" line per .m4a file into the list file;
    # the shell is required here for the output redirection.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # Piped command that concatenates the listed files into 16-bit PCM wav
    # on stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, append a sox stage that resamples to 8 kHz.
        $wav .= " sox -t wav - -t wav -r 8k - |";
    }

    print WAV $utt_id, " $wav", "\n";
    print SPKR $utt_id, " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker: write an ffmpeg
# "concat" list of its .m4a files, then emit one line to WAV (utterance id
# followed by the piped decoding command) and one line to SPKR (utterance id
# followed by the speaker id).
foreach my $rec_id (@rec_dirs) {
    # The utterance id is built once, up front, because it names both the
    # list file and the output entries.
    # NOTE(review): presumably the list file is keyed by speaker+recording so
    # that equal recording ids under different speakers do not overwrite each
    # other's list -- confirm against the dataset layout.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # find prints one "file <path>" line per .m4a file into the list file;
    # the shell is required here for the output redirection.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # Piped command that concatenates the listed files into 16-bit PCM wav
    # on stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, append a sox stage that resamples to 8 kHz.
        $wav .= " sox -t wav - -t wav -r 8k - |";
    }

    print WAV $utt_id, " $wav", "\n";
    print SPKR $utt_id, " $spkr_id", "\n";
}
Expand Down
10 changes: 9 additions & 1 deletion egs/voxceleb/v1.1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
| | | | Cosine | 0.96 | 0.065 | 0.110 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 |
| | | | Cosine | 0.93 | 0.067 | 0.108 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 |
| | | | Cosine | 0.85 | 0.060 | 0.094 |
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 |
| | | | Cosine | 1.29 | 0.084 | 0.140 |
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 |
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 |
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 |


### VoxCeleb 1 Entire-Clean trial list
Expand Down Expand Up @@ -153,10 +156,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
| | | | Cosine | 1.05 | 0.069 | 0.121 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 |
| | | | Cosine | 0.98 | 0.063 | 0.110 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 |
| | | | Cosine | 0.94 | 0.061 | 0.107 |
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 |
| | | | Cosine | 1.27 | 0.079 | 0.142 |
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 |
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 |
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 |


### VoxCeleb 1 Hard-Clean trial list
Expand Down Expand Up @@ -190,8 +196,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
| | | | Cosine | 1.99 | 0.119 | 0.196 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 |
| | | | Cosine | 1.89 | 0.112 | 0.184 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 |
| | | | Cosine | 1.84 | 0.110 | 0.186 |
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 |
| | | | Cosine | 2.26 | 0.134 | 0.214 |
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 |
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 |
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 |
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Res2Net50 w26s8 x-vector with mixed precision training (--use-amp)
# and SWA options (--swa-start / --swa-lr / --swa-anneal-epochs) in opt_opt.
# This file only assigns shell variables; it runs no commands itself.

# acoustic features
feat_config=conf/fbank80_stmn_16k.yaml
feat_type=fbank80_stmn

# vad
vad_config=conf/vad_16k.yaml

# x-vector training
nnet_data=voxceleb2cat_train
nnet_num_augs=6
# the same augmentation config is passed for both training and validation
aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"

batch_size_1gpu=24
eff_batch_size=512 # effective batch size
ipe=$nnet_num_augs
min_chunk=4
max_chunk=4
lr=0.05

# network architecture; width_factor and scale are forwarded through nnet_opt,
# ws_tag is only used to build nnet_name
nnet_type=res2net50
dropout=0
embed_dim=256
width_factor=3.25
scale=8
ws_tag=w26s8

# s and margin appear in nnet_name as "arcs${s}m${margin}"
s=30
margin_warmup=20
margin=0.3

# network options
nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale"

# optimizer options: adam with amsgrad, mixed precision, and the SWA settings
opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 70 --swa-lr 1e-3 --swa-anneal-epochs 5"
# learning-rate scheduler options
lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"

# experiment name and output locations
nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1
nnet_num_epochs=90
nnet_dir=exp/xvector_nnets/$nnet_name
# NOTE(review): the checkpoint is numbered 0091 while nnet_num_epochs is 90 --
# presumably the SWA model is written one epoch index past the last epoch; confirm.
nnet=$nnet_dir/swa_model_ep0091.pth


# back-end
plda_aug_config=conf/reverb_noise_aug.yaml
plda_num_augs=6
# use the augmented training set name whenever plda_num_augs is non-zero
if [ $plda_num_augs -eq 0 ]; then
plda_data=voxceleb2cat_train
else
plda_data=voxceleb2cat_train_augx${plda_num_augs}
fi
plda_type=splda
lda_dim=200
plda_y_dim=150
plda_z_dim=200

4 changes: 2 additions & 2 deletions egs/voxceleb/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker: write an ffmpeg
# "concat" list of its .m4a files, then emit one line to WAV (utterance id
# followed by the piped decoding command) and one line to SPKR (utterance id
# followed by the speaker id).
foreach my $rec_id (@rec_dirs) {
    # The utterance id is built once, up front, because it names both the
    # list file and the output entries.
    # NOTE(review): presumably the list file is keyed by speaker+recording so
    # that equal recording ids under different speakers do not overwrite each
    # other's list -- confirm against the dataset layout.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # find prints one "file <path>" line per .m4a file into the list file;
    # the shell is required here for the output redirection.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # Piped command that concatenates the listed files into 16-bit PCM wav
    # on stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, append a sox stage that resamples to 8 kHz.
        $wav .= " sox -t wav - -t wav -r 8k - |";
    }

    print WAV $utt_id, " $wav", "\n";
    print SPKR $utt_id, " $spkr_id", "\n";
}
Expand Down
2 changes: 1 addition & 1 deletion hyp_utils/xvectors/extract_xvectors_from_wav.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ if [ $stage -le 0 ];then
--part-idx JOB --num-parts $nj \
--input $data_dir/wav.scp \
--model-path $nnet_file --chunk-length $chunk_length \
--output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp || exit 1;
--output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp
set -e
fi

Expand Down