Skip to content

Commit c77f0ef

Browse files
Merge pull request #61 from hyperion-ml/lachesis
Lachesis
2 parents fb3af32 + 6a2678f commit c77f0ef

File tree

9 files changed

+78
-14
lines changed

9 files changed

+78
-14
lines changed

egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@
5858

5959
foreach (@rec_dirs) {
6060
my $rec_id = $_;
61-
my $file_list = "$out_dir/lists_cat/$rec_id.txt";
61+
my $utt_id = "$spkr_id-$rec_id";
62+
my $file_list = "$out_dir/lists_cat/$utt_id.txt";
6263
if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){
6364
die "Error creating $file_list";
6465
}
6566
my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
6667
if($fs == 8){
6768
$wav = $wav." sox -t wav - -t wav -r 8k - |"
6869
}
69-
my $utt_id = "$spkr_id-$rec_id";
7070
print WAV "$utt_id", " $wav", "\n";
7171
print SPKR "$utt_id", " $spkr_id", "\n";
7272
}

egs/sre18/v1.8k/local/make_voxceleb2cat.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@
5858

5959
foreach (@rec_dirs) {
6060
my $rec_id = $_;
61-
my $file_list = "$out_dir/lists_cat/$rec_id.txt";
61+
my $utt_id = "$spkr_id-$rec_id";
62+
my $file_list = "$out_dir/lists_cat/$utt_id.txt";
6263
if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){
6364
die "Error creating $file_list";
6465
}
6566
my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
6667
if($fs == 8){
6768
$wav = $wav." sox -t wav - -t wav -r 8k - |"
6869
}
69-
my $utt_id = "$spkr_id-$rec_id";
7070
print WAV "$utt_id", " $wav", "\n";
7171
print SPKR "$utt_id", " $spkr_id", "\n";
7272
}

egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@
5858

5959
foreach (@rec_dirs) {
6060
my $rec_id = $_;
61-
my $file_list = "$out_dir/lists_cat/$rec_id.txt";
61+
my $utt_id = "$spkr_id-$rec_id";
62+
my $file_list = "$out_dir/lists_cat/$utt_id.txt";
6263
if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){
6364
die "Error creating $file_list";
6465
}
6566
my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
6667
if($fs == 8){
6768
$wav = $wav." sox -t wav - -t wav -r 8k - |"
6869
}
69-
my $utt_id = "$spkr_id-$rec_id";
7070
print WAV "$utt_id", " $wav", "\n";
7171
print SPKR "$utt_id", " $spkr_id", "\n";
7272
}

egs/voices_challenge/v1/local/make_voxceleb2cat.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@
5858

5959
foreach (@rec_dirs) {
6060
my $rec_id = $_;
61-
my $file_list = "$out_dir/lists_cat/$rec_id.txt";
61+
my $utt_id = "$spkr_id-$rec_id";
62+
my $file_list = "$out_dir/lists_cat/$utt_id.txt";
6263
if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){
6364
die "Error creating $file_list";
6465
}
6566
my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
6667
if($fs == 8){
6768
$wav = $wav." sox -t wav - -t wav -r 8k - |"
6869
}
69-
my $utt_id = "$spkr_id-$rec_id";
7070
print WAV "$utt_id", " $wav", "\n";
7171
print SPKR "$utt_id", " $spkr_id", "\n";
7272
}

egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@
5858

5959
foreach (@rec_dirs) {
6060
my $rec_id = $_;
61-
my $file_list = "$out_dir/lists_cat/$rec_id.txt";
61+
my $utt_id = "$spkr_id-$rec_id";
62+
my $file_list = "$out_dir/lists_cat/$utt_id.txt";
6263
if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){
6364
die "Error creating $file_list";
6465
}
6566
my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
6667
if($fs == 8){
6768
$wav = $wav." sox -t wav - -t wav -r 8k - |"
6869
}
69-
my $utt_id = "$spkr_id-$rec_id";
7070
print WAV "$utt_id", " $wav", "\n";
7171
print SPKR "$utt_id", " $spkr_id", "\n";
7272
}

egs/voxceleb/v1.1/README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
116116
| | | | Cosine | 0.96 | 0.065 | 0.110 |
117117
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 |
118118
| | | | Cosine | 0.93 | 0.067 | 0.108 |
119+
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 |
120+
| | | | Cosine | 0.85 | 0.060 | 0.094 |
119121
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 |
120122
| | | | Cosine | 1.29 | 0.084 | 0.140 |
121123
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 |
122124
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 |
125+
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 |
123126

124127

125128
### VoxCeleb 1 Entire-Clean trial list
@@ -153,10 +156,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
153156
| | | | Cosine | 1.05 | 0.069 | 0.121 |
154157
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 |
155158
| | | | Cosine | 0.98 | 0.063 | 0.110 |
159+
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 |
160+
| | | | Cosine | 0.94 | 0.061 | 0.107 |
156161
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 |
157162
| | | | Cosine | 1.27 | 0.079 | 0.142 |
158163
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 |
159164
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 |
165+
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 |
160166

161167

162168
### VoxCeleb 1 Hard-Clean trial list
@@ -190,8 +196,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
190196
| | | | Cosine | 1.99 | 0.119 | 0.196 |
191197
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 |
192198
| | | | Cosine | 1.89 | 0.112 | 0.184 |
199+
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 |
200+
| | | | Cosine | 1.84 | 0.110 | 0.186 |
193201
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 |
194202
| | | | Cosine | 2.26 | 0.134 | 0.214 |
195203
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 |
196204
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 |
197-
205+
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 |
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Res2Net50 w26s8 x-vector with mixed precision training
2+
3+
# acoustic features
4+
feat_config=conf/fbank80_stmn_16k.yaml
5+
feat_type=fbank80_stmn
6+
7+
#vad
8+
vad_config=conf/vad_16k.yaml
9+
10+
# x-vector training
11+
nnet_data=voxceleb2cat_train
12+
nnet_num_augs=6
13+
aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"
14+
15+
batch_size_1gpu=24
16+
eff_batch_size=512 # effective batch size
17+
ipe=$nnet_num_augs
18+
min_chunk=4
19+
max_chunk=4
20+
lr=0.05
21+
22+
nnet_type=res2net50
23+
dropout=0
24+
embed_dim=256
25+
width_factor=3.25
26+
scale=8
27+
ws_tag=w26s8
28+
29+
s=30
30+
margin_warmup=20
31+
margin=0.3
32+
33+
nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale"
34+
35+
opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 70 --swa-lr 1e-3 --swa-anneal-epochs 5"
36+
lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"
37+
38+
nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1
39+
nnet_num_epochs=90
40+
nnet_dir=exp/xvector_nnets/$nnet_name
41+
nnet=$nnet_dir/swa_model_ep0091.pth
42+
43+
44+
# back-end
45+
plda_aug_config=conf/reverb_noise_aug.yaml
46+
plda_num_augs=6
47+
if [ $plda_num_augs -eq 0 ]; then
48+
plda_data=voxceleb2cat_train
49+
else
50+
plda_data=voxceleb2cat_train_augx${plda_num_augs}
51+
fi
52+
plda_type=splda
53+
lda_dim=200
54+
plda_y_dim=150
55+
plda_z_dim=200
56+

egs/voxceleb/v1/local/make_voxceleb2cat.pl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@
5858

5959
foreach (@rec_dirs) {
6060
my $rec_id = $_;
61-
my $file_list = "$out_dir/lists_cat/$rec_id.txt";
61+
my $utt_id = "$spkr_id-$rec_id";
62+
my $file_list = "$out_dir/lists_cat/$utt_id.txt";
6263
if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0){
6364
die "Error creating $file_list";
6465
}
6566
my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
6667
if($fs == 8){
6768
$wav = $wav." sox -t wav - -t wav -r 8k - |"
6869
}
69-
my $utt_id = "$spkr_id-$rec_id";
7070
print WAV "$utt_id", " $wav", "\n";
7171
print SPKR "$utt_id", " $spkr_id", "\n";
7272
}

hyp_utils/xvectors/extract_xvectors_from_wav.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ if [ $stage -le 0 ];then
9090
--part-idx JOB --num-parts $nj \
9191
--input $data_dir/wav.scp \
9292
--model-path $nnet_file --chunk-length $chunk_length \
93-
--output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp || exit 1;
93+
--output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp
9494
set -e
9595
fi
9696

0 commit comments

Comments
 (0)