diff --git a/egs/voxceleb/v1.1/run_001_prepare_data.sh b/egs/voxceleb/v1.1/run_001_prepare_data.sh
index a87531d0..dd9937b7 100755
--- a/egs/voxceleb/v1.1/run_001_prepare_data.sh
+++ b/egs/voxceleb/v1.1/run_001_prepare_data.sh
@@ -15,14 +15,16 @@ config_file=default_config.sh
 
 
 if [ $stage -le 1 ];then
-
-    # Prepare the VoxCeleb2 dataset for training.
-    local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train
-    # local/make_voxceleb2cat.pl $voxceleb2_root test 16 data/voxceleb2cat_test
-    # utils/combine_data.sh data/voxceleb2cat data/voxceleb2cat_train data/voxceleb2cat_test 
+  # Prepare the VoxCeleb2 dataset for training.
+  local/make_voxceleb2cat.pl $voxceleb2_root dev 16 data/voxceleb2cat_train
+  # local/make_voxceleb2cat.pl $voxceleb2_root test 16 data/voxceleb2cat_test
+  # utils/combine_data.sh data/voxceleb2cat data/voxceleb2cat_train data/voxceleb2cat_test 
 fi
 
 if [ $stage -le 2 ];then
-    # prepare voxceleb1 for test
-    local/make_voxceleb1_oeh.pl $voxceleb1_root data
+  # prepare voxceleb1 for test
+  # This script is for the old version of the dataset
+  local/make_voxceleb1_oeh.pl $voxceleb1_root data
+  # Use this for the newer version of voxceleb1:
+  # local/make_voxceleb1_v2_oeh.pl $voxceleb1_root data
 fi
diff --git a/egs/voxceleb/v1/local/make_voxceleb1_o.pl b/egs/voxceleb/v1/local/make_voxceleb1_o.pl
index 93cbc83f..dce92245 100755
--- a/egs/voxceleb/v1/local/make_voxceleb1_o.pl
+++ b/egs/voxceleb/v1/local/make_voxceleb1_o.pl
@@ -5,8 +5,13 @@
 #           2020  Jesus Villalba
 #
 # Usage: make_voxceleb1.pl /export/voxceleb1 data/
-# Create trial lists for Voxceleb1 original, Entire (E) and hard (H), 
+# Create trial lists for Voxceleb1 original, 
 # with cleaned and non-cleaned versions
+# Attention:
+#  - This script is for the old version of the dataset without anonymized speaker-ids
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#  dumped in the same wav directory, NOT separated dev and test directories
+
 
 if (@ARGV != 2) {
   print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
@@ -26,18 +31,47 @@
 my @trials_url = ("$url_base/veri_test.txt", "$url_base/veri_test2.txt");
 my @trials = ("trials_o", "trials_o_clean");
 
-open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv";
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e $meta_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $meta_path = "$out_dir/vox1_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or die "Error downloading $meta_url";
+}
+
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
 my %id2spkr = ();
+my %spkr2gender = ();
+my %spkr2nation = ();
 while (<META_IN>) {
-  chomp;
-  my ($vox_id, $spkr_id, $gender, $nation, $set) = split;
-  $id2spkr{$vox_id} = $spkr_id;
-
+    chomp;
+    my ($vox_id, $spkr_id, $gender, $nation, $set) = split "\t";
+    $id2spkr{$vox_id} = $spkr_id;
+    $spkr2gender{$spkr_id} = $gender;
+    $nation =~ s@ @-@g;
+    $spkr2nation{$spkr_id} = $nation;
 }
 close(META_IN) or die;
 
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e $lid_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $lid_path = "$out_dir/lang_vox1_final.csv";
+    system("wget -O $lid_path $lid_url") == 0 or die "Error downloading $lid_url";
+}
+open(LID_IN, "<", $lid_path) or die "Could not open the language id file $lid_path";
+my %utt2lang = ();  # utterance id (as written to utt2spk) -> language label
+while (<LID_IN>) {
+  chomp;
+  # Expected line format: <vox-id>/<video-id>/<file>.wav,<language>,<score>
+  my ($wav_name, $lang) = split ',';
+  my ($vox_id, $vid_id, $file_id) = split '/', $wav_name;
+  $file_id =~ s@\.wav$@@;
+  # Old-style utterance ids: <speaker-name>-<video-id>-<"00" . file number>.
+  $utt2lang{"$id2spkr{$vox_id}-$vid_id-00$file_id"} = $lang;
+}
+close(LID_IN) or die;
+
 #download trials from voxceleb web page
-my %valid_utts = ();
 for($i = 0; $i <= $#trials; $i++) {
 
     my $file_i = "$out_dir/$trials_basename[$i]";
@@ -70,8 +104,6 @@
 	    $target = "target";
 	}
 	print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
-	$valid_utts{$utt_id1} = 1;
-	$valid_utts{$utt_id2} = 1;
     }
     
     close(TRIAL_IN) or die;
@@ -84,8 +116,11 @@
 my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
 closedir $dh;
 
-open(SPKR_TEST, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
-open(WAV_TEST, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
 
 foreach (@spkr_dirs) {
   my $spkr_id = $_;
@@ -95,6 +130,9 @@
   if (exists $id2spkr{$spkr_id}) {
     $new_spkr_id = $id2spkr{$spkr_id};
   }
+  print GENDER "$new_spkr_id $spkr2gender{$new_spkr_id}\n";
+  print NAT "$new_spkr_id $spkr2nation{$new_spkr_id}\n";
+
   opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
   my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
   closedir $dh;
@@ -104,15 +142,22 @@
     my $segment = substr($filename, 12, 7);
     my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
     my $utt_id = "$new_spkr_id-$rec_id-$segment";
-    if (exists $valid_utts{$utt_id}) {
-	print WAV_TEST "$utt_id", " $wav", "\n";
-	print SPKR_TEST "$utt_id", " $new_spkr_id", "\n";
+    print WAV "$utt_id", " $wav", "\n";
+    print SPKR "$utt_id", " $new_spkr_id", "\n";
+    if (exists $utt2lang{$utt_id}) {
+	print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+    }
+    else {
+	print LANG "$utt_id N/A\n";
     }
   }
 }
 
-close(SPKR_TEST) or die;
-close(WAV_TEST) or die;
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
+close(NAT) or die;
 
 if (system(
   "cat $out_dir/trials_* | sort -u > $out_dir/trials") != 0) {
diff --git a/egs/voxceleb/v1/local/make_voxceleb1_oeh.pl b/egs/voxceleb/v1/local/make_voxceleb1_oeh.pl
index 25ffa642..760ab397 100755
--- a/egs/voxceleb/v1/local/make_voxceleb1_oeh.pl
+++ b/egs/voxceleb/v1/local/make_voxceleb1_oeh.pl
@@ -7,6 +7,10 @@
 # Usage: make_voxceleb1.pl /export/voxceleb1 data/
 # Create trial lists for Voxceleb1 original, Entire (E) and hard (H), 
 # with cleaned and non-cleaned versions
+# Attention:
+#  - This script is for the old version of the dataset without anonymized speaker-ids
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#  dumped in the same wav directory, NOT separated dev and test directories
 
 if (@ARGV != 2) {
   print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
@@ -26,16 +30,46 @@
 my @trials_url = ("$url_base/veri_test.txt", "$url_base/veri_test2.txt", "$url_base/list_test_hard.txt", "$url_base/list_test_hard2.txt", "$url_base/list_test_all.txt", "$url_base/list_test_all2.txt");
 my @trials = ("trials_o", "trials_o_clean", "trials_h", "trials_h_clean", "trials_e", "trials_e_clean");
 
-open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv";
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e $meta_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $meta_path = "$out_dir/vox1_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or die "Error downloading $meta_url";
+}
+
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
 my %id2spkr = ();
+my %spkr2gender = ();
+my %spkr2nation = ();
 while (<META_IN>) {
-  chomp;
-  my ($vox_id, $spkr_id, $gender, $nation, $set) = split;
-  $id2spkr{$vox_id} = $spkr_id;
-
+    chomp;
+    my ($vox_id, $spkr_id, $gender, $nation, $set) = split "\t";
+    $id2spkr{$vox_id} = $spkr_id;
+    $spkr2gender{$spkr_id} = $gender;
+    $nation =~ s@ @-@g;
+    $spkr2nation{$spkr_id} = $nation;
 }
 close(META_IN) or die;
 
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e $lid_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $lid_path = "$out_dir/lang_vox1_final.csv";
+    system("wget -O $lid_path $lid_url") == 0 or die "Error downloading $lid_url";
+}
+open(LID_IN, "<", $lid_path) or die "Could not open the language id file $lid_path";
+my %utt2lang = ();  # utterance id (as written to utt2spk) -> language label
+while (<LID_IN>) {
+  chomp;
+  # Expected line format: <vox-id>/<video-id>/<file>.wav,<language>,<score>
+  my ($wav_name, $lang) = split ',';
+  my ($vox_id, $vid_id, $file_id) = split '/', $wav_name;
+  $file_id =~ s@\.wav$@@;
+  # Old-style utterance ids: <speaker-name>-<video-id>-<"00" . file number>.
+  $utt2lang{"$id2spkr{$vox_id}-$vid_id-00$file_id"} = $lang;
+}
+close(LID_IN) or die;
+
 #download trials from voxceleb web page
 for($i = 0; $i <= $#trials; $i++) {
 
@@ -81,8 +115,11 @@
 my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
 closedir $dh;
 
-open(SPKR_TEST, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
-open(WAV_TEST, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
 
 foreach (@spkr_dirs) {
   my $spkr_id = $_;
@@ -92,6 +129,9 @@
   if (exists $id2spkr{$spkr_id}) {
     $new_spkr_id = $id2spkr{$spkr_id};
   }
+  print GENDER "$new_spkr_id $spkr2gender{$new_spkr_id}\n";
+  print NAT "$new_spkr_id $spkr2nation{$new_spkr_id}\n";
+
   opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
   my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
   closedir $dh;
@@ -101,13 +141,22 @@
     my $segment = substr($filename, 12, 7);
     my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
     my $utt_id = "$new_spkr_id-$rec_id-$segment";
-    print WAV_TEST "$utt_id", " $wav", "\n";
-    print SPKR_TEST "$utt_id", " $new_spkr_id", "\n";
+    print WAV "$utt_id", " $wav", "\n";
+    print SPKR "$utt_id", " $new_spkr_id", "\n";
+    if (exists $utt2lang{$utt_id}) {
+	print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+    }
+    else {
+	print LANG "$utt_id N/A\n";
+    }
   }
 }
 
-close(SPKR_TEST) or die;
-close(WAV_TEST) or die;
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
+close(NAT) or die;
 
 if (system(
   "cat $out_dir/trials_* | sort -u > $out_dir/trials") != 0) {
diff --git a/egs/voxceleb/v1/local/make_voxceleb1_v2.pl b/egs/voxceleb/v1/local/make_voxceleb1_v2.pl
new file mode 100755
index 00000000..74ee23c1
--- /dev/null
+++ b/egs/voxceleb/v1/local/make_voxceleb1_v2.pl
@@ -0,0 +1,149 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#           2018  Jesus Villalba
+#
+# Apache 2.0
+# Usage: make_voxceleb1_v2.pl /export/voxceleb1 16 data/
+# Creates <path-to-data-dir>/voxceleb1_train with wav.scp, utt2spk, spk2utt,
+# utt2lang, spk2gender and spk2nation.
+# Attention:
+#  - This script is for the recent version of the dataset
+#  - This version of the script does NOT remove SITW overlap speakers
+#  - Files from the same video are NOT concatenated into 1 segment
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#    dumped in the same wav directory, NOT separated dev and test directories
+#  - Speakers whose set is "test" in vox1_meta.csv are left out
+
+use strict;
+use warnings;
+
+if (@ARGV != 3) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> fs <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 16 data/\n";
+  exit(1);
+}
+
+my ($data_base, $fs, $data_dir) = @ARGV;
+my $out_dir = "$data_dir/voxceleb1_train";
+
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+# Use the metadata shipped with the dataset when present; otherwise download
+# a copy into the output directory.
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e $meta_path) {
+  $meta_path = "$out_dir/vox1_meta.csv";
+  if (system("wget -O $meta_path $meta_url") != 0) {
+    die "Error downloading $meta_url";
+  }
+}
+
+open(META_IN, "<", $meta_path) or die "Could not open the meta data file $meta_path";
+my %test_spkrs = ();
+my %spkr2gender = ();
+my %spkr2nation = ();
+while (<META_IN>) {
+  chomp;
+  # Tab-separated fields; the second one (old-style speaker id) is not used here.
+  my ($vox_id, undef, $gender, $nation, $set) = split "\t";
+  next unless defined $nation;  # blank or truncated line
+  $spkr2gender{$vox_id} = $gender;
+  $nation =~ s@ @-@g;  # keep the value a single whitespace-free token
+  $spkr2nation{$vox_id} = $nation;
+  if (defined $set && $set eq "test") {
+    $test_spkrs{$vox_id} = 1;
+  }
+}
+close(META_IN) or die;
+
+# Per-utterance language labels, fetched the same way as the metadata.
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e $lid_path) {
+  $lid_path = "$out_dir/lang_vox1_final.csv";
+  if (system("wget -O $lid_path $lid_url") != 0) {
+    die "Error downloading $lid_url";
+  }
+}
+open(LID_IN, "<", $lid_path) or die "Could not open the language id file $lid_path";
+my %utt2lang = ();
+while (<LID_IN>) {
+  chomp;
+  # Expected line format: <speaker>/<video>/<file>.wav,<language>,<score>
+  my ($wav_name, $lang) = split ',';
+  next unless defined $lang;
+  my ($spkr_id, $vid_id, $file_id) = split '/', $wav_name;
+  next unless defined $file_id;
+  $file_id =~ s@\.wav$@@;
+  $utt2lang{"$spkr_id-$vid_id-$file_id"} = $lang;
+}
+close(LID_IN) or die;
+
+my $wav_dir = "$data_base/wav";
+opendir my $dh, $wav_dir or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$wav_dir/$_" && ! /^\.{1,2}$/ || -l "$wav_dir/$_" } readdir($dh);
+closedir $dh;
+
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+
+foreach my $spkr_id (@spkr_dirs) {
+  next if (exists $test_spkrs{$spkr_id});
+
+  # Speakers missing from the metadata get an empty gender/nationality field.
+  my $gender = exists $spkr2gender{$spkr_id} ? $spkr2gender{$spkr_id} : "";
+  my $nation = exists $spkr2nation{$spkr_id} ? $spkr2nation{$spkr_id} : "";
+  print GENDER "$spkr_id $gender\n";
+  print NAT "$spkr_id $nation\n";
+
+  my $spkr_dir = "$wav_dir/$spkr_id";
+  opendir my $spkr_dh, $spkr_dir or die "Cannot open directory: $!";
+  my @vid_dirs = grep {-d "$spkr_dir/$_" && ! /^\.{1,2}$/ } readdir($spkr_dh);
+  closedir $spkr_dh;
+
+  foreach my $vid_id (@vid_dirs) {
+    my $vid_dir = "$spkr_dir/$vid_id";
+    opendir my $vid_dh, $vid_dir or die "Cannot open directory: $!";
+    my @segments = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($vid_dh);
+    closedir $vid_dh;
+
+    foreach my $segment (@segments) {
+      my $utt_id = "$spkr_id-$vid_id-$segment";
+      my $wav = "$vid_dir/$segment.wav";
+      if ($fs == 8) {
+        # Resample on the fly to 8 kHz through a sox pipe.
+        $wav = "sox " . $wav . " -t wav -r 8k - |";
+      }
+      print WAV "$utt_id $wav\n";
+      print SPKR "$utt_id $spkr_id\n";
+      if (exists $utt2lang{$utt_id}) {
+        print LANG "$utt_id $utt2lang{$utt_id}\n";
+      }
+      else {
+        print LANG "$utt_id N/A\n";
+      }
+    }
+  }
+}
+close(GENDER) or die;
+close(NAT) or die;
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/voxceleb/v1/local/make_voxceleb1_v2_o.pl b/egs/voxceleb/v1/local/make_voxceleb1_v2_o.pl
new file mode 100755
index 00000000..9ab37221
--- /dev/null
+++ b/egs/voxceleb/v1/local/make_voxceleb1_v2_o.pl
@@ -0,0 +1,211 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#           2020  Jesus Villalba
+#
+# Usage: make_voxceleb1_v2_o.pl /export/voxceleb1 data/
+# Create trial lists for Voxceleb1 original,
+# with cleaned and non-cleaned versions
+# Attention:
+#  - This script is for the recent version of the dataset
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#  dumped in the same wav directory, NOT separated dev and test directories
+
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
+  exit(1);
+}
+
+($data_base, $out_dir) = @ARGV;
+my $out_dir = "$out_dir/voxceleb1_test";
+
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+my $url_base="http://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta";
+my @trials_basename = ("very_test.txt", "very_test2.txt", "list_test_hard.txt", "list_test_hard2.txt", "list_test_all.txt", "list_test_all2.txt");
+my @trials_url = ("$url_base/veri_test.txt", "$url_base/veri_test2.txt");
+my @trials = ("trials_o", "trials_o_clean");
+
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e $meta_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $meta_path = "$out_dir/vox1_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or die "Error downloading $meta_url";
+}
+
+open(META_IN, "<", $meta_path) or die "Could not open the meta data file $meta_path";
+# Gender and nationality are keyed by the first column (the VoxCeleb1 id).
+my %spkr2gender = ();
+my %spkr2nation = ();
+while (<META_IN>) {
+    chomp;
+    my ($vox_id, undef, $gender, $nation) = split "\t";
+    $spkr2gender{$vox_id} = $gender;
+    $nation =~ s@ @-@g;  # keep the value a single whitespace-free token
+    $spkr2nation{$vox_id} = $nation;
+}
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e $lid_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $lid_path = "$out_dir/lang_vox1_final.csv";
+    system("wget -O $lid_path $lid_url") == 0 or die "Error downloading $lid_url";
+}
+open(LID_IN, "<", $lid_path) or die "Could not open the language id file $lid_path";
+my %utt2lang = ();  # utterance id (as written to utt2spk) -> language label
+while (<LID_IN>) {
+  chomp;
+  # Expected line format: <vox-id>/<video-id>/<file>.wav,<language>,<score>
+  my ($wav_name, $lang) = split ',';
+  my ($vox_id, $vid_id, $file_id) = split '/', $wav_name;
+  $file_id =~ s@\.wav$@@;
+  $utt2lang{"$vox_id-$vid_id-$file_id"} = $lang;
+}
+close(LID_IN) or die;
+
+#download trials from voxceleb web page
+for($i = 0; $i <= $#trials; $i++) {
+
+    my $file_i = "$out_dir/$trials_basename[$i]";
+    my $url_i = $trials_url[$i];
+    my $trial_i = "$out_dir/$trials[$i]";
+    if (! -e $file_i) {
+	system("wget -O $file_i $url_i");
+    }
+    #mapping from new speaker ids and file-names to old ones
+    open(TRIAL_IN, "<", "$file_i") or die "Could not open the verification trials file $file_i";
+    open(TRIAL_OUT, ">", "$trial_i") or die "Could not open the output file $trial_i";
+    while (<TRIAL_IN>) {
+	chomp;
+	my ($tar_or_non, $path1, $path2) = split;
+
+	# Create entry for left-hand side of trial
+	my ($spkr_id, $rec_id, $segment) = split('/', $path1);
+	$segment =~ s/\.wav$//;
+	my $utt_id1 = "$spkr_id-$rec_id-$segment";
+	
+	# Create entry for right-hand side of trial
+	my ($spkr_id, $rec_id, $segment) = split('/', $path2);
+	$segment =~ s/\.wav$//;
+	my $utt_id2 = "$spkr_id-$rec_id-$segment";
+	
+	my $target = "nontarget";
+	if ($tar_or_non eq "1") {
+	    $target = "target";
+	}
+	print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
+    }
+    
+    close(TRIAL_IN) or die;
+    close(TRIAL_OUT) or die;
+    
+}
+
+my $wav_dir = "$data_base/wav";
+opendir my $dh, "$wav_dir" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$wav_dir/$_" && ! /^\.{1,2}$/ || -l "$wav_dir/$_" } readdir($dh);
+closedir $dh;
+
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+
+# Walk <wav_dir>/<speaker>/<video>/<segment>.wav and emit one entry per wav file.
+# Utterance ids are "<speaker>-<video>-<segment>", the same form used for the
+# trial lists and for the utt2lang keys.
+foreach my $spkr_id (@spkr_dirs) {
+
+    print GENDER "$spkr_id $spkr2gender{$spkr_id}\n";
+    print NAT "$spkr_id $spkr2nation{$spkr_id}\n";
+
+    my $spkr_dir = "$wav_dir/$spkr_id";
+    opendir my $spkr_dh, $spkr_dir or die "Cannot open directory: $!";
+    my @vid_dirs = grep {-d "$spkr_dir/$_" && ! /^\.{1,2}$/ } readdir($spkr_dh);
+    closedir $spkr_dh;
+
+    foreach my $vid_id (@vid_dirs) {
+        my $vid_dir = "$spkr_dir/$vid_id";
+        opendir my $vid_dh, $vid_dir or die "Cannot open directory: $!";
+        # Keep only *.wav entries, with the extension stripped.
+        my @segments = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($vid_dh);
+        closedir $vid_dh;
+
+        foreach my $segment (@segments) {
+            my $utt_id = "$spkr_id-$vid_id-$segment";
+            # This script takes no sampling-rate argument: wav files are listed as-is.
+            print WAV "$utt_id $vid_dir/$segment.wav\n";
+            print SPKR "$utt_id $spkr_id\n";
+            if (exists $utt2lang{$utt_id}) {
+                print LANG "$utt_id $utt2lang{$utt_id}\n";
+            }
+            else {
+                # No language label available for this utterance.
+                print LANG "$utt_id N/A\n";
+            }
+        }
+    }
+}
+
+# foreach (@spkr_dirs) {
+#   my $spkr_id = $_;
+#   my $new_spkr_id = $spkr_id;
+#   # If we're using a newer version of VoxCeleb1, we need to "deanonymize"
+#   # the speaker labels.
+#   if (exists $id2spkr{$spkr_id}) {
+#     $new_spkr_id = $id2spkr{$spkr_id};
+#   }
+#   print GENDER "$new_spkr_id $spkr2gender{$new_spkr_id}\n";
+#   print NAT "$new_spkr_id $spkr2nation{$new_spkr_id}\n";
+
+#   opendir my $dh, "$wav_dir/$spkr_id/" or die "Cannot open directory: $!";
+#   my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+#   closedir $dh;
+#   foreach (@files) {
+#     my $filename = $_;
+#     my $rec_id = substr($filename, 0, 11);
+#     my $segment = substr($filename, 12, 7);
+#     my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
+#     my $utt_id = "$new_spkr_id-$rec_id-$segment";
+#     print WAV "$utt_id", " $wav", "\n";
+#     print SPKR "$utt_id", " $new_spkr_id", "\n";
+#     if (exists $utt2lang{$utt_id}) {
+# 	print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+#     }
+#     else {
+# 	print LANG "$utt_id N/A\n";
+#     }
+#   }
+# }
+
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
+close(NAT) or die;
+
+if (system(
+  "cat $out_dir/trials_* | sort -u > $out_dir/trials") != 0) {
+  die "Error creating trials file in directory $out_dir";
+}
+
+if (system(
+  "awk '{ print \$1,\$1 }' $out_dir/trials | sort -u > $out_dir/utt2model") != 0) {
+  die "Error creating utt2model file in directory $out_dir";
+}
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
+
diff --git a/egs/voxceleb/v1/local/make_voxceleb1_v2_oeh.pl b/egs/voxceleb/v1/local/make_voxceleb1_v2_oeh.pl
new file mode 100755
index 00000000..247ad30a
--- /dev/null
+++ b/egs/voxceleb/v1/local/make_voxceleb1_v2_oeh.pl
@@ -0,0 +1,211 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#           2020  Jesus Villalba
+#
+# Usage: make_voxceleb1_v2_oeh.pl /export/voxceleb1 data/
+# Create trial lists for Voxceleb1 original, Entire (E) and hard (H), 
+# with cleaned and non-cleaned versions
+# Attention:
+#  - This script is for the recent version of the dataset
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#    dumped in the same wav directory, NOT separated dev and test directories
+
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
+  exit(1);
+}
+
+($data_base, $out_dir) = @ARGV;
+my $out_dir = "$out_dir/voxceleb1_test";
+
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+my $url_base="http://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta";
+my @trials_basename = ("very_test.txt", "very_test2.txt", "list_test_hard.txt", "list_test_hard2.txt", "list_test_all.txt", "list_test_all2.txt");
+my @trials_url = ("$url_base/veri_test.txt", "$url_base/veri_test2.txt", "$url_base/list_test_hard.txt", "$url_base/list_test_hard2.txt", "$url_base/list_test_all.txt", "$url_base/list_test_all2.txt");
+my @trials = ("trials_o", "trials_o_clean", "trials_h", "trials_h_clean", "trials_e", "trials_e_clean");
+
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e $meta_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $meta_path = "$out_dir/vox1_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or die "Error downloading $meta_url";
+}
+
+open(META_IN, "<", $meta_path) or die "Could not open the meta data file $meta_path";
+# Gender and nationality are keyed by the first column (the VoxCeleb1 id).
+my %spkr2gender = ();
+my %spkr2nation = ();
+while (<META_IN>) {
+    chomp;
+    my ($vox_id, undef, $gender, $nation) = split "\t";
+    $spkr2gender{$vox_id} = $gender;
+    $nation =~ s@ @-@g;  # keep the value a single whitespace-free token
+    $spkr2nation{$vox_id} = $nation;
+}
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e $lid_path) {  # not shipped with the dataset: fetch a copy into $out_dir
+    $lid_path = "$out_dir/lang_vox1_final.csv";
+    system("wget -O $lid_path $lid_url") == 0 or die "Error downloading $lid_url";
+}
+open(LID_IN, "<", $lid_path) or die "Could not open the language id file $lid_path";
+my %utt2lang = ();  # utterance id (as written to utt2spk) -> language label
+while (<LID_IN>) {
+  chomp;
+  # Expected line format: <vox-id>/<video-id>/<file>.wav,<language>,<score>
+  my ($wav_name, $lang) = split ',';
+  my ($vox_id, $vid_id, $file_id) = split '/', $wav_name;
+  $file_id =~ s@\.wav$@@;
+  $utt2lang{"$vox_id-$vid_id-$file_id"} = $lang;
+}
+close(LID_IN) or die;
+
+#download trials from voxceleb web page
+for($i = 0; $i <= $#trials; $i++) {
+
+    my $file_i = "$out_dir/$trials_basename[$i]";
+    my $url_i = $trials_url[$i];
+    my $trial_i = "$out_dir/$trials[$i]";
+    if (! -e $file_i) {
+	system("wget -O $file_i $url_i");
+    }
+    #mapping from new speaker ids and file-names to old ones
+    open(TRIAL_IN, "<", "$file_i") or die "Could not open the verification trials file $file_i";
+    open(TRIAL_OUT, ">", "$trial_i") or die "Could not open the output file $trial_i";
+    while (<TRIAL_IN>) {
+	chomp;
+	my ($tar_or_non, $path1, $path2) = split;
+
+	# Create entry for left-hand side of trial
+	my ($spkr_id, $rec_id, $segment) = split('/', $path1);
+	$segment =~ s/\.wav$//;
+	my $utt_id1 = "$spkr_id-$rec_id-$segment";
+	
+	# Create entry for right-hand side of trial
+	my ($spkr_id, $rec_id, $segment) = split('/', $path2);
+	$segment =~ s/\.wav$//;
+	my $utt_id2 = "$spkr_id-$rec_id-$segment";
+	
+	my $target = "nontarget";
+	if ($tar_or_non eq "1") {
+	    $target = "target";
+	}
+	print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
+    }
+    
+    close(TRIAL_IN) or die;
+    close(TRIAL_OUT) or die;
+    
+}
+
+my $wav_dir = "$data_base/wav";
+opendir my $dh, "$wav_dir" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$wav_dir/$_" && ! /^\.{1,2}$/ || -l "$wav_dir/$_" } readdir($dh);
+closedir $dh;
+
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+
+# Walk <wav_dir>/<speaker>/<video>/<segment>.wav and emit one entry per wav file.
+# Utterance ids are "<speaker>-<video>-<segment>", the same form used for the
+# trial lists and for the utt2lang keys.
+foreach my $spkr_id (@spkr_dirs) {
+
+    print GENDER "$spkr_id $spkr2gender{$spkr_id}\n";
+    print NAT "$spkr_id $spkr2nation{$spkr_id}\n";
+
+    my $spkr_dir = "$wav_dir/$spkr_id";
+    opendir my $spkr_dh, $spkr_dir or die "Cannot open directory: $!";
+    my @vid_dirs = grep {-d "$spkr_dir/$_" && ! /^\.{1,2}$/ } readdir($spkr_dh);
+    closedir $spkr_dh;
+
+    foreach my $vid_id (@vid_dirs) {
+        my $vid_dir = "$spkr_dir/$vid_id";
+        opendir my $vid_dh, $vid_dir or die "Cannot open directory: $!";
+        # Keep only *.wav entries, with the extension stripped.
+        my @segments = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($vid_dh);
+        closedir $vid_dh;
+
+        foreach my $segment (@segments) {
+            my $utt_id = "$spkr_id-$vid_id-$segment";
+            # This script takes no sampling-rate argument: wav files are listed as-is.
+            print WAV "$utt_id $vid_dir/$segment.wav\n";
+            print SPKR "$utt_id $spkr_id\n";
+            if (exists $utt2lang{$utt_id}) {
+                print LANG "$utt_id $utt2lang{$utt_id}\n";
+            }
+            else {
+                # No language label available for this utterance.
+                print LANG "$utt_id N/A\n";
+            }
+        }
+    }
+}
+
+# foreach (@spkr_dirs) {
+#   my $spkr_id = $_;
+#   my $new_spkr_id = $spkr_id;
+#   # If we're using a newer version of VoxCeleb1, we need to "deanonymize"
+#   # the speaker labels.
+#   if (exists $id2spkr{$spkr_id}) {
+#     $new_spkr_id = $id2spkr{$spkr_id};
+#   }
+#   print GENDER "$new_spkr_id $spkr2gender{$new_spkr_id}\n";
+#   print NAT "$new_spkr_id $spkr2nation{$new_spkr_id}\n";
+
+#   opendir my $dh, "$wav_dir/$spkr_id/" or die "Cannot open directory: $!";
+#   my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+#   closedir $dh;
+#   foreach (@files) {
+#     my $filename = $_;
+#     my $rec_id = substr($filename, 0, 11);
+#     my $segment = substr($filename, 12, 7);
+#     my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
+#     my $utt_id = "$new_spkr_id-$rec_id-$segment";
+#     print WAV "$utt_id", " $wav", "\n";
+#     print SPKR "$utt_id", " $new_spkr_id", "\n";
+#     if (exists $utt2lang{$utt_id}) {
+# 	print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+#     }
+#     else {
+# 	print LANG "$utt_id N/A\n";
+#     }
+#   }
+# }
+
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
+close(NAT) or die;
+
+if (system(
+  "cat $out_dir/trials_* | sort -u > $out_dir/trials") != 0) {
+  die "Error creating trials file in directory $out_dir";
+}
+
+if (system(
+  "awk '{ print \$1,\$1 }' $out_dir/trials | sort -u > $out_dir/utt2model") != 0) {
+  die "Error creating utt2model file in directory $out_dir";
+}
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
+
diff --git a/egs/voxceleb/v1/local/make_voxceleb1cat.pl b/egs/voxceleb/v1/local/make_voxceleb1cat.pl
index d8072942..59cbf0db 100755
--- a/egs/voxceleb/v1/local/make_voxceleb1cat.pl
+++ b/egs/voxceleb/v1/local/make_voxceleb1cat.pl
@@ -6,8 +6,12 @@
 #
 # Apache 2.0
 # Usage: make_voxceleb1cat.pl /export/voxceleb1 data/
-# This version of the script does NOT remove SITW overlap speakers
-# Files from the same video are concatenated into 1 segment
+# Attention:
+#  - This script is for the old version of the dataset without anonymized speaker-ids
+#  - This version of the script does NOT remove SITW overlap speakers
+#  - Files from the same video are concatenated into 1 segment
+#  - This script assumes that the voxceleb1 dataset has all speaker directories
+#  dumped in the same wav directory, NOT separated dev and test directories
 
 if (@ARGV != 3) {
   print STDERR "Usage: $0 <path-to-voxceleb1> fs <path-to-data-dir>\n";
@@ -22,31 +26,57 @@
   die "Error making directory $out_train_dir";
 }
 
-opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!";
-my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
-closedir $dh;
-
-if (! -e "$data_base/voxceleb1_test.txt") {
-  system("wget -O $data_base/voxceleb1_test.txt http://www.openslr.org/resources/49/voxceleb1_test.txt");
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e "$meta_path") {   # not shipped with the dataset: download into $out_dir
+    $meta_path = "$out_dir/vox1_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or die "Could not download $meta_url to $meta_path";
 }
 
-if (! -e "$data_base/vox1_meta.csv") {
-  system("wget -O $data_base/vox1_meta.csv http://www.openslr.org/resources/49/vox1_meta.csv");
-}
-
-open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv";
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
 
 my %id2spkr = ();
 my $test_spkrs = ();
+my %spkr2gender = ();
+my %spkr2nation = ();
 while (<META_IN>) {
   chomp;
-  my ($vox_id, $spkr_id, $gender, $nation, $set) = split;
+  my ($vox_id, $spkr_id, $gender, $nation, $set) = split "\t";
   $id2spkr{$vox_id} = $spkr_id;
+  $spkr2gender{$spkr_id} = $gender;
+  $nation =~ s@ @-@g;
+  $spkr2nation{$spkr_id} = $nation;
   if ( $set eq "test"){
       $test_spkrs{$spkr_id} = ();
   }
 }
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e "$lid_path") {
+    $lid_path = "$out_dir/lang_vox1_final.csv";
+    system("wget -O $lid_path $lid_url") == 0
+      or warn "Warning: could not download $lid_url\n";
+}
+open(LID_IN, "<", "$lid_path") or die "Could not open the language id file $lid_path";
+my %utt2lang = ();
+while (<LID_IN>) {   # expected line: <vox_id>/<video>/<file>,<lang>,<score>
+  chomp;
+  my ($wav_path, $lang, $score) = split ',';
+  my ($vox_id, $vid_id, $file_id) = split '/', $wav_path;
+  my $spkr_id = $id2spkr{$vox_id};   # translate the id through the metadata map
+  $utt2lang{"$spkr_id-$vid_id"} = $lang;   # keyed per video: "<speaker>-<video>"
+}
+close(LID_IN) or die;
+
 
+opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
+
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
 
 my %rec2utt = ();
 my %rec2spk = ();
@@ -56,6 +86,9 @@
     if (exists $id2spkr{$spkr_id}) {
 	$new_spkr_id = $id2spkr{$spkr_id};
     }
+    print GENDER "$new_spkr_id $spkr2gender{$new_spkr_id}\n";
+    print NAT "$new_spkr_id $spkr2nation{$new_spkr_id}\n";
+
     opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
     my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
     closedir $dh;
@@ -76,9 +109,12 @@
 	}
     }
 }
+close(GENDER) or die;
+close(NAT) or die;
 
 open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
 open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
 
 foreach my $utt_id (keys %rec2spk) {
     my $wav = "";
@@ -89,12 +125,20 @@
 	$wav = "sox " . $rec2utt{$utt_id} . " -t wav - |";
     }
     my $spkr_id = $rec2spk{$utt_id};
+    my $land_id = $utt2lang{$utt_id};
     print WAV "$utt_id", " $wav", "\n";
     print SPKR "$utt_id", " $spkr_id", "\n";
+    if (exists $utt2lang{$utt_id}) {
+	print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+    }
+    else {
+	print LANG "$utt_id N/A\n";
+    }
 }
 
 close(SPKR) or die;
 close(WAV) or die;
+close(LANG) or die;
 
 if (system(
   "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
diff --git a/egs/voxceleb/v1/local/make_voxceleb1cat_v2.pl b/egs/voxceleb/v1/local/make_voxceleb1cat_v2.pl
new file mode 100755
index 00000000..e5baa746
--- /dev/null
+++ b/egs/voxceleb/v1/local/make_voxceleb1cat_v2.pl
@@ -0,0 +1,162 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#           2018  Jesus Villalba
+#
+# Apache 2.0
+# Usage: make_voxceleb1cat_v2.pl /export/voxceleb1 data/
+# Attention:
+# - This script is for the recent version of the dataset
+# - This version of the script does NOT remove SITW overlap speakers
+# - Files from the same video are concatenated into 1 segment
+# - This script assumes that the voxceleb1 dataset has all speaker directories
+#  dumped in the same wav directory, NOT separated dev and test directories
+
+# Exactly three arguments are required: <path-to-voxceleb1> fs <path-to-data-dir>.
+if (@ARGV != 3) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> fs <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 16 data/\n";
+  exit(1);
+}
+($data_base, $fs, $out_dir) = @ARGV;
+# Everything is written to the voxceleb1cat_train sub-directory of the data dir.
+my $out_dir = "$out_dir/voxceleb1cat_train";
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
+}
+
+# Use the metadata shipped with the dataset, or download it into $out_dir.
+my $meta_url = "https://www.openslr.org/resources/49/vox1_meta.csv";
+my $meta_path = "$data_base/vox1_meta.csv";
+if (! -e "$meta_path") {
+    $meta_path = "$out_dir/vox1_meta.csv";
+    # A failed "wget -O" can leave an empty file that open() accepts, which
+    # would silently leave the test-speaker set empty.
+    system("wget -O $meta_path $meta_url") == 0
+      or die "Could not download $meta_url to $meta_path";
+}
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
+# All maps are keyed by the first tab-separated column of the metadata file.
+my %test_spkrs = ();
+my %spkr2gender = ();
+my %spkr2nation = ();
+while (<META_IN>) {
+  chomp;
+  my ($vox_id, $spkr_id, $gender, $nation, $set) = split "\t";
+  $spkr2gender{$vox_id} = $gender;
+  $nation =~ s@ @-@g;   # keep the nationality a single space-free token
+  $spkr2nation{$vox_id} = $nation;
+  $test_spkrs{$vox_id} = () if $set eq "test";
+}
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox1_final.csv";
+my $lid_path = "$data_base/lang_vox1_final.csv";
+if (! -e "$lid_path") {
+    $lid_path = "$out_dir/lang_vox1_final.csv";
+    system("wget -O $lid_path $lid_url") == 0
+      or warn "Warning: could not download $lid_url\n";
+}
+open(LID_IN, "<", "$lid_path") or die "Could not open the language id file $lid_path";
+my %utt2lang = ();
+while (<LID_IN>) {   # expected line: <speaker>/<video>/<file>,<lang>,<score>
+  chomp;
+  my ($wav_path, $lang, $score) = split ',';
+  my ($spkr_id, $vid_id, $file_id) = split '/', $wav_path;
+  $utt2lang{"$spkr_id-$vid_id"} = $lang;   # keyed per video: "<speaker>-<video>"
+}
+close(LID_IN) or die;
+
+my $wav_dir = "$data_base/wav";
+opendir my $dh, "$wav_dir" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$wav_dir/$_" && ! /^\.{1,2}$/ || -l "$wav_dir/$_" } readdir($dh);
+closedir $dh;
+
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(NAT, ">", "$out_dir/spk2nation") or die "Could not open the output file $out_dir/spk2nation";
+
+# Both maps are keyed by "<speaker>-<video>"; utt2wav accumulates the
+# space-separated wav paths of all segments found for that video.
+my %utt2wav = ();
+my %utt2spk = ();
+foreach (@spkr_dirs) {
+    my $spkr_id = $_;
+    print GENDER "$spkr_id $spkr2gender{$spkr_id}\n";
+    print NAT "$spkr_id $spkr2nation{$spkr_id}\n";
+    # Test-set speakers are listed above but contribute no utterances.
+    next if exists $test_spkrs{$spkr_id};
+    my $spkr_dir = "$wav_dir/$spkr_id";
+    opendir my $dh, "$spkr_dir" or die "Cannot open directory: $!";
+    my @vid_dirs = grep {-d "$spkr_dir/$_" && ! /^\.{1,2}$/ } readdir($dh);
+    closedir $dh;
+    foreach (@vid_dirs) {
+	my $vid_id = $_;
+	my $vid_dir = "$spkr_dir/$vid_id";
+	opendir my $dh, "$vid_dir" or die "Cannot open directory: $!";
+	my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh);
+	closedir $dh;
+	my $utt_id = "$spkr_id-$vid_id";
+	# readdir order is unspecified: sort so the segments of a video are
+	# always concatenated in the same order.
+	foreach my $segment (sort @files) {
+	    my $wav = "$vid_dir/$segment.wav";
+	    if (not exists $utt2wav{$utt_id}) {
+		$utt2spk{$utt_id} = $spkr_id;
+		$utt2wav{$utt_id} = $wav
+	    }
+	    else {
+		$utt2wav{$utt_id} = $utt2wav{$utt_id} . " " . $wav
+	    }
+	}
+    }
+}
+close(GENDER) or die;
+close(NAT) or die;
+
+open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+
+# Each wav.scp entry is a sox pipe over all segments of one video; fs == 8 adds "-r 8k".
+foreach my $utt_id (keys %utt2spk) {
+    my $wav = "";
+    if($fs == 8){
+	$wav = "sox " . $utt2wav{$utt_id} . " -t wav -r 8k - |";
+    }
+    else{
+	$wav = "sox " . $utt2wav{$utt_id} . " -t wav - |";
+    }
+    my $spkr_id = $utt2spk{$utt_id};
+    print WAV "$utt_id", " $wav", "\n";
+    print SPKR "$utt_id", " $spkr_id", "\n";
+    if (exists $utt2lang{$utt_id}) {
+	print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+    }
+    else {
+	print LANG "$utt_id N/A\n";
+    }
+}
+
+close(SPKR) or die;
+close(WAV) or die;
+close(LANG) or die;
+
+# ---------------------------------------------------------------------------
+# Finalize the data directory:
+#  1. derive spk2utt from the utt2spk file written above;
+#  2. run fix_data_dir.sh on the directory (its exit status is not checked);
+#  3. run validate_data_dir.sh with --no-text --no-feats and abort if it
+#     reports a problem.
+# Both utilities are run with LC_COLLATE=C.
+# ---------------------------------------------------------------------------
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+
+system("env LC_COLLATE=C utils/fix_data_dir.sh $out_dir");
+
+if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/voxceleb/v1/local/make_voxceleb2.pl b/egs/voxceleb/v1/local/make_voxceleb2.pl
index d88a78ce..e0ebeb0f 100755
--- a/egs/voxceleb/v1/local/make_voxceleb2.pl
+++ b/egs/voxceleb/v1/local/make_voxceleb2.pl
@@ -32,20 +32,59 @@
     $dataset_path = "$data_base/$dataset"
 }
 
-opendir my $dh, "$dataset_path" or die "Cannot open directory: $!";
-my @spkr_dirs = grep {-d "$dataset_path/$_" && ! /^\.{1,2}$/} readdir($dh);
-closedir $dh;
 
 if (system("mkdir -p $out_dir") != 0) {
   die "Error making directory $out_dir";
 }
 
+
+my $meta_url = "https://www.openslr.org/resources/49/vox2_meta.csv";
+my $meta_path = "$data_base/vox2_meta.csv";
+if (! -e "$meta_path") {   # not shipped with the dataset: download into $out_dir
+    $meta_path = "$out_dir/vox2_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or warn "Warning: could not download $meta_url\n";
+}
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
+my %spkr2gender = ();
+while (<META_IN>) {   # whitespace-separated columns; 2nd is the key, 4th the gender
+  chomp;
+  my ($spkr, $vox_id, $vgg_id, $gender, $set) = split;
+  $spkr2gender{$vox_id} = $gender;
+}
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox2_final.csv";
+my $lid_path = "$data_base/lang_vox2_final.csv";
+if (! -e "$lid_path") {
+    $lid_path = "$out_dir/lang_vox2_final.csv";
+    system("wget -O $lid_path $lid_url") == 0 or warn "Warning: could not download $lid_url\n";
+}
+open(LID_IN, "<", "$lid_path") or die "Could not open the language id file $lid_path";
+my %utt2lang = ();
+while (<LID_IN>) {   # first field is a wav path; turn "a/b/c.wav" into the key "a-b-c"
+  chomp;
+  my ($utt_id, $lang, $score) = split ',';
+  $utt_id =~ s@/@-@g;
+  $utt_id =~ s@\.wav$@@;
+  $utt2lang{$utt_id} = $lang;
+}
+close(LID_IN) or die;
+
+
 open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
 open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+
+opendir my $dh, "$dataset_path" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$dataset_path/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
 
 foreach (@spkr_dirs) {
   my $spkr_id = $_;
 
+  print GENDER "$spkr_id $spkr2gender{$spkr_id}\n";
+
   opendir my $dh, "$dataset_path/$spkr_id/" or die "Cannot open directory: $!";
   my @rec_dirs = grep {-d "$dataset_path/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh);
   closedir $dh;
@@ -66,11 +105,19 @@
 	my $utt_id = "$spkr_id-$rec_id-$name";
 	print WAV "$utt_id", " $wav", "\n";
 	print SPKR "$utt_id", " $spkr_id", "\n";
+	if (exists $utt2lang{$utt_id}) {
+	    print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+	}
+	else {
+	    print LANG "$utt_id N/A\n";
+	}
     }
   }
 }
 close(SPKR) or die;
 close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
 
 if (system(
   "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
diff --git a/egs/voxceleb/v1/local/make_voxceleb2cat.pl b/egs/voxceleb/v1/local/make_voxceleb2cat.pl
index 6bea3737..fa4f64ab 100755
--- a/egs/voxceleb/v1/local/make_voxceleb2cat.pl
+++ b/egs/voxceleb/v1/local/make_voxceleb2cat.pl
@@ -33,9 +33,6 @@
     $dataset_path = "$data_base/$dataset"
 }
 
-opendir my $dh, "$dataset_path" or die "Cannot open directory: $!";
-my @spkr_dirs = grep {-d "$dataset_path/$_" && ! /^\.{1,2}$/} readdir($dh);
-closedir $dh;
 
 if (system("mkdir -p $out_dir") != 0) {
   die "Error making directory $out_dir";
@@ -46,12 +43,52 @@
 }
 
 
+my $meta_url = "https://www.openslr.org/resources/49/vox2_meta.csv";
+my $meta_path = "$data_base/vox2_meta.csv";
+if (! -e "$meta_path") {   # not shipped with the dataset: download into $out_dir
+    $meta_path = "$out_dir/vox2_meta.csv";
+    system("wget -O $meta_path $meta_url") == 0 or warn "Warning: could not download $meta_url\n";
+}
+open(META_IN, "<", "$meta_path") or die "Could not open the meta data file $meta_path";
+my %spkr2gender = ();
+while (<META_IN>) {   # whitespace-separated columns; 2nd is the key, 4th the gender
+  chomp;
+  my ($spkr, $vox_id, $vgg_id, $gender, $set) = split;
+  $spkr2gender{$vox_id} = $gender;
+}
+close(META_IN) or die;
+
+my $lid_url = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data_workshop_2021/lang_vox2_final.csv";
+my $lid_path = "$data_base/lang_vox2_final.csv";
+if (! -e "$lid_path") {
+    $lid_path = "$out_dir/lang_vox2_final.csv";
+    system("wget -O $lid_path $lid_url") == 0 or warn "Warning: could not download $lid_url\n";
+}
+open(LID_IN, "<", "$lid_path") or die "Could not open the language id file $lid_path";
+my %utt2lang = ();
+while (<LID_IN>) {   # first field is a wav path; turn "a/b/c.wav" into the key "a-b"
+  chomp;
+  my ($utt_id, $lang, $score) = split ',';
+  $utt_id =~ s@/@-@g;
+  $utt_id =~ s@-[^-]*\.wav$@@;   # drop the trailing file component
+  $utt2lang{$utt_id} = $lang;
+}
+close(LID_IN) or die;
+
 open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
 open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+open(LANG, ">", "$out_dir/utt2lang") or die "Could not open the output file $out_dir/utt2lang";
+open(GENDER, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+
+opendir my $dh, "$dataset_path" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$dataset_path/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
 
 foreach (@spkr_dirs) {
   my $spkr_id = $_;
 
+  print GENDER "$spkr_id $spkr2gender{$spkr_id}\n";
+
   opendir my $dh, "$dataset_path/$spkr_id/" or die "Cannot open directory: $!";
   my @rec_dirs = grep {-d "$dataset_path/$spkr_id/$_" && ! /^\.{1,2}$/} readdir($dh);
   closedir $dh;
@@ -69,10 +106,18 @@
       }
       print WAV "$utt_id", " $wav", "\n";
       print SPKR "$utt_id", " $spkr_id", "\n";
+      if (exists $utt2lang{$utt_id}) {
+	  print LANG "$utt_id", " $utt2lang{$utt_id}", "\n";
+      }
+      else {
+	  print LANG "$utt_id N/A\n";
+      }
   }
 }
 close(SPKR) or die;
 close(WAV) or die;
+close(LANG) or die;
+close(GENDER) or die;
 
 if (system(
   "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
diff --git a/hyp_utils/kaldi/utils/fix_data_dir.sh b/hyp_utils/kaldi/utils/fix_data_dir.sh
index ed080eee..bb18e07b 100755
--- a/hyp_utils/kaldi/utils/fix_data_dir.sh
+++ b/hyp_utils/kaldi/utils/fix_data_dir.sh
@@ -117,7 +117,7 @@ function filter_speakers {
   ${kaldi_utils}/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt
 
   cat $data/spk2utt | awk '{print $1}' > $tmpdir/speakers
-  for s in cmvn.scp spk2gender; do
+  for s in cmvn.scp spk2gender spk2nation; do
     f=$data/$s
     if [ -f $f ]; then
       filter_file $f $tmpdir/speakers
@@ -127,7 +127,7 @@ function filter_speakers {
   filter_file $tmpdir/speakers $data/spk2utt
   ${kaldi_utils}/spk2utt_to_utt2spk.pl $data/spk2utt > $data/utt2spk
 
-  for s in cmvn.scp spk2gender $spk_extra_files; do
+  for s in cmvn.scp spk2gender spk2nation $spk_extra_files; do
     f=$data/$s
     if [ -f $f ]; then
       filter_file $tmpdir/speakers $f