egs: add readme for REVERB datasets

haoxiangsnr · Jan 23, 2024 · db88479 · db88479
1 parent 62122de
commit db88479
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 139 deletions.
diff --git a/.../spiking_fullsubnet/default_time_mae.toml → .../spiking_fullsubnet/baseline_m_no_df.toml b/.../spiking_fullsubnet/default_time_mae.toml → .../spiking_fullsubnet/baseline_m_no_df.toml
@@ -11,10 +11,10 @@ max_steps = 0
 max_epochs = 200
 max_grad_norm = 10
 save_max_score = true
-save_ckpt_interval = 5
-max_patience = 200
+save_ckpt_interval = 1
+max_patience = 20
 plot_norm = true
-validation_interval = 5
+validation_interval = 1
 max_num_checkpoints = 20
 scheduler_name = "constant_schedule_with_warmup"
 warmup_steps = 0
@@ -45,7 +45,7 @@ fb_output_activate_function = false
 sb_hidden_size = 224
 sb_num_layers = 2
 freq_cutoffs = [0, 32, 128, 256]
-df_orders = [5, 3, 1]
+df_orders = [1, 1, 1]
 center_freq_sizes = [4, 32, 64]
 neighbor_freq_sizes = [15, 15, 15]
 use_pre_layer_norm_fb = true
@@ -62,26 +62,32 @@ win_length = 512
 sr = 16000
 
 [train_dataset]
-path = "dataloader.SimTrainDataset"
+path = "dataloader.DNSAudio"
 [train_dataset.args]
-rvb_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/tr_simu_1ch.scp"
-dry_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/tr_cln.scp"
-duration_in_seconds = 4.0
-sr = 16000
+root = "/datasets/datasets_fullband/training_set/"
+limit = false
+offset = 0
+
 [train_dataset.dataloader]
-batch_size = 32
+batch_size = 64
 num_workers = 8
 drop_last = true
 pin_memory = true
 
 [validate_dataset]
-path = "dataloader.SimDTDataset"
+path = "dataloader.DNSAudio"
 [validate_dataset.args]
-rvb_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_simu_1ch.scp"
-dry_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_cln.scp"
-sr = 16000
-limit = 200
-offset = 0
+root = "/datasets/datasets_fullband/validation_set/"
+train = false
 [validate_dataset.dataloader]
+batch_size = 16
+num_workers = 8
+
+[test_dataset]
+path = "dataloader.DNSAudio"
+[test_dataset.args]
+root = "/nfs/xhao/data/intel_ndns/test_set/"
+train = false
+[test_dataset.dataloader]
 batch_size = 1
-num_workers = 1
+num_workers = 0
diff --git a/recipes/reverb/README.md b/recipes/reverb/README.md
@@ -0,0 +1,18 @@
+# REVERB Challenge dataset
+
+The REVERB Challenge aims to inspire and evaluate diverse ideas for speech enhancement and robust automatic speech recognition in reverberant environments.
+
+The REVERB challenge data is currently available only through LDC. For more details, please visit the link: https://reverb2014.dereverberation.com/instructions.html.
+
+## Inference
+
+If you want to use a pretrained model, please refer to the following example command:
+
+```shell
+accelerate launch --multi_gpu --num_processes=4 --gpu_ids 0,1,2,3 --main_process_port 46599 run.py -C default.toml -M predict --ckpt_path /home/xhao/proj/spiking-fullsubnet/recipes/reverb/spiking_fullsubnet/exp/default/checkpoints/epoch_0155
+```
+
+Some notes:
+- Because the metrics on the test set can only be evaluated with MATLAB, you should run the command in `predict` mode, which outputs the audio samples produced by the model.
+- The `--ckpt_path` is the path of the pretrained model.
+- In `predict` mode, the output audio samples of the model are saved in an evaluation directory that corresponds to the shell script of the REVERB Challenge dataset. For example, we output the audio samples to the `/nfs/xhao/data/reverb_challenge/kaldi/egs/reverb/s5/wav/spiking_fullsubnet` directory.
diff --git a/recipes/reverb/spiking_fullsubnet/default.toml b/recipes/reverb/spiking_fullsubnet/default.toml
@@ -85,3 +85,19 @@ offset = 0
 [validate_dataset.dataloader]
 batch_size = 1
 num_workers = 0
+
+[[test_dataset]]
+path = "dataloader.EvaluationSimDataset"
+[test_dataset.args]
+scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_simu_1ch.scp"
+[test_dataset.dataloader]
+batch_size = 1
+num_workers = 1
+
+[[test_dataset]]
+path = "dataloader.EvaluationRealDataset"
+[test_dataset.args]
+scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_real_1ch.scp"
+[test_dataset.dataloader]
+batch_size = 1
+num_workers = 1
diff --git a/recipes/reverb/spiking_fullsubnet/default_sisdr_loss.toml b/recipes/reverb/spiking_fullsubnet/default_sisdr_loss.toml
diff --git a/recipes/reverb/spiking_fullsubnet/trainer.py b/recipes/reverb/spiking_fullsubnet/trainer.py
@@ -23,7 +23,7 @@ def __init__(self, *args, **kwargs):
         self.pesq_nb = PESQ(sr=self.sr, mode="nb")
         self.sisnr_loss = SISNRLoss(return_neg=False)
         self.si_sdr = SISDR()
-        self.north_star_metric = "si_sdr"
+        self.north_star_metric = "OVRL"
 
     def training_step(self, batch, batch_idx):
         self.optimizer.zero_grad()
@@ -99,24 +99,6 @@ def validation_epoch_end(self, outputs, log_to_tensorboard=True):
 
         return score
 
-    def test_step(self, batch, batch_idx, dataloader_idx=0):
-        mix_y, fpath = batch
-        fpath = Path(fpath[0])
-        est_y, *_ = self.model(mix_y)
-
-        # save audio
-        est_y = est_y.squeeze(0).detach().cpu().numpy()
-
-        mix_root = Path("/nfs/xhao/data/reverb_challenge/REVERB_DATA_OFFICIAL")
-        est_root = Path("/nfs/xhao/data/reverb_challenge/kaldi/egs/reverb/s5/wav/spiking_fullsubnet")
-        save_fpath = est_root / fpath.relative_to(mix_root)
-        save_fpath.parent.mkdir(parents=True, exist_ok=True)
-
-        sf.write(save_fpath, est_y, samplerate=self.sr)
-
-    def test_epoch_end(self, outputs):
-        pass
-
     def predict_step(self, batch, batch_idx, dataloader_idx):
         mix_y, fpath = batch
         fpath = Path(fpath[0])