Skip to content

Commit

Permalink
egs: add readme for REVERB datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
haoxiangsnr committed Jan 23, 2024
1 parent 62122de commit db88479
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 139 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ max_steps = 0
max_epochs = 200
max_grad_norm = 10
save_max_score = true
save_ckpt_interval = 5
max_patience = 200
save_ckpt_interval = 1
max_patience = 20
plot_norm = true
validation_interval = 5
validation_interval = 1
max_num_checkpoints = 20
scheduler_name = "constant_schedule_with_warmup"
warmup_steps = 0
Expand Down Expand Up @@ -45,7 +45,7 @@ fb_output_activate_function = false
sb_hidden_size = 224
sb_num_layers = 2
freq_cutoffs = [0, 32, 128, 256]
df_orders = [5, 3, 1]
df_orders = [1, 1, 1]
center_freq_sizes = [4, 32, 64]
neighbor_freq_sizes = [15, 15, 15]
use_pre_layer_norm_fb = true
Expand All @@ -62,26 +62,32 @@ win_length = 512
sr = 16000

[train_dataset]
path = "dataloader.SimTrainDataset"
path = "dataloader.DNSAudio"
[train_dataset.args]
rvb_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/tr_simu_1ch.scp"
dry_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/tr_cln.scp"
duration_in_seconds = 4.0
sr = 16000
root = "/datasets/datasets_fullband/training_set/"
limit = false
offset = 0

[train_dataset.dataloader]
batch_size = 32
batch_size = 64
num_workers = 8
drop_last = true
pin_memory = true

[validate_dataset]
path = "dataloader.SimDTDataset"
path = "dataloader.DNSAudio"
[validate_dataset.args]
rvb_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_simu_1ch.scp"
dry_scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_cln.scp"
sr = 16000
limit = 200
offset = 0
root = "/datasets/datasets_fullband/validation_set/"
train = false
[validate_dataset.dataloader]
batch_size = 16
num_workers = 8

[test_dataset]
path = "dataloader.DNSAudio"
[test_dataset.args]
root = "/nfs/xhao/data/intel_ndns/test_set/"
train = false
[test_dataset.dataloader]
batch_size = 1
num_workers = 1
num_workers = 0
18 changes: 18 additions & 0 deletions recipes/reverb/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# REVERB Challenge dataset

The REVERB challenge aims to inspire and evaluate diverse ideas for speech enhancement and robust automatic speech recognition in reverberant environments.

The REVERB challenge data is currently available only through LDC. For more details, please visit the link: https://reverb2014.dereverberation.com/instructions.html.

## Inference

If you want to use a pretrained model, please refer to the following example command:

```shell
accelerate launch --multi_gpu --num_processes=4 --gpu_ids 0,1,2,3 --main_process_port 46599 run.py -C default.toml -M predict --ckpt_path /home/xhao/proj/spiking-fullsubnet/recipes/reverb/spiking_fullsubnet/exp/default/checkpoints/epoch_0155
```

Some notes:
- Because the metrics on the test set can only be evaluated with MATLAB, you should run the command in `predict` mode, which outputs the audio samples produced by the model.
- `--ckpt_path` is the path to the pretrained model checkpoint.
- In `predict` mode, the audio samples produced by the model are saved in an evaluation directory that corresponds to the shell script of the REVERB Challenge dataset. For example, we output the audio samples to the `/nfs/xhao/data/reverb_challenge/kaldi/egs/reverb/s5/wav/spiking_fullsubnet` directory.
16 changes: 16 additions & 0 deletions recipes/reverb/spiking_fullsubnet/default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,19 @@ offset = 0
[validate_dataset.dataloader]
batch_size = 1
num_workers = 0

[[test_dataset]]
path = "dataloader.EvaluationSimDataset"
[test_dataset.args]
scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_simu_1ch.scp"
[test_dataset.dataloader]
batch_size = 1
num_workers = 1

[[test_dataset]]
path = "dataloader.EvaluationRealDataset"
[test_dataset.args]
scp_fpath = "/home/xhao/proj/spiking-fullsubnet/recipes/reverb/data/et_real_1ch.scp"
[test_dataset.dataloader]
batch_size = 1
num_workers = 1
103 changes: 0 additions & 103 deletions recipes/reverb/spiking_fullsubnet/default_sisdr_loss.toml

This file was deleted.

20 changes: 1 addition & 19 deletions recipes/reverb/spiking_fullsubnet/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, *args, **kwargs):
self.pesq_nb = PESQ(sr=self.sr, mode="nb")
self.sisnr_loss = SISNRLoss(return_neg=False)
self.si_sdr = SISDR()
self.north_star_metric = "si_sdr"
self.north_star_metric = "OVRL"

def training_step(self, batch, batch_idx):
self.optimizer.zero_grad()
Expand Down Expand Up @@ -99,24 +99,6 @@ def validation_epoch_end(self, outputs, log_to_tensorboard=True):

return score

def test_step(self, batch, batch_idx, dataloader_idx=0):
mix_y, fpath = batch
fpath = Path(fpath[0])
est_y, *_ = self.model(mix_y)

# save audio
est_y = est_y.squeeze(0).detach().cpu().numpy()

mix_root = Path("/nfs/xhao/data/reverb_challenge/REVERB_DATA_OFFICIAL")
est_root = Path("/nfs/xhao/data/reverb_challenge/kaldi/egs/reverb/s5/wav/spiking_fullsubnet")
save_fpath = est_root / fpath.relative_to(mix_root)
save_fpath.parent.mkdir(parents=True, exist_ok=True)

sf.write(save_fpath, est_y, samplerate=self.sr)

def test_epoch_end(self, outputs):
pass

def predict_step(self, batch, batch_idx, dataloader_idx):
mix_y, fpath = batch
fpath = Path(fpath[0])
Expand Down

0 comments on commit db88479

Please sign in to comment.