Skip to content

Commit

Permalink
egs: initial model zoo
Browse files Browse the repository at this point in the history
  • Loading branch information
haoxiangsnr committed Jul 28, 2023
1 parent 15ff1d1 commit 39005bc
Show file tree
Hide file tree
Showing 27 changed files with 305 additions and 71 deletions.
4 changes: 2 additions & 2 deletions audiozen/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def compute_synops(fb_all_layer_outputs, sb_all_layer_outputs):
+ sb_all_layer_outputs[i][j].size(-1)
)
)
return {"synops": synops}
return synops.item()


def compute_neuronops(fb_all_layer_outputs, sb_all_layer_outputs):
Expand All @@ -357,4 +357,4 @@ def compute_neuronops(fb_all_layer_outputs, sb_all_layer_outputs):
for i in range(len(sb_all_layer_outputs)):
for j in range(len(sb_all_layer_outputs[i])):
neuronops += sb_all_layer_outputs[i][j].size(-1)
return {"neuronops": neuronops}
return neuronops
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Experiment configuration for the run identified by [meta].exp_id ("baseline_l").
# Layout: each component table carries a dotted `path`, and its arguments live in
# a matching `<component>.args` table further down in this file.
# NOTE(review): how `path` and `args` are resolved is not visible in this file;
# confirm against the code that loads this config.

# Validation dataset, declared as an array of tables ([[...]]), so additional
# validation sets can be appended as further [[validate_dataset]] entries.
[[validate_dataset]]
path = "dataloader.DNSAudio"

# Arguments for the validation dataset entry above.
[validate_dataset.args]
root = "/datasets/datasets_fullband/validation_set/"
train = false
# Dataloader settings for the validation dataset entry above.
[validate_dataset.dataloader]
batch_size = 6
num_workers = 6

# Run-level metadata.
# NOTE(review): config_path is an absolute, machine-specific path; verify it
# before reusing this file on another host.
[meta]
save_dir = "exp"
description = "Train a model using Generative Adversarial Networks (GANs)"
seed = 20220815
exp_id = "baseline_l"
config_path = "/home/xianghao/proj/audiozen/recipes/intel_ndns/spike_fsb/baseline_l.toml"

# Trainer class; its arguments are in [trainer.args] below.
[trainer]
path = "trainer.Trainer"

# Loss function; [loss_function.args] below is empty.
[loss_function]
path = "torch.nn.MSELoss"

# The `_g` / `_d` suffixes pair each optimizer and scheduler with model_g / model_d
# (presumably generator / discriminator, given the GAN description in [meta]).
[optimizer_g]
path = "torch.optim.AdamW"

[optimizer_d]
path = "torch.optim.AdamW"

[lr_scheduler_g]
path = "torch.optim.lr_scheduler.ExponentialLR"

[lr_scheduler_d]
path = "torch.optim.lr_scheduler.ExponentialLR"

# Model paths; arguments are in [model_g.args] and [model_d.args] below.
[model_g]
path = "model_low_freq.Separator"

[model_d]
path = "discriminator.Discriminator"

# Shared STFT / audio settings.
# NOTE(review): if sr is in Hz and hop_length in samples, 128 / 16000 = 8 ms per
# hop; confirm units against the code that consumes this table.
[acoustics]
n_fft = 512
hop_length = 128
win_length = 512
sr = 16000

# Training and test datasets use the same dataset class as validate_dataset.
[train_dataset]
path = "dataloader.DNSAudio"

[test_dataset]
path = "dataloader.DNSAudio"

# Trainer arguments.
# max_epoch and max_num_checkpoints are both set to 9999; presumably this makes
# them effectively unbounded so that `patience` decides when training stops.
# TODO confirm against trainer.Trainer.
[trainer.args]
max_epoch = 9999
clip_grad_norm_value = 10
save_max_score = true
save_ckpt_interval = 1
patience = 20
plot_norm = true
validation_interval = 1
max_num_checkpoints = 9999

# Intentionally empty: no arguments are set for the loss function here.
[loss_function.args]

# Both optimizers use the same learning rate.
[optimizer_g.args]
lr = 0.001

[optimizer_d.args]
lr = 0.001

# Both schedulers use the same decay factor.
[lr_scheduler_g.args]
gamma = 0.99

[lr_scheduler_d.args]
gamma = 0.99

# Arguments for model_g.
# sr, n_fft, hop_length and win_length repeat the values in [acoustics]; keep the
# two tables in sync when editing either one.
# freq_cutoffs has three entries while the sb_* / fb_* list arguments have four;
# presumably three cutoffs delimit four frequency bands with one list entry per
# band. TODO confirm against model_low_freq.Separator.
[model_g.args]
sr = 16000
fdrc = 0.5
n_fft = 512
fb_freqs = 64
hop_length = 128
win_length = 512
num_freqs = 256
sequence_model = "GSU"
fb_hidden_size = 320
fb_output_activate_function = false
freq_cutoffs = [ 32, 128, 192,]
sb_df_orders = [ 5, 3, 1, 1,]
sb_num_center_freqs = [ 2, 4, 32, 64,]
sb_num_neighbor_freqs = [ 15, 15, 15, 15,]
fb_num_center_freqs = [ 2, 4, 32, 64,]
fb_num_neighbor_freqs = [ 0, 0, 0, 0,]
sb_hidden_size = 256
sb_output_activate_function = false
norm_type = "offline_laplace_norm"
shared_weights = true
bn = true

# Intentionally empty: no arguments are set for model_d here.
[model_d.args]

# Training dataset arguments.
[train_dataset.args]
root = "/datasets/datasets_fullband/training_set/"
limit = false
offset = 0

# Training dataloader settings.
[train_dataset.dataloader]
batch_size = 24
num_workers = 12
drop_last = true
pin_memory = true

# The test dataset uses the same root and `train = false` flag as validate_dataset.
[test_dataset.args]
root = "/datasets/datasets_fullband/validation_set/"
train = false

# Test dataloader settings match the validation dataloader settings.
[test_dataset.dataloader]
batch_size = 6
num_workers = 6
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
23 changes: 14 additions & 9 deletions notebooks/check_metric.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,9 @@
"metadata": {},
"outputs": [],
"source": [
"synops = 342508\n",
"synops = 211109\n",
"# neuronops = 3208\n",
"neuronops = 1793\n",
"buffer_latency = 0.016\n",
"synops = 559048\n",
"neuronops = 3376\n",
"buffer_latency = 0.008\n",
"enc_dec_latency = 0.000036\n",
"dns_latency = 0\n",
"dt = buffer_latency"
Expand All @@ -77,10 +75,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"229039\n",
"Solution Latency : 16.036 ms\n",
"Power proxy (Effective SynOPS) : 14314937.500 ops/s\n",
"PDP proxy (SynOPS-delay product) : 229554.338 ops\n"
"592808\n",
"Solution Latency : 8.036 ms\n",
"Power proxy (Effective SynOPS) : 74101000.000 ops/s\n",
"PDP proxy (SynOPS-delay product) : 595475.636 ops\n"
]
}
],
Expand All @@ -94,6 +92,13 @@
"print(f'Power proxy (Effective SynOPS) : {effective_synops_rate:.3f} ops/s')\n",
"print(f'PDP proxy (SynOPS-delay product) : {synops_delay_product: .3f} ops')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
87 changes: 51 additions & 36 deletions recipes/intel_ndns/metricsboard_writeout.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,58 +68,73 @@
"params": 1580 * 10**3,
"size_kilobytes": 1580 * 4,
},
# ======================= FSB + SNN =======================
{
"team": "Clairaudience",
"model": "FSB+SNN",
"model": "model_S",
"date": "2023-07-25",
"SI-SNR": 14.24,
"SI-SNRi_data": 14.24 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 14.24 - custom_noisy["SI-SNR"],
"MOS_ovrl": 2.92 - custom_noisy["MOS_ovrl"],
"SI-SNR": 13.67,
"SI-SNRi_data": 13.67 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 13.67 - custom_noisy["SI-SNR"],
"MOS_ovrl": 2.95 - custom_noisy["MOS_ovrl"],
"MOS_sig": 3.25 - custom_noisy["MOS_sig"],
"MOS_bak": 3.88 - custom_noisy["MOS_bak"],
"MOS_bak": 3.93 - custom_noisy["MOS_bak"],
"latency_enc+dec_ms": 0.036,
"latency_total_ms": 16.036,
"power_proxy_Ops/s": 24.6 * 10**6,
"PDP_proxy_Ops": 395078,
"params": 911 * 10**3,
"size_kilobytes": 911 * 4,
"latency_total_ms": 8.036,
"power_proxy_Ops/s": 29 * 10**6,
"PDP_proxy_Ops": 234815,
"params": 512 * 10**3,
"size_kilobytes": 512 * 4,
},
{
"team": "Clairaudience",
"model": "FSB+SNN (small)",
"date": "2023-07-25",
"SI-SNR": 14.09,
"SI-SNRi_data": 14.09 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 14.09 - custom_noisy["SI-SNR"],
"MOS_ovrl": 2.90 - custom_noisy["MOS_ovrl"],
"MOS_sig": 3.23 - custom_noisy["MOS_sig"],
"MOS_bak": 3.86 - custom_noisy["MOS_bak"],
"model": "model_M",
"date": "2023-07-26",
"SI-SNR": 14.50,
"SI-SNRi_data": 14.50 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 14.50 - custom_noisy["SI-SNR"],
"MOS_ovrl": 3.02 - custom_noisy["MOS_ovrl"],
"MOS_sig": 3.32 - custom_noisy["MOS_sig"],
"MOS_bak": 3.97 - custom_noisy["MOS_bak"],
"latency_enc+dec_ms": 0.036,
"latency_total_ms": 16.036,
"power_proxy_Ops/s": 24.6 * 10**6,
"PDP_proxy_Ops": 395078,
"params": 643 * 10**3,
"size_kilobytes": 911 * 4,
"latency_total_ms": 8.036,
"power_proxy_Ops/s": 53.6 * 10**6,
"PDP_proxy_Ops": 431 * 10**3,
"params": 954 * 10**3,
"size_kilobytes": 954 * 4,
},
# ======================= FSB + SNN + GAN =======================
{
"team": "Clairaudience",
"model": "FSB + SNN + GAN + MF + SISDRLoss",
"date": "2023-07-26",
"SI-SNR": 14.52,
"SI-SNRi_data": 14.52 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 14.52 - custom_noisy["SI-SNR"],
"model": "model_L",
"date": "2023-07-27",
"SI-SNR": 14.51,
"SI-SNRi_data": 14.51 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 14.51 - custom_noisy["SI-SNR"],
"MOS_ovrl": 3.01 - custom_noisy["MOS_ovrl"],
"MOS_sig": 3.31 - custom_noisy["MOS_sig"],
"MOS_bak": 3.95 - custom_noisy["MOS_bak"],
"MOS_bak": 3.97 - custom_noisy["MOS_bak"],
"latency_enc+dec_ms": 0.036,
"latency_total_ms": 8.036,
"power_proxy_Ops/s": None,
"PDP_proxy_Ops": None,
"params": 953 * 10**3,
"size_kilobytes": 953 * 4,
"power_proxy_Ops/s": 74101000,
"PDP_proxy_Ops": 595475,
"params": 1289 * 10**3,
"size_kilobytes": 1289 * 4,
},
{
"team": "Clairaudience",
"model": "model_M_dualGAN",
"date": "2023-07-27",
"SI-SNR": 12.72,
"SI-SNRi_data": 12.72 - custom_noisy["SI-SNR"],
"SI-SNRi_enc+dec": 12.72 - custom_noisy["SI-SNR"],
"MOS_ovrl": 2.89 - custom_noisy["MOS_ovrl"],
"MOS_sig": 3.25 - custom_noisy["MOS_sig"],
"MOS_bak": 3.81 - custom_noisy["MOS_bak"],
"latency_enc+dec_ms": 0.036,
"latency_total_ms": 8.036,
"power_proxy_Ops/s": 55.5 * 10**6,
"PDP_proxy_Ops": 445 * 10**3,
"params": 954 * 10**3,
"size_kilobytes": 954 * 4,
},
]

Expand Down
4 changes: 2 additions & 2 deletions recipes/intel_ndns/spike_fsb/baseline_m_dualGAN.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ save_max_score = true
save_ckpt_interval = 1
patience = 20
plot_norm = true
validation_interval = 10
validation_interval = 4
max_num_checkpoints = 9999

[loss_function]
Expand All @@ -22,7 +22,7 @@ path = "torch.nn.MSELoss"
[optimizer_g]
path = "torch.optim.AdamW"
[optimizer_g.args]
lr = 1e-3
lr = 5e-4

[optimizer_d_sig]
path = "torch.optim.AdamW"
Expand Down
41 changes: 31 additions & 10 deletions recipes/intel_ndns/spike_fsb/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from tqdm import tqdm

from audiozen.loss import SISNRLoss, freq_MAE, mag_MAE
from audiozen.metric import DNSMOS, PESQ, SISDR, STOI
from audiozen.metric import DNSMOS, PESQ, SISDR, STOI, compute_neuronops, compute_synops
from audiozen.trainer.base_trainer_gan_accelerate_ddp_validate import BaseTrainer

logger = get_logger(__name__)
Expand Down Expand Up @@ -102,21 +102,37 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0):
noisy_y = noisy_y.to(self.accelerator.device)
clean_y = clean_y.to(self.accelerator.device)

enhanced_y, *_ = self.model_g(noisy_y)
return noisy_y, clean_y, enhanced_y
enhanced_y, enhanced_mag, fb_out, sb_out = self.model_g(noisy_y)

# detach and move to cpu
synops = compute_synops(fb_out, sb_out)
neuron_ops = compute_neuronops(fb_out, sb_out)

# to tensor
synops = torch.tensor([synops], device=self.accelerator.device).unsqueeze(0)
synops = synops.repeat(enhanced_y.shape[0], 1)
neuron_ops = torch.tensor(
[neuron_ops], device=self.accelerator.device
).unsqueeze(0)
neuron_ops = neuron_ops.repeat(enhanced_y.shape[0], 1)

return noisy_y, clean_y, enhanced_y, synops, neuron_ops

def compute_metrics(self, dataloader_idx, step_out):
noisy, clean, enhanced = step_out
noisy, clean, enhanced, synops, neuron_ops = step_out

si_sdr = self.si_sdr(enhanced, clean)
# stoi = self.stoi(enhanced, clean)
# pesq_wb = self.pesq_wb(enhanced, clean)
# pesq_nb = self.pesq_nb(enhanced, clean)
dns_mos = self.dns_mos(enhanced)
return si_sdr | dns_mos

return (
si_sdr
| dns_mos
| {"synops": synops.item()}
| {"neuron_ops": neuron_ops.item()}
)

def compute_batch_metrics(self, dataloader_idx, step_out):
noisy, clean, enhanced = step_out
noisy, clean, enhanced, synops, neuron_ops = step_out
assert noisy.ndim == clean.ndim == enhanced.ndim == 2

# [num_ranks * batch_size, num_samples]
Expand All @@ -125,8 +141,13 @@ def compute_batch_metrics(self, dataloader_idx, step_out):
enhanced_i = enhanced[i, :]
clean_i = clean[i, :]
noisy_i = noisy[i, :]
synops_i = synops[i, :]
neuron_ops_i = neuron_ops[i, :]
results.append(
self.compute_metrics(dataloader_idx, (noisy_i, clean_i, enhanced_i))
self.compute_metrics(
dataloader_idx,
(noisy_i, clean_i, enhanced_i, synops_i, neuron_ops_i),
)
)

return results
Expand Down
Loading

0 comments on commit 39005bc

Please sign in to comment.