From 590b1cef456efb0534e881689788eb6224ba60c3 Mon Sep 17 00:00:00 2001
From: kan-bayashi
Date: Sun, 27 Feb 2022 20:17:55 +0900
Subject: [PATCH 1/3] fixed model compatibility

---
 parallel_wavegan/layers/residual_block.py |  99 ++++++++++----------
 parallel_wavegan/models/hifigan.py        | 109 +++++++++++-----------
 2 files changed, 103 insertions(+), 105 deletions(-)

diff --git a/parallel_wavegan/layers/residual_block.py b/parallel_wavegan/layers/residual_block.py
index 3552e293..aba45420 100644
--- a/parallel_wavegan/layers/residual_block.py
+++ b/parallel_wavegan/layers/residual_block.py
@@ -152,8 +152,6 @@ def __init__(
         use_additional_convs=True,
         nonlinear_activation="LeakyReLU",
         nonlinear_activation_params={"negative_slope": 0.1},
-        pad="ReplicationPad1d",
-        pad_params={},
         use_causal_conv=False,
     ):
         """Initialize HiFiGANResidualBlock module.
@@ -180,66 +178,69 @@ def __init__(
         assert kernel_size % 2 == 1, "Kernel size must be odd number."
         for dilation in dilations:
             if not use_causal_conv:
-                conv = torch.nn.Sequential(
-                    getattr(torch.nn, pad)(
-                        (kernel_size - 1) // 2 * dilation, **pad_params
-                    ),
-                    torch.nn.Conv1d(
-                        channels,
-                        channels,
-                        kernel_size,
-                        dilation=dilation,
-                        bias=bias,
-                    ),
-                )
-            else:
-                conv = CausalConv1d(
-                    channels,
-                    channels,
-                    kernel_size,
-                    dilation=dilation,
-                    bias=bias,
-                    pad=pad,
-                    pad_params=pad_params,
-                )
-            self.convs1 += [
-                torch.nn.Sequential(
-                    getattr(torch.nn, nonlinear_activation)(
-                        **nonlinear_activation_params
-                    ),
-                    conv,
-                )
-            ]
-            if use_additional_convs:
-                if not use_causal_conv:
-                    conv = torch.nn.Sequential(
-                        getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
+                self.convs1 += [
+                    torch.nn.Sequential(
+                        getattr(torch.nn, nonlinear_activation)(
+                            **nonlinear_activation_params
+                        ),
                         torch.nn.Conv1d(
                             channels,
                             channels,
                             kernel_size,
-                            dilation=1,
+                            1,
+                            dilation=dilation,
                             bias=bias,
+                            padding=(kernel_size - 1) // 2 * dilation,
                         ),
                     )
-                else:
-                    conv = CausalConv1d(
-                        channels,
-                        channels,
-                        kernel_size,
-                        dilation=1,
-                        bias=bias,
-                        pad=pad,
-                        pad_params=pad_params,
-                    )
-                self.convs2 += [
+                ]
+            else:
+                self.convs1 += [
                     torch.nn.Sequential(
                         getattr(torch.nn, nonlinear_activation)(
                             **nonlinear_activation_params
                         ),
-                        conv,
+                        CausalConv1d(
+                            channels,
+                            channels,
+                            kernel_size,
+                            dilation=dilation,
+                            bias=bias,
+                        ),
                     )
                 ]
+            if use_additional_convs:
+                if not use_causal_conv:
+                    self.convs2 += [
+                        torch.nn.Sequential(
+                            getattr(torch.nn, nonlinear_activation)(
+                                **nonlinear_activation_params
+                            ),
+                            torch.nn.Conv1d(
+                                channels,
+                                channels,
+                                kernel_size,
+                                dilation=1,
+                                bias=bias,
+                                padding=(kernel_size - 1) // 2,
+                            ),
+                        )
+                    ]
+                else:
+                    self.convs2 += [
+                        torch.nn.Sequential(
+                            getattr(torch.nn, nonlinear_activation)(
+                                **nonlinear_activation_params
+                            ),
+                            CausalConv1d(
+                                channels,
+                                channels,
+                                kernel_size,
+                                dilation=1,
+                                bias=bias,
+                            ),
+                        ),
+                    ]
 
     def forward(self, x):
         """Calculate forward propagation.
diff --git a/parallel_wavegan/models/hifigan.py b/parallel_wavegan/models/hifigan.py
index d6b6e62e..d3b84643 100644
--- a/parallel_wavegan/models/hifigan.py
+++ b/parallel_wavegan/models/hifigan.py
@@ -36,8 +36,6 @@ def __init__(
         bias=True,
         nonlinear_activation="LeakyReLU",
         nonlinear_activation_params={"negative_slope": 0.1},
-        pad="ReplicationPad1d",
-        pad_params={},
         use_causal_conv=False,
         use_weight_norm=True,
     ):
@@ -75,14 +73,12 @@ def __init__(
         self.num_blocks = len(resblock_kernel_sizes)
         self.use_causal_conv = use_causal_conv
         if not use_causal_conv:
-            self.input_conv = torch.nn.Sequential(
-                getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
-                torch.nn.Conv1d(
-                    in_channels,
-                    channels,
-                    kernel_size,
-                    bias=bias,
-                ),
+            self.input_conv = torch.nn.Conv1d(
+                in_channels,
+                channels,
+                kernel_size,
+                bias=bias,
+                padding=(kernel_size - 1) // 2,
             )
         else:
             self.input_conv = CausalConv1d(
@@ -90,41 +86,43 @@ def __init__(
                 channels,
                 kernel_size,
                 bias=bias,
-                pad=pad,
-                pad_params=pad_params,
             )
         self.upsamples = torch.nn.ModuleList()
         self.blocks = torch.nn.ModuleList()
         for i in range(len(upsample_kernel_sizes)):
             assert upsample_kernel_sizes[i] == 2 * upsample_scales[i]
             if not use_causal_conv:
-                conv = torch.nn.ConvTranspose1d(
-                    channels // (2 ** i),
-                    channels // (2 ** (i + 1)),
-                    upsample_kernel_sizes[i],
-                    upsample_scales[i],
-                    padding=upsample_scales[i] // 2 + upsample_scales[i] % 2,
-                    output_padding=upsample_scales[i] % 2,
-                    bias=bias,
-                )
+                self.upsamples += [
+                    torch.nn.Sequential(
+                        getattr(torch.nn, nonlinear_activation)(
+                            **nonlinear_activation_params
+                        ),
+                        torch.nn.ConvTranspose1d(
+                            channels // (2 ** i),
+                            channels // (2 ** (i + 1)),
+                            upsample_kernel_sizes[i],
+                            upsample_scales[i],
+                            padding=upsample_scales[i] // 2 + upsample_scales[i] % 2,
+                            output_padding=upsample_scales[i] % 2,
+                            bias=bias,
+                        ),
+                    )
+                ]
             else:
-                conv = CausalConvTranspose1d(
-                    channels // (2 ** i),
-                    channels // (2 ** (i + 1)),
-                    upsample_kernel_sizes[i],
-                    upsample_scales[i],
-                    bias=bias,
-                    pad=pad,
-                    pad_params=pad_params,
-                )
-            self.upsamples += [
-                torch.nn.Sequential(
-                    getattr(torch.nn, nonlinear_activation)(
-                        **nonlinear_activation_params
-                    ),
-                    conv,
-                )
-            ]
+                self.upsamples += [
+                    torch.nn.Sequential(
+                        getattr(torch.nn, nonlinear_activation)(
+                            **nonlinear_activation_params
+                        ),
+                        CausalConvTranspose1d(
+                            channels // (2 ** i),
+                            channels // (2 ** (i + 1)),
+                            upsample_kernel_sizes[i],
+                            upsample_scales[i],
+                            bias=bias,
+                        ),
+                    )
+                ]
             for j in range(len(resblock_kernel_sizes)):
                 self.blocks += [
                     ResidualBlock(
@@ -135,37 +133,36 @@ def __init__(
                         use_additional_convs=use_additional_convs,
                         nonlinear_activation=nonlinear_activation,
                         nonlinear_activation_params=nonlinear_activation_params,
-                        pad=pad,
-                        pad_params=pad_params,
                         use_causal_conv=use_causal_conv,
                     )
                 ]
         if not use_causal_conv:
-            conv = torch.nn.Sequential(
-                getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
+            self.output_conv = torch.nn.Sequential(
+                # NOTE(kan-bayashi): follow official implementation but why
+                # using different slope parameter here? (0.1 vs. 0.01)
+                torch.nn.LeakyReLU(),
                 torch.nn.Conv1d(
                     channels // (2 ** (i + 1)),
                     out_channels,
                     kernel_size,
                     bias=bias,
+                    padding=(kernel_size - 1) // 2,
                 ),
+                torch.nn.Tanh(),
             )
         else:
-            conv = CausalConv1d(
-                channels // (2 ** (i + 1)),
-                out_channels,
-                kernel_size,
-                bias=bias,
-                pad=pad,
-                pad_params=pad_params,
+            self.output_conv = torch.nn.Sequential(
+                # NOTE(kan-bayashi): follow official implementation but why
+                # using different slope parameter here? (0.1 vs. 0.01)
+                torch.nn.LeakyReLU(),
+                CausalConv1d(
+                    channels // (2 ** (i + 1)),
+                    out_channels,
+                    kernel_size,
+                    bias=bias,
+                ),
+                torch.nn.Tanh(),
             )
-        self.output_conv = torch.nn.Sequential(
-            # NOTE(kan-bayashi): follow official implementation but why
-            # using different slope parameter here? (0.1 vs. 0.01)
-            torch.nn.LeakyReLU(),
-            conv,
-            torch.nn.Tanh(),
-        )
 
         # apply weight norm
         if use_weight_norm:

From 303e037ad3386fa9d683e005bc7b73f87406a548 Mon Sep 17 00:00:00 2001
From: kan-bayashi
Date: Sun, 27 Feb 2022 20:25:24 +0900
Subject: [PATCH 2/3] fixed test

---
 test/test_hifigan.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/test/test_hifigan.py b/test/test_hifigan.py
index 117cf284..390c02cc 100644
--- a/test/test_hifigan.py
+++ b/test/test_hifigan.py
@@ -40,8 +40,6 @@ def make_hifigan_generator_args(**kwargs):
         bias=True,
         nonlinear_activation="LeakyReLU",
         nonlinear_activation_params={"negative_slope": 0.1},
-        pad="ReplicationPad1d",
-        pad_params={},
         use_weight_norm=True,
         use_causal_conv=False,
     )

From 46a81d236fe6106fac0a33d2d17537cf01aa0306 Mon Sep 17 00:00:00 2001
From: kan-bayashi
Date: Sun, 27 Feb 2022 20:26:18 +0900
Subject: [PATCH 3/3] fixed docstring

---
 parallel_wavegan/layers/residual_block.py | 2 --
 parallel_wavegan/models/hifigan.py        | 2 --
 2 files changed, 4 deletions(-)

diff --git a/parallel_wavegan/layers/residual_block.py b/parallel_wavegan/layers/residual_block.py
index aba45420..06f414ed 100644
--- a/parallel_wavegan/layers/residual_block.py
+++ b/parallel_wavegan/layers/residual_block.py
@@ -164,8 +164,6 @@ def __init__(
             bias (bool): Whether to add bias parameter in convolution layers.
             nonlinear_activation (str): Activation function module name.
             nonlinear_activation_params (dict): Hyperparameters for activation function.
-            pad (str): Padding function module name before convolution layer.
-            pad_params (dict): Hyperparameters for padding function.
             use_causal_conv (bool): Whether to use causal structure.
 
         """
diff --git a/parallel_wavegan/models/hifigan.py b/parallel_wavegan/models/hifigan.py
index d3b84643..b606f3aa 100644
--- a/parallel_wavegan/models/hifigan.py
+++ b/parallel_wavegan/models/hifigan.py
@@ -54,8 +54,6 @@ def __init__(
             bias (bool): Whether to add bias parameter in convolution layers.
             nonlinear_activation (str): Activation function module name.
            nonlinear_activation_params (dict): Hyperparameters for activation function.
-            pad (str): Padding function module name before convolution layer.
-            pad_params (dict): Hyperparameters for padding function.
             use_causal_conv (bool): Whether to use causal structure.
             use_weight_norm (bool): Whether to use weight norm.
                 If set to true, it will be applied to all of the conv layers.
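
For reference, a minimal sanity-check sketch of the padding arithmetic the patches inline (this snippet is not part of the patch series; the channel count and input length are arbitrary choices for the check). With an odd kernel size, padding=(kernel_size - 1) // 2 * dilation keeps the Conv1d output length equal to the input length, matching the "same-length" behaviour of the removed pad-then-convolve pairs. One caveat: Conv1d's built-in padding is zero padding, whereas the removed default pad module was ReplicationPad1d, so samples near the signal edges are not guaranteed to be bit-identical.

    # Hedged sanity check: the inlined padding preserves the time dimension.
    import torch

    for kernel_size in (3, 5, 7):
        for dilation in (1, 3, 5):
            conv = torch.nn.Conv1d(
                4,  # in_channels (arbitrary for this check)
                4,  # out_channels
                kernel_size,
                1,  # stride, passed positionally as in the patched code
                dilation=dilation,
                padding=(kernel_size - 1) // 2 * dilation,
            )
            x = torch.randn(2, 4, 100)
            assert conv(x).shape == x.shape  # time length is preserved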