A collection of audio autoencoders, in PyTorch. Pretrained models can be found at archisound.
pip install audio-encoders-pytorch
import torch  # needed for torch.randn below
from audio_encoders_pytorch import AutoEncoder1d

# Build a 1D autoencoder for 2-channel audio.
autoencoder = AutoEncoder1d(
    in_channels=2,  # Number of input channels
    channels=32,  # Number of base channels
    multipliers=[1, 1, 2, 2],  # Channel multiplier between layers (i.e. channels * multipliers[i] -> channels * multipliers[i+1])
    factors=[4, 4, 4],  # Downsampling/upsampling factor per layer
    num_blocks=[2, 2, 2],  # Number of resnet blocks per layer
)

# Run a random batch through the autoencoder; the output shape matches the input.
x = torch.randn(1, 2, 2**18)  # [1, 2, 262144]
x_recon = autoencoder(x)  # [1, 2, 262144]
import torch  # needed for torch.randn below
from audio_encoders_pytorch import Discriminator1d

# Build a 1D discriminator for 2-channel audio.
discriminator = Discriminator1d(
    in_channels=2,  # Number of input channels
    channels=32,  # Number of base channels
    multipliers=[1, 1, 2, 2],  # Channel multiplier between layers (i.e. channels * multipliers[i] -> channels * multipliers[i+1])
    factors=[8, 8, 8],  # Downsampling factor per layer
    num_blocks=[2, 2, 2],  # Number of resnet blocks per layer
    use_loss=[True, True, True],  # Whether to use this layer as GAN loss
)

# Random stand-ins for a real waveform and a generated waveform.
wave_true = torch.randn(1, 2, 2**18)
wave_fake = torch.randn(1, 2, 2**18)

# The call returns the generator loss and the discriminator loss, in that order.
loss_generator, loss_discriminator = discriminator(wave_true, wave_fake)
# Example values (yours will differ, since the inputs are random):
# tensor(0.613949, grad_fn=<MeanBackward0>) tensor(0.097330, grad_fn=<MeanBackward0>)