🐞fix support for non-square images #204

Merged · 4 commits · Apr 12, 2022
23 changes: 23 additions & 0 deletions anomalib/data/utils/image.py
@@ -14,11 +14,14 @@
# See the License for the specific language governing permissions
# and limitations under the License.

import math
from pathlib import Path
from typing import List, Union

import cv2
import numpy as np
import torch.nn.functional as F
from torch import Tensor
from torchvision.datasets.folder import IMG_EXTENSIONS


@@ -66,3 +69,23 @@ def read_image(path: Union[str, Path]) -> np.ndarray:
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

return image


def pad_nextpow2(batch: Tensor) -> Tensor:
"""Compute required padding from input size and return padded images.

Finds the largest spatial dimension and pads the batch to a square whose side is the next power of two.
If the padding required along a dimension is odd, the extra pixel is added to one side.

Args:
batch (Tensor): Input images

Returns:
Tensor: Padded batch
"""
# round the largest spatial dimension up to the next power of two
l_dim = 2 ** math.ceil(math.log(max(*batch.shape[-2:]), 2))
# split the required padding per dimension as evenly as possible,
# putting the extra pixel on one side when the difference is odd
padding_h = [math.ceil((l_dim - batch.shape[-2]) / 2), math.floor((l_dim - batch.shape[-2]) / 2)]
padding_w = [math.ceil((l_dim - batch.shape[-1]) / 2), math.floor((l_dim - batch.shape[-1]) / 2)]
# F.pad expects pads for the last dimension first: (left, right, top, bottom)
padded_batch = F.pad(batch, pad=[*padding_w, *padding_h])
return padded_batch
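A minimal usage sketch of the new helper (the shapes below are illustrative, not taken from the PR):

```python
import torch

from anomalib.data.utils.image import pad_nextpow2

batch = torch.rand(8, 3, 100, 150)  # non-square batch: height 100, width 150
padded = pad_nextpow2(batch)

# The largest dimension is 150, so the target side is 2 ** ceil(log2(150)) = 256;
# both dimensions are zero-padded as evenly as possible up to 256 x 256.
print(padded.shape)  # torch.Size([8, 3, 256, 256])
```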
9 changes: 6 additions & 3 deletions anomalib/models/ganomaly/model.py
@@ -24,6 +24,7 @@
from pytorch_lightning.callbacks import EarlyStopping
from torch import Tensor, optim

from anomalib.data.utils.image import pad_nextpow2
from anomalib.models.components import AnomalyModule

from .torch_model import GanomalyModel
@@ -38,8 +39,9 @@ class GanomalyLightning(AnomalyModule):

def __init__(self, hparams: Union[DictConfig, ListConfig]):
super().__init__(hparams)

self.model: GanomalyModel = GanomalyModel(
input_size=hparams.model.input_size[0],
input_size=hparams.model.input_size,
num_input_channels=3,
n_features=hparams.model.n_features,
latent_vec_size=hparams.model.latent_vec_size,
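For context on the dropped `[0]` index: anomalib configs store `model.input_size` as a (height, width) pair, and the whole pair now reaches `GanomalyModel`. A minimal sketch with assumed values:

```python
from omegaconf import OmegaConf

# Assumed config values for illustration.
hparams = OmegaConf.create({"model": {"input_size": [100, 150]}})

# Before: hparams.model.input_size[0] -> 100, which forced square sizing.
# After: the full (height, width) pair is forwarded to the model.
input_size = tuple(hparams.model.input_size)
print(input_size)  # (100, 150)
```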
@@ -99,18 +101,19 @@ def training_step(self, batch, _, optimizer_idx): # pylint: disable=arguments-differ
Dict[str, Tensor]: Loss
"""
images = batch["image"]
padded_images = pad_nextpow2(images)
loss: Dict[str, Tensor]

# Discriminator
if optimizer_idx == 0:
# forward pass
loss_discriminator = self.model.get_discriminator_loss(images)
loss_discriminator = self.model.get_discriminator_loss(padded_images)
loss = {"loss": loss_discriminator}

# Generator
else:
# forward pass
loss_generator = self.model.get_generator_loss(images)
loss_generator = self.model.get_generator_loss(padded_images)

loss = {"loss": loss_generator}

87 changes: 60 additions & 27 deletions anomalib/models/ganomaly/torch_model.py
@@ -12,16 +12,19 @@


import math
from typing import Tuple

import torch
from torch import Tensor, nn

from anomalib.data.utils.image import pad_nextpow2


class Encoder(nn.Module):
"""Encoder Network.

Args:
input_size (int): Size of input image
input_size (Tuple[int, int]): Size of input image
latent_vec_size (int): Size of latent vector z
num_input_channels (int): Number of input channels in the image
n_features (int): Number of features per convolution layer
@@ -31,7 +34,7 @@ class Encoder(nn.Module):

def __init__(
self,
input_size: int,
input_size: Tuple[int, int],
latent_vec_size: int,
num_input_channels: int,
n_features: int,
@@ -40,13 +43,10 @@ def __init__(
):
super().__init__()

assert input_size % 16 == 0, "Input size should be a multiple of 16"

self.input_layers = nn.Sequential()

self.input_layers.add_module(
f"initial-conv-{num_input_channels}-{n_features}",
nn.Conv2d(num_input_channels, n_features, kernel_size=4, stride=2, padding=1, bias=False),
nn.Conv2d(num_input_channels, n_features, kernel_size=4, stride=2, padding=4, bias=False),
)
self.input_layers.add_module(f"initial-relu-{n_features}", nn.LeakyReLU(0.2, inplace=True))

@@ -63,8 +63,8 @@ def __init__(

# Create pyramid features to reach latent vector
self.pyramid_features = nn.Sequential()
input_size = input_size // 2
while input_size > 4:
pyramid_dim = min(*input_size) // 2 # Use the smaller dimension to create the pyramid.
while pyramid_dim > 4:
in_features = n_features
out_features = n_features * 2
self.pyramid_features.add_module(
@@ -74,12 +74,17 @@ def __init__(
self.pyramid_features.add_module(f"pyramid-{out_features}-batchnorm", nn.BatchNorm2d(out_features))
self.pyramid_features.add_module(f"pyramid-{out_features}-relu", nn.LeakyReLU(0.2, inplace=True))
n_features = out_features
input_size = input_size // 2
pyramid_dim = pyramid_dim // 2

# Final conv
if add_final_conv_layer:
self.final_conv_layer = nn.Conv2d(
n_features, latent_vec_size, kernel_size=4, stride=1, padding=0, bias=False
n_features,
latent_vec_size,
kernel_size=4,
stride=1,
padding=0,
bias=False,
)

def forward(self, input_tensor: Tensor):
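As a worked example of the new pyramid sizing (assumed values, not part of the diff): for a padded 256x256 input with `n_features=64`, `pyramid_dim` starts at 128 and the loop emits five downsampling blocks, doubling the channel count each time:

```python
input_size = (256, 256)  # assumed post-padding size, e.g. from pad_nextpow2
n_features = 64          # assumed base feature count

pyramid_dim = min(*input_size) // 2  # 128, as in the encoder above
blocks = []
while pyramid_dim > 4:
    blocks.append((n_features, n_features * 2))  # (in_features, out_features)
    n_features *= 2
    pyramid_dim //= 2

print(blocks)  # [(64, 128), (128, 256), (256, 512), (512, 1024), (1024, 2048)]
```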
@@ -98,7 +103,7 @@ class Decoder(nn.Module):
"""Decoder Network.

Args:
input_size (int): Size of input image
input_size (Tuple[int, int]): Size of input image
latent_vec_size (int): Size of latent vector z
num_input_channels (int): Number of input channels in the image
n_features (int): Number of features per convolution layer
@@ -107,39 +112,57 @@
"""

def __init__(
self, input_size: int, latent_vec_size: int, num_input_channels: int, n_features: int, extra_layers: int = 0
self,
input_size: Tuple[int, int],
latent_vec_size: int,
num_input_channels: int,
n_features: int,
extra_layers: int = 0,
):
super().__init__()
assert input_size % 16 == 0, "Input size should be a multiple of 16"

self.latent_input = nn.Sequential()

# Calculate input channel size to recreate inverse pyramid
exp_factor = int(math.log(input_size // 4, 2)) - 1
exp_factor = math.ceil(math.log(min(input_size) // 2, 2)) - 2
n_input_features = n_features * (2**exp_factor)

# CNN layer for latent vector input
self.latent_input.add_module(
f"initial-{latent_vec_size}-{n_input_features}-convt",
nn.ConvTranspose2d(latent_vec_size, n_input_features, kernel_size=4, stride=1, padding=0, bias=False),
nn.ConvTranspose2d(
latent_vec_size,
n_input_features,
kernel_size=4,
stride=1,
padding=0,
bias=False,
),
)
self.latent_input.add_module(f"initial-{n_input_features}-batchnorm", nn.BatchNorm2d(n_input_features))
self.latent_input.add_module(f"initial-{n_input_features}-relu", nn.ReLU(True))

# Create inverse pyramid
self.inverse_pyramid = nn.Sequential()
input_size = input_size // 2
while input_size > 4:
pyramid_dim = min(*input_size) // 2 # Use the smaller dimension to create the pyramid.
while pyramid_dim > 4:
in_features = n_input_features
out_features = n_input_features // 2
self.inverse_pyramid.add_module(
f"pyramid-{in_features}-{out_features}-convt",
nn.ConvTranspose2d(in_features, out_features, kernel_size=4, stride=2, padding=1, bias=False),
nn.ConvTranspose2d(
in_features,
out_features,
kernel_size=4,
stride=2,
padding=1,
bias=False,
),
)
self.inverse_pyramid.add_module(f"pyramid-{out_features}-batchnorm", nn.BatchNorm2d(out_features))
self.inverse_pyramid.add_module(f"pyramid-{out_features}-relu", nn.ReLU(True))
n_input_features = out_features
input_size = input_size // 2
pyramid_dim = pyramid_dim // 2

# Extra Layers
self.extra_layers = nn.Sequential()
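The new `exp_factor` can be sanity-checked against the encoder: the decoder has to start from the channel count the encoder pyramid ends with. A standalone computation, using the same assumed values as the encoder sketch:

```python
import math

input_size = (256, 256)  # assumed post-padding size
n_features = 64          # assumed base feature count

exp_factor = math.ceil(math.log(min(input_size) // 2, 2)) - 2  # ceil(log2(128)) - 2 = 5
n_input_features = n_features * (2**exp_factor)

# 2048 matches the encoder pyramid's final out_features for the same sizes.
print(exp_factor, n_input_features)  # 5 2048
```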
@@ -159,7 +182,14 @@ def __init__(
self.final_layers = nn.Sequential()
self.final_layers.add_module(
f"final-{n_input_features}-{num_input_channels}-convt",
nn.ConvTranspose2d(n_input_features, num_input_channels, kernel_size=4, stride=2, padding=1, bias=False),
nn.ConvTranspose2d(
n_input_features,
num_input_channels,
kernel_size=4,
stride=2,
padding=1,
bias=False,
),
)
self.final_layers.add_module(f"final-{num_input_channels}-tanh", nn.Tanh())

@@ -178,13 +208,13 @@ class Discriminator(nn.Module):
Made of a single encoder which takes x and x_hat and produces a score.

Args:
input_size (int): Input image size.
input_size (Tuple[int, int]): Input image size.
num_input_channels (int): Number of image channels.
n_features (int): Number of feature maps in each convolution layer.
extra_layers (int, optional): Add extra intermediate layers. Defaults to 0.
"""

def __init__(self, input_size: int, num_input_channels: int, n_features: int, extra_layers: int = 0):
def __init__(self, input_size: Tuple[int, int], num_input_channels: int, n_features: int, extra_layers: int = 0):
super().__init__()
encoder = Encoder(input_size, 1, num_input_channels, n_features, extra_layers)
layers = []
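A hedged usage sketch of the discriminator with the tuple-valued `input_size` (constructor values are assumptions; the two-element return matches the `pred_fake, _ = self.discriminator(fake)` unpacking later in this file):

```python
import torch

discriminator = Discriminator(input_size=(256, 256), num_input_channels=3, n_features=64)

# Two-element return; the second element is assumed to be the
# intermediate feature map used by the feature-matching loss.
pred, _features = discriminator(torch.rand(2, 3, 256, 256))
print(pred.shape)  # one realness score per input image
```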
@@ -212,7 +242,7 @@ class Generator(nn.Module):
Made of an encoder-decoder-encoder architecture.

Args:
input_size (int): Size of input data.
input_size (Tuple[int, int]): Size of input data.
latent_vec_size (int): Dimension of latent vector produced between the first encoder-decoder.
num_input_channels (int): Number of channels in input image.
n_features (int): Number of feature maps in each convolution layer.
@@ -222,7 +252,7 @@ def __init__(

def __init__(
self,
input_size: int,
input_size: Tuple[int, int],
latent_vec_size: int,
num_input_channels: int,
n_features: int,
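A hedged sketch of the generator's encoder-decoder-encoder round trip with the tuple `input_size` (values assumed; the trailing constructor arguments are assumed to keep their defaults):

```python
import torch

generator = Generator(
    input_size=(256, 256),  # already padded, e.g. by pad_nextpow2
    latent_vec_size=100,
    num_input_channels=3,
    n_features=64,
)

# Returns the reconstruction plus the latent codes from both encoders,
# matching the `_, latent_i, latent_o` unpacking in GanomalyModel.forward below.
fake, latent_i, latent_o = generator(torch.rand(2, 3, 256, 256))
print(fake.shape, latent_i.shape, latent_o.shape)
```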
@@ -250,7 +280,7 @@ class GanomalyModel(nn.Module):
"""Ganomaly Model.

Args:
input_size (int): Input dimension of a square tensor.
input_size (Tuple[int, int]): Input dimensions.
num_input_channels (int): Number of input channels.
n_features (int): Number of features layers in the CNNs.
latent_vec_size (int): Size of autoencoder latent vector.
Expand All @@ -263,7 +293,7 @@ class GanomalyModel(nn.Module):

def __init__(
self,
input_size: int,
input_size: Tuple[int, int],
num_input_channels: int,
n_features: int,
latent_vec_size: int,
@@ -348,7 +378,9 @@ def get_generator_loss(self, images: Tensor) -> Tensor:
pred_fake, _ = self.discriminator(fake)

error_enc = self.loss_enc(latent_i, latent_o)

error_con = self.loss_con(images, fake)

error_adv = self.loss_adv(pred_real, pred_fake)

loss_generator = error_adv * self.wadv + error_con * self.wcon + error_enc * self.wenc
@@ -363,6 +395,7 @@ def forward(self, batch: Tensor) -> Tensor:
Returns:
Tensor: Regeneration scores.
"""
padded_batch = pad_nextpow2(batch)
self.generator.eval()
_, latent_i, latent_o = self.generator(batch)
_, latent_i, latent_o = self.generator(padded_batch)
return torch.mean(torch.pow((latent_i - latent_o), 2), dim=1).view(-1) # convert nx1x1 to n
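Finally, an end-to-end sketch of inference on a non-square batch, which now works because `forward` pads internally (constructor values are assumptions; the remaining arguments are assumed to keep their defaults):

```python
import torch

model = GanomalyModel(
    input_size=(100, 150),  # raw, non-square input size
    num_input_channels=3,
    n_features=64,
    latent_vec_size=100,
)
model.eval()

batch = torch.rand(8, 3, 100, 150)
scores = model(batch)  # padded to 256x256 internally before scoring
print(scores.shape)    # a flattened vector of regeneration scores
```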