From b03a7ec8116b410b81917056144e22882fd447a9 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 21 Feb 2022 15:51:17 +0000 Subject: [PATCH 01/10] Extend the EfficientNet class to support v1 and v2. --- torchvision/models/efficientnet.py | 152 ++++++++++++++++++++++------- 1 file changed, 119 insertions(+), 33 deletions(-) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index f7eba46cb39..8adde96344e 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -1,5 +1,7 @@ import copy import math +import warnings +from dataclasses import dataclass from functools import partial from typing import Any, Callable, Optional, List, Sequence @@ -40,8 +42,23 @@ } -class MBConvConfig: - # Stores information listed at Table 1 of the EfficientNet paper +@dataclass +class _MBConvConfig: + expand_ratio: float + kernel: int + stride: int + input_channels: int + out_channels: int + num_layers: int + block: Callable[..., nn.Module] + + @staticmethod + def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: + return _make_divisible(channels * width_mult, 8, min_value) + + +class MBConvConfig(_MBConvConfig): + # Stores information listed at Table 1 of the EfficientNet paper & Table 4 of the EfficientNetV2 paper def __init__( self, expand_ratio: float, @@ -52,36 +69,37 @@ def __init__( num_layers: int, width_mult: float, depth_mult: float, + block: Optional[Callable[..., nn.Module]] = None ) -> None: - self.expand_ratio = expand_ratio - self.kernel = kernel - self.stride = stride - self.input_channels = self.adjust_channels(input_channels, width_mult) - self.out_channels = self.adjust_channels(out_channels, width_mult) - self.num_layers = self.adjust_depth(num_layers, depth_mult) - - def __repr__(self) -> str: - s = ( - f"{self.__class__.__name__}(" - f"expand_ratio={self.expand_ratio}" - f", kernel={self.kernel}" - f", stride={self.stride}" - f", 
input_channels={self.input_channels}" - f", out_channels={self.out_channels}" - f", num_layers={self.num_layers}" - f")" - ) - return s - - @staticmethod - def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: - return _make_divisible(channels * width_mult, 8, min_value) + input_channels = self.adjust_channels(input_channels, width_mult) + out_channels = self.adjust_channels(out_channels, width_mult) + num_layers = self.adjust_depth(num_layers, depth_mult) + if block is None: + block = MBConv + super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block) @staticmethod def adjust_depth(num_layers: int, depth_mult: float): return int(math.ceil(num_layers * depth_mult)) +class FusedMBConvConfig(_MBConvConfig): + # Stores information listed at Table 4 of the EfficientNetV2 paper + def __init__( + self, + expand_ratio: float, + kernel: int, + stride: int, + input_channels: int, + out_channels: int, + num_layers: int, + block: Optional[Callable[..., nn.Module]] = None + ) -> None: + if block is None: + block = FusedMBConv + super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block) + + class MBConv(nn.Module): def __init__( self, @@ -149,6 +167,68 @@ def forward(self, input: Tensor) -> Tensor: return result +class FusedMBConv(nn.Module): + def __init__( + self, + cnf: FusedMBConvConfig, + stochastic_depth_prob: float, + norm_layer: Callable[..., nn.Module], + **kwargs: Any, + ) -> None: + super().__init__() + + if not (1 <= cnf.stride <= 2): + raise ValueError("illegal stride value") + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.SiLU + + expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) + if expanded_channels != cnf.input_channels: + # fused expand + layers.append( + ConvNormActivation( + cnf.input_channels, + expanded_channels, + 
kernel_size=cnf.kernel, + stride=cnf.stride, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + # project + layers.append( + ConvNormActivation( + expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) + else: + layers.append( + ConvNormActivation( + cnf.input_channels, + cnf.out_channels, + kernel_size=cnf.kernel, + stride=cnf.stride, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + self.block = nn.Sequential(*layers) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + self.out_channels = cnf.out_channels + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result = self.stochastic_depth(result) + result += input + return result + + class EfficientNet(nn.Module): def __init__( self, @@ -156,19 +236,17 @@ def __init__( dropout: float, stochastic_depth_prob: float = 0.2, num_classes: int = 1000, - block: Optional[Callable[..., nn.Module]] = None, norm_layer: Optional[Callable[..., nn.Module]] = None, **kwargs: Any, ) -> None: """ - EfficientNet main class + EfficientNet V1 and V2 main class Args: inverted_residual_setting (List[MBConvConfig]): Network structure dropout (float): The droupout probability stochastic_depth_prob (float): The stochastic depth probability num_classes (int): Number of classes - block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use """ super().__init__() @@ -178,12 +256,19 @@ def __init__( raise ValueError("The inverted_residual_setting should not be empty") elif not ( isinstance(inverted_residual_setting, Sequence) - and all([isinstance(s, MBConvConfig) for s in inverted_residual_setting]) + and all([isinstance(s, _MBConvConfig) for s in inverted_residual_setting]) ): raise TypeError("The inverted_residual_setting should 
be List[MBConvConfig]") - if block is None: - block = MBConv + if "block" in kwargs: + warnings.warn( + "The parameter 'block' is deprecated since 0.13 and will be removed 0.15. " + "Please pass this information on 'MBConvConfig.block' instead." + ) + if kwargs["block"] is not None: + for s in inverted_residual_setting: + if isinstance(s, MBConvConfig): + s.block = kwargs["block"] if norm_layer is None: norm_layer = nn.BatchNorm2d @@ -215,14 +300,15 @@ def __init__( # adjust stochastic depth probability based on the depth of the stage block sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks - stage.append(block(block_cnf, sd_prob, norm_layer)) + stage.append(block_cnf.block(block_cnf, sd_prob, norm_layer)) stage_block_id += 1 layers.append(nn.Sequential(*stage)) # building last several layers lastconv_input_channels = inverted_residual_setting[-1].out_channels - lastconv_output_channels = 4 * lastconv_input_channels + is_v2 = any([isinstance(s, FusedMBConvConfig) for s in inverted_residual_setting]) + lastconv_output_channels = 1280 if is_v2 else 4 * lastconv_input_channels layers.append( ConvNormActivation( lastconv_input_channels, From aa82cf1140786e7cf567c85e0d1cc4d4decde496 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 21 Feb 2022 17:47:44 +0000 Subject: [PATCH 02/10] Refactor config/builder methods and add prototype builders --- ...elTester.test_efficientnet_v2_l_expect.pkl | Bin 0 -> 939 bytes ...elTester.test_efficientnet_v2_m_expect.pkl | Bin 0 -> 939 bytes ...elTester.test_efficientnet_v2_s_expect.pkl | Bin 0 -> 939 bytes torchvision/models/efficientnet.py | 179 ++++++++++++++---- torchvision/prototype/models/efficientnet.py | 137 +++++++++----- 5 files changed, 234 insertions(+), 82 deletions(-) create mode 100644 test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_v2_m_expect.pkl create mode 100644 
test/expect/ModelTester.test_efficientnet_v2_s_expect.pkl diff --git a/test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl b/test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f3ca5315337c7f74d8ad7b249ad315b25b2b6e33 GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5=ZXk1(PMcUyNU9^O`bzjNTYiKgW2-?On!^CN(DFdzP3;u)HwRy`W%Xc1yvO zcSoYB?P5cd57*;O6a01D^0I6UEX5C{d-feC!;CHPm`@OYn`?lZJcbftyFl+tAqtxTK2MU zjWzjT@*@6-$+dfNTb?}-H+kl+Y7+h}am!U{zpYERKQTE{7P-}S=He|$Rg+C_to>`^ zW~*SDlzQA46k0EqeSCfb7*ZgNJGA&2tl^ zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5+}K&-l!|ab@RVFvo^gv@Xx4JH+}OAt;-t|o~<^_GGW~OY_6Ws0f+ZSKD#3} zooQ$_YARl~NzCVx(XV)UquLJL%^j*0o6JKWZM-7$X4CU%=1ng?EHnz}7cmMCn`*2x znROGxoOe1O8hAGJ&U$84#KE|EhO)JBRh`*p-OVb|Jk=09gUl6v|299xMA0N zqaU6Qn`bOsYs3}w+bHGEUZYf2RpXAYO-3TCE*l26${B0z+iG;+^`!Nt2aPr}d<)+E zRjY8*vo(^NL7}Dh-1Y1UU`T;5?$F|Au!e_LS!z)+Fc#dL%!v#xq>zI!jk!QJU!0d7 z$^^6(gaf=8K@>bqBFCWsNCE|*r%-g=$bRBO(fJC4k zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5=S<5r>W+Sf2PNF@|)c-&NaO|!+Tr8ic_0)EEP;W?s}Onp2}ouF~{2MmS^17 zgimbSdg2b5?sH(+c4%Rg=>dl=o0*DtY-71pXLh*Q(R9MkH6{la12ZL36dwuYW5G@COc z!gM0Pqp2hBLX*Q6CYY@(U0{0ckcHWmH}_0^Y;4UooRl)tFj6(W;8J16)pOBQNorA+@ZzKU=0tgvecqtU@W*fnG+dYNFfJd8gqeczBn&E zlnH1n2nTpGf+%>JM2 None: input_channels = self.adjust_channels(input_channels, width_mult) out_channels = self.adjust_channels(out_channels, width_mult) @@ -93,7 +96,7 @@ def __init__( input_channels: int, out_channels: int, num_layers: int, - block: Optional[Callable[..., nn.Module]] = None + block: Optional[Callable[..., nn.Module]] = None, ) -> None: if block is None: block = FusedMBConv @@ -232,22 +235,24 
@@ def forward(self, input: Tensor) -> Tensor: class EfficientNet(nn.Module): def __init__( self, - inverted_residual_setting: List[MBConvConfig], + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]], dropout: float, stochastic_depth_prob: float = 0.2, num_classes: int = 1000, norm_layer: Optional[Callable[..., nn.Module]] = None, + last_channel: Optional[int] = None, **kwargs: Any, ) -> None: """ EfficientNet V1 and V2 main class Args: - inverted_residual_setting (List[MBConvConfig]): Network structure + inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]]): Network structure dropout (float): The droupout probability stochastic_depth_prob (float): The stochastic depth probability num_classes (int): Number of classes norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + last_channel (int): The number of channels on the penultimate layer """ super().__init__() _log_api_usage_once(self) @@ -307,8 +312,7 @@ def __init__( # building last several layers lastconv_input_channels = inverted_residual_setting[-1].out_channels - is_v2 = any([isinstance(s, FusedMBConvConfig) for s in inverted_residual_setting]) - lastconv_output_channels = 1280 if is_v2 else 4 * lastconv_input_channels + lastconv_output_channels = last_channel if last_channel is not None else 4 * lastconv_input_channels layers.append( ConvNormActivation( lastconv_input_channels, @@ -355,24 +359,14 @@ def forward(self, x: Tensor) -> Tensor: def _efficientnet( arch: str, - width_mult: float, - depth_mult: float, + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]], dropout: float, + last_channel: Optional[int], pretrained: bool, progress: bool, **kwargs: Any, ) -> EfficientNet: - bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult) - inverted_residual_setting = [ - bneck_conf(1, 3, 1, 32, 16, 1), - bneck_conf(6, 3, 2, 16, 24, 2), - bneck_conf(6, 5, 2, 24, 40, 2), - 
bneck_conf(6, 3, 2, 40, 80, 3), - bneck_conf(6, 5, 1, 80, 112, 3), - bneck_conf(6, 5, 2, 112, 192, 4), - bneck_conf(6, 3, 1, 192, 320, 1), - ] - model = EfficientNet(inverted_residual_setting, dropout, **kwargs) + model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs) if pretrained: if model_urls.get(arch, None) is None: raise ValueError(f"No checkpoint is available for model type {arch}") @@ -381,6 +375,61 @@ def _efficientnet( return model +def _efficientnet_conf( + arch: str, + **kwargs: Any, +) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]: + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]] + if arch.startswith("efficientnet_b"): + bneck_conf = partial(MBConvConfig, width_mult=kwargs.pop("width_mult"), depth_mult=kwargs.pop("depth_mult")) + inverted_residual_setting = [ + bneck_conf(1, 3, 1, 32, 16, 1), + bneck_conf(6, 3, 2, 16, 24, 2), + bneck_conf(6, 5, 2, 24, 40, 2), + bneck_conf(6, 3, 2, 40, 80, 3), + bneck_conf(6, 5, 1, 80, 112, 3), + bneck_conf(6, 5, 2, 112, 192, 4), + bneck_conf(6, 3, 1, 192, 320, 1), + ] + last_channel = None + elif arch.startswith("efficientnet_v2_s"): + inverted_residual_setting = [ + FusedMBConvConfig(1, 3, 1, 24, 24, 2), + FusedMBConvConfig(4, 3, 2, 24, 48, 4), + FusedMBConvConfig(4, 3, 2, 48, 64, 4), + MBConvConfig(4, 3, 2, 64, 128, 6), + MBConvConfig(6, 3, 1, 128, 160, 9), + MBConvConfig(6, 3, 2, 160, 256, 15), + ] + last_channel = 1280 + elif arch.startswith("efficientnet_v2_m"): + inverted_residual_setting = [ + FusedMBConvConfig(1, 3, 1, 24, 24, 3), + FusedMBConvConfig(4, 3, 2, 24, 48, 5), + FusedMBConvConfig(4, 3, 2, 48, 80, 5), + MBConvConfig(4, 3, 2, 80, 160, 7), + MBConvConfig(6, 3, 1, 160, 176, 14), + MBConvConfig(6, 3, 2, 176, 304, 18), + MBConvConfig(6, 3, 1, 304, 512, 5), + ] + last_channel = 1280 + elif arch.startswith("efficientnet_v2_l"): + inverted_residual_setting = [ + FusedMBConvConfig(1, 3, 1, 32, 32, 4), + 
FusedMBConvConfig(4, 3, 2, 32, 64, 7), + FusedMBConvConfig(4, 3, 2, 64, 96, 7), + MBConvConfig(4, 3, 2, 96, 192, 10), + MBConvConfig(6, 3, 1, 192, 224, 19), + MBConvConfig(6, 3, 2, 224, 384, 25), + MBConvConfig(6, 3, 1, 384, 640, 7), + ] + last_channel = 1280 + else: + raise ValueError(f"Unsupported model type {arch}") + + return inverted_residual_setting, last_channel + + def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: """ Constructs a EfficientNet B0 architecture from @@ -390,7 +439,9 @@ def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _efficientnet("efficientnet_b0", 1.0, 1.0, 0.2, pretrained, progress, **kwargs) + arch = "efficientnet_b0" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.0, depth_mult=1.0) + return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs) def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -402,7 +453,9 @@ def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _efficientnet("efficientnet_b1", 1.0, 1.1, 0.2, pretrained, progress, **kwargs) + arch = "efficientnet_b1" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.0, depth_mult=1.1) + return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs) def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -414,7 +467,9 @@ def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If 
True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _efficientnet("efficientnet_b2", 1.1, 1.2, 0.3, pretrained, progress, **kwargs) + arch = "efficientnet_b2" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.1, depth_mult=1.2) + return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs) def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -426,7 +481,9 @@ def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _efficientnet("efficientnet_b3", 1.2, 1.4, 0.3, pretrained, progress, **kwargs) + arch = "efficientnet_b3" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.2, depth_mult=1.4) + return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs) def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -438,7 +495,9 @@ def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ - return _efficientnet("efficientnet_b4", 1.4, 1.8, 0.4, pretrained, progress, **kwargs) + arch = "efficientnet_b4" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.4, depth_mult=1.8) + return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs) def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -450,11 +509,13 @@ def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: 
A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ + arch = "efficientnet_b5" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.6, depth_mult=2.2) return _efficientnet( - "efficientnet_b5", - 1.6, - 2.2, + arch, + inverted_residual_setting, 0.4, + last_channel, pretrained, progress, norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), @@ -471,11 +532,13 @@ def efficientnet_b6(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ + arch = "efficientnet_b6" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.8, depth_mult=2.6) return _efficientnet( - "efficientnet_b6", - 1.8, - 2.6, + arch, + inverted_residual_setting, 0.5, + last_channel, pretrained, progress, norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), @@ -492,13 +555,57 @@ def efficientnet_b7(pretrained: bool = False, progress: bool = True, **kwargs: A pretrained (bool): If True, returns a model pre-trained on ImageNet progress (bool): If True, displays a progress bar of the download to stderr """ + arch = "efficientnet_b7" + inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=2.0, depth_mult=3.1) return _efficientnet( - "efficientnet_b7", - 2.0, - 3.1, + arch, + inverted_residual_setting, 0.5, + last_channel, pretrained, progress, norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs, ) + + +def efficientnet_v2_s(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs an EfficientNetV2-S architecture from + `"EfficientNetV2: Smaller Models and Faster Training" `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "efficientnet_v2_s" + inverted_residual_setting, last_channel = _efficientnet_conf(arch) + return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs) + + +def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs an EfficientNetV2-M architecture from + `"EfficientNetV2: Smaller Models and Faster Training" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "efficientnet_v2_m" + inverted_residual_setting, last_channel = _efficientnet_conf(arch) + return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs) + + +def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs an EfficientNetV2-L architecture from + `"EfficientNetV2: Smaller Models and Faster Training" `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "efficientnet_v2_l" + inverted_residual_setting, last_channel = _efficientnet_conf(arch) + return _efficientnet(arch, inverted_residual_setting, 0.5, last_channel, pretrained, progress, **kwargs) diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index 1fa2ea4d294..95e7d0987f3 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -1,11 +1,11 @@ from functools import partial -from typing import Any, Optional +from typing import Any, Optional, Sequence, Union from torch import nn from torchvision.prototype.transforms import ImageNetEval from torchvision.transforms.functional import InterpolationMode -from ...models.efficientnet import EfficientNet, MBConvConfig +from ...models.efficientnet import EfficientNet, MBConvConfig, FusedMBConvConfig, _efficientnet_conf from ._api import WeightsEnum, Weights from ._meta import _IMAGENET_CATEGORIES from ._utils import handle_legacy_interface, _ovewrite_named_param @@ -21,6 +21,9 @@ "EfficientNet_B5_Weights", "EfficientNet_B6_Weights", "EfficientNet_B7_Weights", + "EfficientNet_V2_S_Weights", + "EfficientNet_V2_M_Weights", + "EfficientNet_V2_L_Weights", "efficientnet_b0", "efficientnet_b1", "efficientnet_b2", @@ -29,13 +32,16 @@ "efficientnet_b5", "efficientnet_b6", "efficientnet_b7", + "efficientnet_v2_s", + "efficientnet_v2_m", + "efficientnet_v2_l", ] def _efficientnet( - width_mult: float, - depth_mult: float, + inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]], dropout: float, + last_channel: Optional[int], weights: Optional[WeightsEnum], progress: bool, **kwargs: Any, @@ -43,18 +49,7 @@ def _efficientnet( if weights is not None: _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"])) - 
bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult) - inverted_residual_setting = [ - bneck_conf(1, 3, 1, 32, 16, 1), - bneck_conf(6, 3, 2, 16, 24, 2), - bneck_conf(6, 5, 2, 24, 40, 2), - bneck_conf(6, 3, 2, 40, 80, 3), - bneck_conf(6, 5, 1, 80, 112, 3), - bneck_conf(6, 5, 2, 112, 192, 4), - bneck_conf(6, 3, 1, 192, 320, 1), - ] - - model = EfficientNet(inverted_residual_setting, dropout, **kwargs) + model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs) if weights is not None: model.load_state_dict(weights.get_state_dict(progress=progress)) @@ -62,7 +57,7 @@ def _efficientnet( return model -_COMMON_META = { +_COMMON_META_V1 = { "task": "image_classification", "architecture": "EfficientNet", "publication_year": 2019, @@ -78,7 +73,7 @@ class EfficientNet_B0_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", transforms=partial(ImageNetEval, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 5288548, "size": (224, 224), "acc@1": 77.692, @@ -93,7 +88,7 @@ class EfficientNet_B1_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth", transforms=partial(ImageNetEval, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 7794184, "size": (240, 240), "acc@1": 78.642, @@ -104,7 +99,7 @@ class EfficientNet_B1_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth", transforms=partial(ImageNetEval, crop_size=240, resize_size=255, interpolation=InterpolationMode.BILINEAR), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 7794184, "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning", "interpolation": InterpolationMode.BILINEAR, @@ -121,7 +116,7 @@ class 
EfficientNet_B2_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth", transforms=partial(ImageNetEval, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 9109994, "size": (288, 288), "acc@1": 80.608, @@ -136,7 +131,7 @@ class EfficientNet_B3_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth", transforms=partial(ImageNetEval, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 12233232, "size": (300, 300), "acc@1": 82.008, @@ -151,7 +146,7 @@ class EfficientNet_B4_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth", transforms=partial(ImageNetEval, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 19341616, "size": (380, 380), "acc@1": 83.384, @@ -166,7 +161,7 @@ class EfficientNet_B5_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", transforms=partial(ImageNetEval, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 30389784, "size": (456, 456), "acc@1": 83.444, @@ -181,7 +176,7 @@ class EfficientNet_B6_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", transforms=partial(ImageNetEval, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 43040704, "size": (528, 528), "acc@1": 84.008, @@ -196,7 +191,7 @@ class EfficientNet_B7_Weights(WeightsEnum): url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", transforms=partial(ImageNetEval, crop_size=600, resize_size=600, 
interpolation=InterpolationMode.BICUBIC), meta={ - **_COMMON_META, + **_COMMON_META_V1, "num_params": 66347960, "size": (600, 600), "acc@1": 84.122, @@ -206,13 +201,26 @@ class EfficientNet_B7_Weights(WeightsEnum): DEFAULT = IMAGENET1K_V1 +class EfficientNet_V2_S_Weights(WeightsEnum): + pass + + +class EfficientNet_V2_M_Weights(WeightsEnum): + pass + + +class EfficientNet_V2_L_Weights(WeightsEnum): + pass + + @handle_legacy_interface(weights=("pretrained", EfficientNet_B0_Weights.IMAGENET1K_V1)) def efficientnet_b0( *, weights: Optional[EfficientNet_B0_Weights] = None, progress: bool = True, **kwargs: Any ) -> EfficientNet: weights = EfficientNet_B0_Weights.verify(weights) - return _efficientnet(width_mult=1.0, depth_mult=1.0, dropout=0.2, weights=weights, progress=progress, **kwargs) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b0", width_mult=1.0, depth_mult=1.0) + return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", EfficientNet_B1_Weights.IMAGENET1K_V1)) @@ -221,7 +229,8 @@ def efficientnet_b1( ) -> EfficientNet: weights = EfficientNet_B1_Weights.verify(weights) - return _efficientnet(width_mult=1.0, depth_mult=1.1, dropout=0.2, weights=weights, progress=progress, **kwargs) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b1", width_mult=1.0, depth_mult=1.1) + return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", EfficientNet_B2_Weights.IMAGENET1K_V1)) @@ -230,7 +239,8 @@ def efficientnet_b2( ) -> EfficientNet: weights = EfficientNet_B2_Weights.verify(weights) - return _efficientnet(width_mult=1.1, depth_mult=1.2, dropout=0.3, weights=weights, progress=progress, **kwargs) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b2", width_mult=1.1, depth_mult=1.2) + return 
_efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", EfficientNet_B3_Weights.IMAGENET1K_V1)) @@ -239,7 +249,8 @@ def efficientnet_b3( ) -> EfficientNet: weights = EfficientNet_B3_Weights.verify(weights) - return _efficientnet(width_mult=1.2, depth_mult=1.4, dropout=0.3, weights=weights, progress=progress, **kwargs) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b3", width_mult=1.2, depth_mult=1.4) + return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", EfficientNet_B4_Weights.IMAGENET1K_V1)) @@ -248,7 +259,8 @@ def efficientnet_b4( ) -> EfficientNet: weights = EfficientNet_B4_Weights.verify(weights) - return _efficientnet(width_mult=1.4, depth_mult=1.8, dropout=0.4, weights=weights, progress=progress, **kwargs) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b4", width_mult=1.4, depth_mult=1.8) + return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", EfficientNet_B5_Weights.IMAGENET1K_V1)) @@ -257,12 +269,13 @@ def efficientnet_b5( ) -> EfficientNet: weights = EfficientNet_B5_Weights.verify(weights) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b5", width_mult=1.6, depth_mult=2.2) return _efficientnet( - width_mult=1.6, - depth_mult=2.2, - dropout=0.4, - weights=weights, - progress=progress, + inverted_residual_setting, + 0.4, + last_channel, + weights, + progress, norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs, ) @@ -274,12 +287,13 @@ def efficientnet_b6( ) -> EfficientNet: weights = EfficientNet_B6_Weights.verify(weights) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b6", width_mult=1.8, depth_mult=2.6) return _efficientnet( - width_mult=1.8, - depth_mult=2.6, - 
dropout=0.5, - weights=weights, - progress=progress, + inverted_residual_setting, + 0.5, + last_channel, + weights, + progress, norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs, ) @@ -291,12 +305,43 @@ def efficientnet_b7( ) -> EfficientNet: weights = EfficientNet_B7_Weights.verify(weights) + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b7", width_mult=2.0, depth_mult=3.1) return _efficientnet( - width_mult=2.0, - depth_mult=3.1, - dropout=0.5, - weights=weights, - progress=progress, + inverted_residual_setting, + 0.5, + last_channel, + weights, + progress, norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs, ) + + +@handle_legacy_interface(weights=("pretrained", None)) +def efficientnet_v2_s( + *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + weights = EfficientNet_V2_S_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s") + return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs) + + +@handle_legacy_interface(weights=("pretrained", None)) +def efficientnet_v2_m( + *, weights: Optional[EfficientNet_V2_M_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + weights = EfficientNet_V2_M_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m") + return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs) + + +@handle_legacy_interface(weights=("pretrained", None)) +def efficientnet_v2_l( + *, weights: Optional[EfficientNet_V2_L_Weights] = None, progress: bool = True, **kwargs: Any +) -> EfficientNet: + weights = EfficientNet_V2_L_Weights.verify(weights) + + inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l") + return _efficientnet(inverted_residual_setting, 0.5, last_channel, weights, progress, 
**kwargs) From 931032519acf16e9833ad3881fc9a45b69d258d8 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 21 Feb 2022 19:26:53 +0000 Subject: [PATCH 03/10] Refactoring weight info. --- torchvision/models/efficientnet.py | 3 +- torchvision/prototype/models/efficientnet.py | 43 +++++++++++++++++--- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 82e4174cd9b..c56fac844ba 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -42,6 +42,8 @@ "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", + # Temporary TF weights + "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth", } @@ -176,7 +178,6 @@ def __init__( cnf: FusedMBConvConfig, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module], - **kwargs: Any, ) -> None: super().__init__() diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index 95e7d0987f3..94f871d337c 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -57,17 +57,30 @@ def _efficientnet( return model -_COMMON_META_V1 = { +_COMMON_META = { "task": "image_classification", - "architecture": "EfficientNet", - "publication_year": 2019, - "min_size": (1, 1), "categories": _IMAGENET_CATEGORIES, "interpolation": InterpolationMode.BICUBIC, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet", } +_COMMON_META_V1 = { + **_COMMON_META, + "architecture": "EfficientNet", + "publication_year": 2019, + "min_size": (1, 1), +} + + +_COMMON_META_V2 = { + **_COMMON_META, + "architecture": "EfficientNetV2", + 
"publication_year": 2021, + "min_size": (33, 33), +} + + class EfficientNet_B0_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth", @@ -202,7 +215,25 @@ class EfficientNet_B7_Weights(WeightsEnum): class EfficientNet_V2_S_Weights(WeightsEnum): - pass + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth", + transforms=partial( + ImageNetEval, + crop_size=384, + resize_size=384, + interpolation=InterpolationMode.BICUBIC, + mean=(0.5, 0.5, 0.5), + std=(0.5, 0.5, 0.5), + ), + meta={ + **_COMMON_META_V2, + "num_params": 21458488, + "size": (384, 384), + "acc@1": 83.152, + "acc@5": 96.400, + }, + ) + DEFAULT = IMAGENET1K_V1 class EfficientNet_V2_M_Weights(WeightsEnum): @@ -317,7 +348,7 @@ def efficientnet_b7( ) -@handle_legacy_interface(weights=("pretrained", None)) +@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_S_Weights.IMAGENET1K_V1)) def efficientnet_v2_s( *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any ) -> EfficientNet: From ebc1b654d9ec87bde0e29d0fb225005e899ed62c Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 22 Feb 2022 13:03:13 +0000 Subject: [PATCH 04/10] Update dropouts based on TF config ref --- torchvision/models/efficientnet.py | 6 +++--- torchvision/prototype/models/efficientnet.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index c56fac844ba..eab87e1dbb4 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -581,7 +581,7 @@ def efficientnet_v2_s(pretrained: bool = False, progress: bool = True, **kwargs: """ arch = "efficientnet_v2_s" inverted_residual_setting, last_channel = _efficientnet_conf(arch) - return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs) + return 
_efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs) def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -595,7 +595,7 @@ def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: """ arch = "efficientnet_v2_m" inverted_residual_setting, last_channel = _efficientnet_conf(arch) - return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs) + return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs) def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -609,4 +609,4 @@ def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: """ arch = "efficientnet_v2_l" inverted_residual_setting, last_channel = _efficientnet_conf(arch) - return _efficientnet(arch, inverted_residual_setting, 0.5, last_channel, pretrained, progress, **kwargs) + return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs) diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index 94f871d337c..8527a08ed5f 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -355,7 +355,7 @@ def efficientnet_v2_s( weights = EfficientNet_V2_S_Weights.verify(weights) inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s") - return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs) + return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", None)) @@ -365,7 +365,7 @@ def efficientnet_v2_m( weights = EfficientNet_V2_M_Weights.verify(weights) inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m") - return 
_efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs) + return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs) @handle_legacy_interface(weights=("pretrained", None)) @@ -375,4 +375,4 @@ def efficientnet_v2_l( weights = EfficientNet_V2_L_Weights.verify(weights) inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l") - return _efficientnet(inverted_residual_setting, 0.5, last_channel, weights, progress, **kwargs) + return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs) From 7cec6a78457060523b02db254a92aedcdff95088 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 22 Feb 2022 13:19:32 +0000 Subject: [PATCH 05/10] Update BN eps on TF base_config --- torchvision/models/efficientnet.py | 33 ++++++++++++++++++-- torchvision/prototype/models/efficientnet.py | 30 ++++++++++++++++-- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index eab87e1dbb4..38d99eefccc 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -581,7 +581,16 @@ def efficientnet_v2_s(pretrained: bool = False, progress: bool = True, **kwargs: """ arch = "efficientnet_v2_s" inverted_residual_setting, last_channel = _efficientnet_conf(arch) - return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs) + return _efficientnet( + arch, + inverted_residual_setting, + 0.2, + last_channel, + pretrained, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -595,7 +604,16 @@ def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: """ arch = "efficientnet_v2_m" inverted_residual_setting, last_channel = _efficientnet_conf(arch) - return 
_efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs) + return _efficientnet( + arch, + inverted_residual_setting, + 0.3, + last_channel, + pretrained, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: @@ -609,4 +627,13 @@ def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: """ arch = "efficientnet_v2_l" inverted_residual_setting, last_channel = _efficientnet_conf(arch) - return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs) + return _efficientnet( + arch, + inverted_residual_setting, + 0.4, + last_channel, + pretrained, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index 8527a08ed5f..bb6ec129bd7 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -355,7 +355,15 @@ def efficientnet_v2_s( weights = EfficientNet_V2_S_Weights.verify(weights) inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s") - return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs) + return _efficientnet( + inverted_residual_setting, + 0.2, + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) @handle_legacy_interface(weights=("pretrained", None)) @@ -365,7 +373,15 @@ def efficientnet_v2_m( weights = EfficientNet_V2_M_Weights.verify(weights) inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m") - return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs) + return _efficientnet( + inverted_residual_setting, + 0.3, + last_channel, + weights, + progress, + 
norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) @handle_legacy_interface(weights=("pretrained", None)) @@ -375,4 +391,12 @@ def efficientnet_v2_l( weights = EfficientNet_V2_L_Weights.verify(weights) inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l") - return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs) + return _efficientnet( + inverted_residual_setting, + 0.4, + last_channel, + weights, + progress, + norm_layer=partial(nn.BatchNorm2d, eps=1e-03), + **kwargs, + ) From 2ff873487cec32883a6dc838eb18d8fe9fa9d746 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 25 Feb 2022 10:47:59 +0000 Subject: [PATCH 06/10] Use Conv2dNormActivation. --- torchvision/models/efficientnet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 0e42abd9a3e..fec9dd582af 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -193,7 +193,7 @@ def __init__( if expanded_channels != cnf.input_channels: # fused expand layers.append( - ConvNormActivation( + Conv2dNormActivation( cnf.input_channels, expanded_channels, kernel_size=cnf.kernel, @@ -205,13 +205,13 @@ def __init__( # project layers.append( - ConvNormActivation( + Conv2dNormActivation( expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None ) ) else: layers.append( - ConvNormActivation( + Conv2dNormActivation( cnf.input_channels, cnf.out_channels, kernel_size=cnf.kernel, From bf41dfb5fe367c621ba4c7f0e247bd7e552746ec Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Sun, 27 Feb 2022 13:36:44 +0000 Subject: [PATCH 07/10] Adding pre-trained weights for EfficientNetV2-s --- docs/source/models.rst | 12 +++++++++++- hubconf.py | 3 +++ references/classification/README.md | 20 +++++++++++++++++++- torchvision/models/efficientnet.py | 4 ++-- 
torchvision/prototype/models/efficientnet.py | 13 ++++++------- 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/docs/source/models.rst b/docs/source/models.rst index 58bd0d81cd0..dbff7505d3c 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -38,7 +38,7 @@ architectures for image classification: - `ResNeXt`_ - `Wide ResNet`_ - `MNASNet`_ -- `EfficientNet`_ +- `EfficientNet`_ v1 & v2 - `RegNet`_ - `VisionTransformer`_ - `ConvNeXt`_ @@ -70,6 +70,9 @@ You can construct a model with random weights by calling its constructor: efficientnet_b5 = models.efficientnet_b5() efficientnet_b6 = models.efficientnet_b6() efficientnet_b7 = models.efficientnet_b7() + efficientnet_v2_s = models.efficientnet_v2_s() + efficientnet_v2_m = models.efficientnet_v2_m() + efficientnet_v2_l = models.efficientnet_v2_l() regnet_y_400mf = models.regnet_y_400mf() regnet_y_800mf = models.regnet_y_800mf() regnet_y_1_6gf = models.regnet_y_1_6gf() @@ -122,6 +125,9 @@ These can be constructed by passing ``pretrained=True``: efficientnet_b5 = models.efficientnet_b5(pretrained=True) efficientnet_b6 = models.efficientnet_b6(pretrained=True) efficientnet_b7 = models.efficientnet_b7(pretrained=True) + efficientnet_v2_s = models.efficientnet_v2_s(pretrained=True) + efficientnet_v2_m = models.efficientnet_v2_m(pretrained=True) + efficientnet_v2_l = models.efficientnet_v2_l(pretrained=True) regnet_y_400mf = models.regnet_y_400mf(pretrained=True) regnet_y_800mf = models.regnet_y_800mf(pretrained=True) regnet_y_1_6gf = models.regnet_y_1_6gf(pretrained=True) @@ -238,6 +244,7 @@ EfficientNet-B4 83.384 96.594 EfficientNet-B5 83.444 96.628 EfficientNet-B6 84.008 96.916 EfficientNet-B7 84.122 96.908 +EfficientNetV2-s 84.228 96.878 regnet_x_400mf 72.834 90.950 regnet_x_800mf 75.212 92.348 regnet_x_1_6gf 77.040 93.440 @@ -439,6 +446,9 @@ EfficientNet efficientnet_b5 efficientnet_b6 efficientnet_b7 + efficientnet_v2_s + efficientnet_v2_m + efficientnet_v2_l RegNet ------------ diff 
--git a/hubconf.py b/hubconf.py index 5c2ad8e9e0d..c3de4f2da9a 100644 --- a/hubconf.py +++ b/hubconf.py @@ -13,6 +13,9 @@ efficientnet_b5, efficientnet_b6, efficientnet_b7, + efficientnet_v2_s, + efficientnet_v2_m, + efficientnet_v2_l, ) from torchvision.models.googlenet import googlenet from torchvision.models.inception import inception_v3 diff --git a/references/classification/README.md b/references/classification/README.md index e75336f23ca..f7fd2414f6d 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -88,7 +88,7 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@ and [#3354](https://github.com/pytorch/vision/pull/3354) for details. -### EfficientNet +### EfficientNet-V1 The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108). @@ -114,6 +114,24 @@ torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --interpolation bic --val-resize-size 600 --val-crop-size 600 --train-crop-size 600 --test-only --pretrained ``` + +### EfficientNet-V2 +``` +torchrun --nproc_per_node=8 train.py \ +--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \ +--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \ +--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \ +--train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \ +--ra-sampler --ra-reps 4 +``` +Here `$MODEL` is one of `efficientnet_v2_s`, `efficientnet_v2_m` and `efficientnet_v2_l`. +Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the other variants `384` and `480` respectively. + +Note that the above command corresponds to training on a single node with 8 GPUs. 
+For generatring the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs), +and `--batch_size 16`. + + ### RegNet #### Small models diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index fec9dd582af..86ce1b31ee5 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -42,8 +42,8 @@ "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth", "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth", "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", - # Temporary TF weights - "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth", + # Weights trained with TorchVision + "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth", } diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index bb6ec129bd7..a25617f8079 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -60,7 +60,6 @@ def _efficientnet( _COMMON_META = { "task": "image_classification", "categories": _IMAGENET_CATEGORIES, - "interpolation": InterpolationMode.BICUBIC, "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet", } @@ -69,6 +68,7 @@ def _efficientnet( **_COMMON_META, "architecture": "EfficientNet", "publication_year": 2019, + "interpolation": InterpolationMode.BICUBIC, "min_size": (1, 1), } @@ -77,6 +77,7 @@ def _efficientnet( **_COMMON_META, "architecture": "EfficientNetV2", "publication_year": 2021, + "interpolation": InterpolationMode.BILINEAR, "min_size": (33, 33), } @@ -216,21 +217,19 @@ class EfficientNet_B7_Weights(WeightsEnum): class EfficientNet_V2_S_Weights(WeightsEnum): IMAGENET1K_V1 = Weights( - 
url="https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth", + url="https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth", transforms=partial( ImageNetEval, crop_size=384, resize_size=384, - interpolation=InterpolationMode.BICUBIC, - mean=(0.5, 0.5, 0.5), - std=(0.5, 0.5, 0.5), + interpolation=InterpolationMode.BILINEAR, ), meta={ **_COMMON_META_V2, "num_params": 21458488, "size": (384, 384), - "acc@1": 83.152, - "acc@5": 96.400, + "acc@1": 84.228, + "acc@5": 96.878, }, ) DEFAULT = IMAGENET1K_V1 From abeac10b7139ea2cbb6db968b8824aebb8344b47 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Mar 2022 09:17:51 +0000 Subject: [PATCH 08/10] Add Medium and Large weights --- references/classification/README.md | 10 +++-- torchvision/models/efficientnet.py | 3 ++ torchvision/prototype/models/efficientnet.py | 42 ++++++++++++++++++-- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/references/classification/README.md b/references/classification/README.md index f7fd2414f6d..173fb454995 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -124,12 +124,14 @@ torchrun --nproc_per_node=8 train.py \ --train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \ --ra-sampler --ra-reps 4 ``` -Here `$MODEL` is one of `efficientnet_v2_s`, `efficientnet_v2_m` and `efficientnet_v2_l`. -Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the other variants `384` and `480` respectively. +Here `$MODEL` is one of `efficientnet_v2_s` and `efficientnet_v2_m`. +Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the Medium `384` and `480` respectively. Note that the above command corresponds to training on a single node with 8 GPUs. -For generatring the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs), -and `--batch_size 16`. 
+For generating the pre-trained weights, we trained with 4 nodes, each with 8 GPUs (for a total of 32 GPUs), +and `--batch-size 32`. + +The weights of the Large variant are ported from the original paper rather than trained from scratch. See the `EfficientNet_V2_L_Weights` entry for their exact preprocessing transforms. ### RegNet diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py index 86ce1b31ee5..f8238912ffd 100644 --- a/torchvision/models/efficientnet.py +++ b/torchvision/models/efficientnet.py @@ -44,6 +44,9 @@ "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth", # Weights trained with TorchVision "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth", + "efficientnet_v2_m": "https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth", + # Weights ported from TF + "efficientnet_v2_l": "https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth", } diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index a25617f8079..c1bec6d6ed0 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -236,11 +236,45 @@ class EfficientNet_V2_S_Weights(WeightsEnum): class EfficientNet_V2_M_Weights(WeightsEnum): - pass + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth", + transforms=partial( + ImageNetEval, + crop_size=480, + resize_size=480, + interpolation=InterpolationMode.BILINEAR, + ), + meta={ + **_COMMON_META_V2, + "num_params": 54139356, + "size": (480, 480), + "acc@1": 85.119, + "acc@5": 97.151, + }, + ) + DEFAULT = IMAGENET1K_V1 class EfficientNet_V2_L_Weights(WeightsEnum): - pass + IMAGENET1K_V1 = Weights( + url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth", + transforms=partial( + ImageNetEval, + crop_size=480, + resize_size=480, + 
interpolation=InterpolationMode.BICUBIC, + mean=(0.5, 0.5, 0.5), + std=(0.5, 0.5, 0.5), + ), + meta={ + **_COMMON_META_V2, + "num_params": 118515272, + "size": (480, 480), + "acc@1": 85.808, + "acc@5": 97.788, + }, + ) + DEFAULT = IMAGENET1K_V1 @handle_legacy_interface(weights=("pretrained", EfficientNet_B0_Weights.IMAGENET1K_V1)) @@ -365,7 +399,7 @@ def efficientnet_v2_s( ) -@handle_legacy_interface(weights=("pretrained", None)) +@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_M_Weights.IMAGENET1K_V1)) def efficientnet_v2_m( *, weights: Optional[EfficientNet_V2_M_Weights] = None, progress: bool = True, **kwargs: Any ) -> EfficientNet: @@ -383,7 +417,7 @@ def efficientnet_v2_m( ) -@handle_legacy_interface(weights=("pretrained", None)) +@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_L_Weights.IMAGENET1K_V1)) def efficientnet_v2_l( *, weights: Optional[EfficientNet_V2_L_Weights] = None, progress: bool = True, **kwargs: Any ) -> EfficientNet: From 907944e3de5a47a73cfa6320c00bbd237afe0412 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Mar 2022 10:17:40 +0000 Subject: [PATCH 09/10] Update stats with single batch run. --- torchvision/prototype/models/efficientnet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py index c1bec6d6ed0..2619709764f 100644 --- a/torchvision/prototype/models/efficientnet.py +++ b/torchvision/prototype/models/efficientnet.py @@ -248,8 +248,8 @@ class EfficientNet_V2_M_Weights(WeightsEnum): **_COMMON_META_V2, "num_params": 54139356, "size": (480, 480), - "acc@1": 85.119, - "acc@5": 97.151, + "acc@1": 85.112, + "acc@5": 97.156, }, ) DEFAULT = IMAGENET1K_V1 From a2694320c12c257007d7835e9cca52f3241f6f1f Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Mar 2022 10:23:12 +0000 Subject: [PATCH 10/10] Add accuracies in the docs. 
--- docs/source/models.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/models.rst b/docs/source/models.rst index dbff7505d3c..84fee191a8e 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -245,6 +245,8 @@ EfficientNet-B5 83.444 96.628 EfficientNet-B6 84.008 96.916 EfficientNet-B7 84.122 96.908 EfficientNetV2-s 84.228 96.878 +EfficientNetV2-m 85.112 97.156 +EfficientNetV2-l 85.808 97.788 regnet_x_400mf 72.834 90.950 regnet_x_800mf 75.212 92.348 regnet_x_1_6gf 77.040 93.440