From b03a7ec8116b410b81917056144e22882fd447a9 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 21 Feb 2022 15:51:17 +0000
Subject: [PATCH 01/10] Extend the EfficientNet class to support v1 and v2.

---
 torchvision/models/efficientnet.py | 152 ++++++++++++++++++++++-------
 1 file changed, 119 insertions(+), 33 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index f7eba46cb39..8adde96344e 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -1,5 +1,7 @@
 import copy
 import math
+import warnings
+from dataclasses import dataclass
 from functools import partial
 from typing import Any, Callable, Optional, List, Sequence
 
@@ -40,8 +42,23 @@
 }
 
 
-class MBConvConfig:
-    # Stores information listed at Table 1 of the EfficientNet paper
+@dataclass
+class _MBConvConfig:
+    expand_ratio: float
+    kernel: int
+    stride: int
+    input_channels: int
+    out_channels: int
+    num_layers: int
+    block: Callable[..., nn.Module]
+
+    @staticmethod
+    def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int:
+        return _make_divisible(channels * width_mult, 8, min_value)
+
+
+class MBConvConfig(_MBConvConfig):
+    # Stores information listed at Table 1 of the EfficientNet paper & Table 4 of the EfficientNetV2 paper
     def __init__(
         self,
         expand_ratio: float,
@@ -52,36 +69,37 @@ def __init__(
         num_layers: int,
         width_mult: float,
         depth_mult: float,
+        block: Optional[Callable[..., nn.Module]] = None
     ) -> None:
-        self.expand_ratio = expand_ratio
-        self.kernel = kernel
-        self.stride = stride
-        self.input_channels = self.adjust_channels(input_channels, width_mult)
-        self.out_channels = self.adjust_channels(out_channels, width_mult)
-        self.num_layers = self.adjust_depth(num_layers, depth_mult)
-
-    def __repr__(self) -> str:
-        s = (
-            f"{self.__class__.__name__}("
-            f"expand_ratio={self.expand_ratio}"
-            f", kernel={self.kernel}"
-            f", stride={self.stride}"
-            f", input_channels={self.input_channels}"
-            f", out_channels={self.out_channels}"
-            f", num_layers={self.num_layers}"
-            f")"
-        )
-        return s
-
-    @staticmethod
-    def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int:
-        return _make_divisible(channels * width_mult, 8, min_value)
+        input_channels = self.adjust_channels(input_channels, width_mult)
+        out_channels = self.adjust_channels(out_channels, width_mult)
+        num_layers = self.adjust_depth(num_layers, depth_mult)
+        if block is None:
+            block = MBConv
+        super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block)
 
     @staticmethod
     def adjust_depth(num_layers: int, depth_mult: float):
         return int(math.ceil(num_layers * depth_mult))
 
 
+class FusedMBConvConfig(_MBConvConfig):
+    # Stores information listed at Table 4 of the EfficientNetV2 paper
+    def __init__(
+        self,
+        expand_ratio: float,
+        kernel: int,
+        stride: int,
+        input_channels: int,
+        out_channels: int,
+        num_layers: int,
+        block: Optional[Callable[..., nn.Module]] = None
+    ) -> None:
+        if block is None:
+            block = FusedMBConv
+        super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block)
+
+
 class MBConv(nn.Module):
     def __init__(
         self,
@@ -149,6 +167,68 @@ def forward(self, input: Tensor) -> Tensor:
         return result
 
 
+class FusedMBConv(nn.Module):
+    def __init__(
+        self,
+        cnf: FusedMBConvConfig,
+        stochastic_depth_prob: float,
+        norm_layer: Callable[..., nn.Module],
+        **kwargs: Any,
+    ) -> None:
+        super().__init__()
+
+        if not (1 <= cnf.stride <= 2):
+            raise ValueError("illegal stride value")
+
+        self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
+
+        layers: List[nn.Module] = []
+        activation_layer = nn.SiLU
+
+        expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
+        if expanded_channels != cnf.input_channels:
+            # fused expand
+            layers.append(
+                ConvNormActivation(
+                    cnf.input_channels,
+                    expanded_channels,
+                    kernel_size=cnf.kernel,
+                    stride=cnf.stride,
+                    norm_layer=norm_layer,
+                    activation_layer=activation_layer,
+                )
+            )
+
+            # project
+            layers.append(
+                ConvNormActivation(
+                    expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None
+                )
+            )
+        else:
+            layers.append(
+                ConvNormActivation(
+                    cnf.input_channels,
+                    cnf.out_channels,
+                    kernel_size=cnf.kernel,
+                    stride=cnf.stride,
+                    norm_layer=norm_layer,
+                    activation_layer=activation_layer,
+                )
+            )
+
+        self.block = nn.Sequential(*layers)
+        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
+        self.out_channels = cnf.out_channels
+
+    def forward(self, input: Tensor) -> Tensor:
+        result = self.block(input)
+        if self.use_res_connect:
+            result = self.stochastic_depth(result)
+            result += input
+        return result
+
+
 class EfficientNet(nn.Module):
     def __init__(
         self,
@@ -156,19 +236,17 @@ def __init__(
         dropout: float,
         stochastic_depth_prob: float = 0.2,
         num_classes: int = 1000,
-        block: Optional[Callable[..., nn.Module]] = None,
         norm_layer: Optional[Callable[..., nn.Module]] = None,
         **kwargs: Any,
     ) -> None:
         """
-        EfficientNet main class
+        EfficientNet V1 and V2 main class
 
         Args:
             inverted_residual_setting (List[MBConvConfig]): Network structure
             dropout (float): The droupout probability
             stochastic_depth_prob (float): The stochastic depth probability
             num_classes (int): Number of classes
-            block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet
             norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
         """
         super().__init__()
@@ -178,12 +256,19 @@ def __init__(
             raise ValueError("The inverted_residual_setting should not be empty")
         elif not (
             isinstance(inverted_residual_setting, Sequence)
-            and all([isinstance(s, MBConvConfig) for s in inverted_residual_setting])
+            and all([isinstance(s, _MBConvConfig) for s in inverted_residual_setting])
         ):
             raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")
 
-        if block is None:
-            block = MBConv
+        if "block" in kwargs:
+            warnings.warn(
+                "The parameter 'block' is deprecated since 0.13 and will be removed 0.15. "
+                "Please pass this information on 'MBConvConfig.block' instead."
+            )
+            if kwargs["block"] is not None:
+                for s in inverted_residual_setting:
+                    if isinstance(s, MBConvConfig):
+                        s.block = kwargs["block"]
 
         if norm_layer is None:
             norm_layer = nn.BatchNorm2d
@@ -215,14 +300,15 @@ def __init__(
                 # adjust stochastic depth probability based on the depth of the stage block
                 sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks
 
-                stage.append(block(block_cnf, sd_prob, norm_layer))
+                stage.append(block_cnf.block(block_cnf, sd_prob, norm_layer))
                 stage_block_id += 1
 
             layers.append(nn.Sequential(*stage))
 
         # building last several layers
         lastconv_input_channels = inverted_residual_setting[-1].out_channels
-        lastconv_output_channels = 4 * lastconv_input_channels
+        is_v2 = any([isinstance(s, FusedMBConvConfig) for s in inverted_residual_setting])
+        lastconv_output_channels = 1280 if is_v2 else 4 * lastconv_input_channels
         layers.append(
             ConvNormActivation(
                 lastconv_input_channels,

From aa82cf1140786e7cf567c85e0d1cc4d4decde496 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 21 Feb 2022 17:47:44 +0000
Subject: [PATCH 02/10] Refactor config/builder methods and add prototype
 builders

---
 ...elTester.test_efficientnet_v2_l_expect.pkl | Bin 0 -> 939 bytes
 ...elTester.test_efficientnet_v2_m_expect.pkl | Bin 0 -> 939 bytes
 ...elTester.test_efficientnet_v2_s_expect.pkl | Bin 0 -> 939 bytes
 torchvision/models/efficientnet.py            | 179 ++++++++++++++----
 torchvision/prototype/models/efficientnet.py  | 137 +++++++++-----
 5 files changed, 234 insertions(+), 82 deletions(-)
 create mode 100644 test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl
 create mode 100644 test/expect/ModelTester.test_efficientnet_v2_m_expect.pkl
 create mode 100644 test/expect/ModelTester.test_efficientnet_v2_s_expect.pkl

diff --git a/test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl b/test/expect/ModelTester.test_efficientnet_v2_l_expect.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f3ca5315337c7f74d8ad7b249ad315b25b2b6e33
GIT binary patch
literal 939
zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW
zr<g0gC^e}xGbbg!BsH%%zbL-Uh^vr6LnDG6XnrwJWny}2AtOW!ms?JLVo5MWkgJd>
zf)S|3ppZF&8AvA=loqmh8<iBY26{7iGkP1f6|#97c{8>ZvUemW=jY_4CYNO9=M{7L
z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w
z{zUOK5=ZXk1(PMcUyNU9^O`bzjNTYiKgW2-?On!^CN(DFdzP3;u)HwRy`W%Xc1yvO
zcSoYB?P5cd57*;O6a01<ZQ~H%npK=-VkN3;dS-T%NiCc2mYVzbjTFA{nXW6**%~%U
z*3?Vsk+Iaw+s0g>D^0I6UEX5C{d-feC!;CHPm`@OYn`?lZJcbftyFl+tAqtxTK2MU
zjWzjT@*@6-$+dfNTb?}-H+kl+Y7+h}am!U{zpYERKQTE{7P-}S=He|$Rg+C_to>`^
zW~*SDlzQA46k0EqeSCfb7*ZgNJGA&2tl^<mmReK{j0HC*b0UKaDdZqbV=j=*7w4si
zG68J`;Q((&5Cu<@$Z;qDl0X6IDHL5dvY+@+biM-ekag=DpzB3;6+enz37`vMdZA$u
z;LXOS163r)tP58TO3WYtqqjr243of~1bLSYls6bWp$b5mAi$fI4J5`4gdp`0wE#0}
B^dkTO

literal 0
HcmV?d00001

diff --git a/test/expect/ModelTester.test_efficientnet_v2_m_expect.pkl b/test/expect/ModelTester.test_efficientnet_v2_m_expect.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c6ebbb7951c48204d3e846ad4e2580db60e12c36
GIT binary patch
literal 939
zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW
zr<g0gC^e}xGbbg!BsH%%zbL-Uh^vr6LnDG6XnrwJWny}2AtOW!ms?JLVo5MWkgJd>
zf)S|3ppZF&8AvA=loqmh8<iBY26{7iGkP1f6|#97c{8>ZvUemW=jY_4CYNO9=M{7L
z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w
z{zUOK5+}K&-l!|ab@RVFvo^gv@Xx4JH+}OAt;-t|o~<^_GGW~OY_6Ws0f+ZSKD#3}
zooQ$_YARl~NzCVx(XV)UquLJL%^j*0o6JKWZM-7$X4CU%=1ng?EHnz}7cmMCn`*2x
znROGxoOe1O8hAGJ&U$84#KE|EhO)JBRh`<VFV;>*p-OVb|Jk=09gUl6v|299xMA0N
zqaU6Qn`bOsYs3}w+bHGEUZYf2RpXAYO-3TCE*l26${B0z+iG;+^`!Nt2aPr}d<)+E
zRjY8*vo(^NL7}Dh-1Y1UU`T;5?$F|Au!e_LS!z)+Fc#dL%!v#xq>zI!jk!QJU!0d7
z$^^6(gaf=8K@>bqBFCWsNCE|*r%-g=$bRBO(fJC<L)NWtfUXzWRs1M=C4er3>4k<t
zfHxbP4pfmGvo2gYC^3TojNT66GE4${669SrP~Kqhgem}Kf&gz;Hjo%I5Q5Z0)B*sm
C!S=cU

literal 0
HcmV?d00001

diff --git a/test/expect/ModelTester.test_efficientnet_v2_s_expect.pkl b/test/expect/ModelTester.test_efficientnet_v2_s_expect.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..ef798e3a0b03656dc7183ef0f9a36ef4d38bae13
GIT binary patch
literal 939
zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW
zr<g0gC^e}xGbbg!BsH%%zbL-Uh^vr6LnDG6XnrwJWny}2AtOW!ms?JLVo5MWkgJd>
zf)S|3ppZF&8AvA=loqmh8<iBY26{7iGkP1f6|#97c{8>ZvUemW=jY_4CYNO9=M{7L
z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w
z{zUOK5=S<5r>W+Sf2PNF@|)c-&NaO|!+Tr8ic_0)EEP;W?s}Onp2}ouF~{2MmS^17
zgimbSdg2b5?sH(+c4%Rg=>dl=o0*DtY-71pXLh*Q(R9MkH6{<Yacq@*Sz;#fTx;8m
zvNc<``qgjSUHW&c;$lm)PZbR&O|k`Mf$w*3J@cl~gxf8A>la12ZL36dwuYW5G@COc
z!gM0Pqp2hBLX*Q6CYY@(U0{0ckcHWmH}_0^Y;4UooRl)tFj6(W;8J16)pOBQN<n#R
z`?rd%5&xaHf<lY+^tap-z>orA+@ZzKU=0tgvecqtU@W*fnG+dYNFfJd8gqeczBn&E
zlnH1n2nTpGf+%>JM2<rNkOT@qPoe0#k^RJnqVpAyhpb!Q09`M#tN2m$N&sC5(+drQ
z0B<%n9jGEXW?i^)P+|rF7`+|BWtar^B*?pLpuEB02~_~f1OeWxY#=daAOxw0s09Eq
C#Pr7i

literal 0
HcmV?d00001

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index 8adde96344e..82e4174cd9b 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -3,7 +3,7 @@
 import warnings
 from dataclasses import dataclass
 from functools import partial
-from typing import Any, Callable, Optional, List, Sequence
+from typing import Any, Callable, Optional, List, Sequence, Tuple, Union
 
 import torch
 from torch import nn, Tensor
@@ -25,6 +25,9 @@
     "efficientnet_b5",
     "efficientnet_b6",
     "efficientnet_b7",
+    "efficientnet_v2_s",
+    "efficientnet_v2_m",
+    "efficientnet_v2_l",
 ]
 
 
@@ -67,9 +70,9 @@ def __init__(
         input_channels: int,
         out_channels: int,
         num_layers: int,
-        width_mult: float,
-        depth_mult: float,
-        block: Optional[Callable[..., nn.Module]] = None
+        width_mult: float = 1.0,
+        depth_mult: float = 1.0,
+        block: Optional[Callable[..., nn.Module]] = None,
     ) -> None:
         input_channels = self.adjust_channels(input_channels, width_mult)
         out_channels = self.adjust_channels(out_channels, width_mult)
@@ -93,7 +96,7 @@ def __init__(
         input_channels: int,
         out_channels: int,
         num_layers: int,
-        block: Optional[Callable[..., nn.Module]] = None
+        block: Optional[Callable[..., nn.Module]] = None,
     ) -> None:
         if block is None:
             block = FusedMBConv
@@ -232,22 +235,24 @@ def forward(self, input: Tensor) -> Tensor:
 class EfficientNet(nn.Module):
     def __init__(
         self,
-        inverted_residual_setting: List[MBConvConfig],
+        inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
         dropout: float,
         stochastic_depth_prob: float = 0.2,
         num_classes: int = 1000,
         norm_layer: Optional[Callable[..., nn.Module]] = None,
+        last_channel: Optional[int] = None,
         **kwargs: Any,
     ) -> None:
         """
         EfficientNet V1 and V2 main class
 
         Args:
-            inverted_residual_setting (List[MBConvConfig]): Network structure
+            inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]]): Network structure
             dropout (float): The droupout probability
             stochastic_depth_prob (float): The stochastic depth probability
             num_classes (int): Number of classes
             norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
+            last_channel (int): The number of channels on the penultimate layer
         """
         super().__init__()
         _log_api_usage_once(self)
@@ -307,8 +312,7 @@ def __init__(
 
         # building last several layers
         lastconv_input_channels = inverted_residual_setting[-1].out_channels
-        is_v2 = any([isinstance(s, FusedMBConvConfig) for s in inverted_residual_setting])
-        lastconv_output_channels = 1280 if is_v2 else 4 * lastconv_input_channels
+        lastconv_output_channels = last_channel if last_channel is not None else 4 * lastconv_input_channels
         layers.append(
             ConvNormActivation(
                 lastconv_input_channels,
@@ -355,24 +359,14 @@ def forward(self, x: Tensor) -> Tensor:
 
 def _efficientnet(
     arch: str,
-    width_mult: float,
-    depth_mult: float,
+    inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
     dropout: float,
+    last_channel: Optional[int],
     pretrained: bool,
     progress: bool,
     **kwargs: Any,
 ) -> EfficientNet:
-    bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult)
-    inverted_residual_setting = [
-        bneck_conf(1, 3, 1, 32, 16, 1),
-        bneck_conf(6, 3, 2, 16, 24, 2),
-        bneck_conf(6, 5, 2, 24, 40, 2),
-        bneck_conf(6, 3, 2, 40, 80, 3),
-        bneck_conf(6, 5, 1, 80, 112, 3),
-        bneck_conf(6, 5, 2, 112, 192, 4),
-        bneck_conf(6, 3, 1, 192, 320, 1),
-    ]
-    model = EfficientNet(inverted_residual_setting, dropout, **kwargs)
+    model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs)
     if pretrained:
         if model_urls.get(arch, None) is None:
             raise ValueError(f"No checkpoint is available for model type {arch}")
@@ -381,6 +375,61 @@ def _efficientnet(
     return model
 
 
+def _efficientnet_conf(
+    arch: str,
+    **kwargs: Any,
+) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]:
+    inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]]
+    if arch.startswith("efficientnet_b"):
+        bneck_conf = partial(MBConvConfig, width_mult=kwargs.pop("width_mult"), depth_mult=kwargs.pop("depth_mult"))
+        inverted_residual_setting = [
+            bneck_conf(1, 3, 1, 32, 16, 1),
+            bneck_conf(6, 3, 2, 16, 24, 2),
+            bneck_conf(6, 5, 2, 24, 40, 2),
+            bneck_conf(6, 3, 2, 40, 80, 3),
+            bneck_conf(6, 5, 1, 80, 112, 3),
+            bneck_conf(6, 5, 2, 112, 192, 4),
+            bneck_conf(6, 3, 1, 192, 320, 1),
+        ]
+        last_channel = None
+    elif arch.startswith("efficientnet_v2_s"):
+        inverted_residual_setting = [
+            FusedMBConvConfig(1, 3, 1, 24, 24, 2),
+            FusedMBConvConfig(4, 3, 2, 24, 48, 4),
+            FusedMBConvConfig(4, 3, 2, 48, 64, 4),
+            MBConvConfig(4, 3, 2, 64, 128, 6),
+            MBConvConfig(6, 3, 1, 128, 160, 9),
+            MBConvConfig(6, 3, 2, 160, 256, 15),
+        ]
+        last_channel = 1280
+    elif arch.startswith("efficientnet_v2_m"):
+        inverted_residual_setting = [
+            FusedMBConvConfig(1, 3, 1, 24, 24, 3),
+            FusedMBConvConfig(4, 3, 2, 24, 48, 5),
+            FusedMBConvConfig(4, 3, 2, 48, 80, 5),
+            MBConvConfig(4, 3, 2, 80, 160, 7),
+            MBConvConfig(6, 3, 1, 160, 176, 14),
+            MBConvConfig(6, 3, 2, 176, 304, 18),
+            MBConvConfig(6, 3, 1, 304, 512, 5),
+        ]
+        last_channel = 1280
+    elif arch.startswith("efficientnet_v2_l"):
+        inverted_residual_setting = [
+            FusedMBConvConfig(1, 3, 1, 32, 32, 4),
+            FusedMBConvConfig(4, 3, 2, 32, 64, 7),
+            FusedMBConvConfig(4, 3, 2, 64, 96, 7),
+            MBConvConfig(4, 3, 2, 96, 192, 10),
+            MBConvConfig(6, 3, 1, 192, 224, 19),
+            MBConvConfig(6, 3, 2, 224, 384, 25),
+            MBConvConfig(6, 3, 1, 384, 640, 7),
+        ]
+        last_channel = 1280
+    else:
+        raise ValueError(f"Unsupported model type {arch}")
+
+    return inverted_residual_setting, last_channel
+
+
 def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
     """
     Constructs a EfficientNet B0 architecture from
@@ -390,7 +439,9 @@ def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
-    return _efficientnet("efficientnet_b0", 1.0, 1.0, 0.2, pretrained, progress, **kwargs)
+    arch = "efficientnet_b0"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.0, depth_mult=1.0)
+    return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -402,7 +453,9 @@ def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
-    return _efficientnet("efficientnet_b1", 1.0, 1.1, 0.2, pretrained, progress, **kwargs)
+    arch = "efficientnet_b1"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.0, depth_mult=1.1)
+    return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -414,7 +467,9 @@ def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
-    return _efficientnet("efficientnet_b2", 1.1, 1.2, 0.3, pretrained, progress, **kwargs)
+    arch = "efficientnet_b2"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.1, depth_mult=1.2)
+    return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -426,7 +481,9 @@ def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
-    return _efficientnet("efficientnet_b3", 1.2, 1.4, 0.3, pretrained, progress, **kwargs)
+    arch = "efficientnet_b3"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.2, depth_mult=1.4)
+    return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -438,7 +495,9 @@ def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
-    return _efficientnet("efficientnet_b4", 1.4, 1.8, 0.4, pretrained, progress, **kwargs)
+    arch = "efficientnet_b4"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.4, depth_mult=1.8)
+    return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -450,11 +509,13 @@ def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
+    arch = "efficientnet_b5"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.6, depth_mult=2.2)
     return _efficientnet(
-        "efficientnet_b5",
-        1.6,
-        2.2,
+        arch,
+        inverted_residual_setting,
         0.4,
+        last_channel,
         pretrained,
         progress,
         norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
@@ -471,11 +532,13 @@ def efficientnet_b6(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
+    arch = "efficientnet_b6"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=1.8, depth_mult=2.6)
     return _efficientnet(
-        "efficientnet_b6",
-        1.8,
-        2.6,
+        arch,
+        inverted_residual_setting,
         0.5,
+        last_channel,
         pretrained,
         progress,
         norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
@@ -492,13 +555,57 @@ def efficientnet_b7(pretrained: bool = False, progress: bool = True, **kwargs: A
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
+    arch = "efficientnet_b7"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch, width_mult=2.0, depth_mult=3.1)
     return _efficientnet(
-        "efficientnet_b7",
-        2.0,
-        3.1,
+        arch,
+        inverted_residual_setting,
         0.5,
+        last_channel,
         pretrained,
         progress,
         norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
         **kwargs,
     )
+
+
+def efficientnet_v2_s(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-S architecture from
+    `"EfficientNetV2: Smaller Models and Faster Training" <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    arch = "efficientnet_v2_s"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch)
+    return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs)
+
+
+def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-M architecture from
+    `"EfficientNetV2: Smaller Models and Faster Training" <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    arch = "efficientnet_v2_m"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch)
+    return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs)
+
+
+def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
+    """
+    Constructs an EfficientNetV2-L architecture from
+    `"EfficientNetV2: Smaller Models and Faster Training" <https://arxiv.org/abs/2104.00298>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    arch = "efficientnet_v2_l"
+    inverted_residual_setting, last_channel = _efficientnet_conf(arch)
+    return _efficientnet(arch, inverted_residual_setting, 0.5, last_channel, pretrained, progress, **kwargs)
diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index 1fa2ea4d294..95e7d0987f3 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -1,11 +1,11 @@
 from functools import partial
-from typing import Any, Optional
+from typing import Any, Optional, Sequence, Union
 
 from torch import nn
 from torchvision.prototype.transforms import ImageNetEval
 from torchvision.transforms.functional import InterpolationMode
 
-from ...models.efficientnet import EfficientNet, MBConvConfig
+from ...models.efficientnet import EfficientNet, MBConvConfig, FusedMBConvConfig, _efficientnet_conf
 from ._api import WeightsEnum, Weights
 from ._meta import _IMAGENET_CATEGORIES
 from ._utils import handle_legacy_interface, _ovewrite_named_param
@@ -21,6 +21,9 @@
     "EfficientNet_B5_Weights",
     "EfficientNet_B6_Weights",
     "EfficientNet_B7_Weights",
+    "EfficientNet_V2_S_Weights",
+    "EfficientNet_V2_M_Weights",
+    "EfficientNet_V2_L_Weights",
     "efficientnet_b0",
     "efficientnet_b1",
     "efficientnet_b2",
@@ -29,13 +32,16 @@
     "efficientnet_b5",
     "efficientnet_b6",
     "efficientnet_b7",
+    "efficientnet_v2_s",
+    "efficientnet_v2_m",
+    "efficientnet_v2_l",
 ]
 
 
 def _efficientnet(
-    width_mult: float,
-    depth_mult: float,
+    inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
     dropout: float,
+    last_channel: Optional[int],
     weights: Optional[WeightsEnum],
     progress: bool,
     **kwargs: Any,
@@ -43,18 +49,7 @@ def _efficientnet(
     if weights is not None:
         _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
 
-    bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult)
-    inverted_residual_setting = [
-        bneck_conf(1, 3, 1, 32, 16, 1),
-        bneck_conf(6, 3, 2, 16, 24, 2),
-        bneck_conf(6, 5, 2, 24, 40, 2),
-        bneck_conf(6, 3, 2, 40, 80, 3),
-        bneck_conf(6, 5, 1, 80, 112, 3),
-        bneck_conf(6, 5, 2, 112, 192, 4),
-        bneck_conf(6, 3, 1, 192, 320, 1),
-    ]
-
-    model = EfficientNet(inverted_residual_setting, dropout, **kwargs)
+    model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs)
 
     if weights is not None:
         model.load_state_dict(weights.get_state_dict(progress=progress))
@@ -62,7 +57,7 @@ def _efficientnet(
     return model
 
 
-_COMMON_META = {
+_COMMON_META_V1 = {
     "task": "image_classification",
     "architecture": "EfficientNet",
     "publication_year": 2019,
@@ -78,7 +73,7 @@ class EfficientNet_B0_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth",
         transforms=partial(ImageNetEval, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 5288548,
             "size": (224, 224),
             "acc@1": 77.692,
@@ -93,7 +88,7 @@ class EfficientNet_B1_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth",
         transforms=partial(ImageNetEval, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 7794184,
             "size": (240, 240),
             "acc@1": 78.642,
@@ -104,7 +99,7 @@ class EfficientNet_B1_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth",
         transforms=partial(ImageNetEval, crop_size=240, resize_size=255, interpolation=InterpolationMode.BILINEAR),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 7794184,
             "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning",
             "interpolation": InterpolationMode.BILINEAR,
@@ -121,7 +116,7 @@ class EfficientNet_B2_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth",
         transforms=partial(ImageNetEval, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 9109994,
             "size": (288, 288),
             "acc@1": 80.608,
@@ -136,7 +131,7 @@ class EfficientNet_B3_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth",
         transforms=partial(ImageNetEval, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 12233232,
             "size": (300, 300),
             "acc@1": 82.008,
@@ -151,7 +146,7 @@ class EfficientNet_B4_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth",
         transforms=partial(ImageNetEval, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 19341616,
             "size": (380, 380),
             "acc@1": 83.384,
@@ -166,7 +161,7 @@ class EfficientNet_B5_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
         transforms=partial(ImageNetEval, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 30389784,
             "size": (456, 456),
             "acc@1": 83.444,
@@ -181,7 +176,7 @@ class EfficientNet_B6_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
         transforms=partial(ImageNetEval, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 43040704,
             "size": (528, 528),
             "acc@1": 84.008,
@@ -196,7 +191,7 @@ class EfficientNet_B7_Weights(WeightsEnum):
         url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
         transforms=partial(ImageNetEval, crop_size=600, resize_size=600, interpolation=InterpolationMode.BICUBIC),
         meta={
-            **_COMMON_META,
+            **_COMMON_META_V1,
             "num_params": 66347960,
             "size": (600, 600),
             "acc@1": 84.122,
@@ -206,13 +201,26 @@ class EfficientNet_B7_Weights(WeightsEnum):
     DEFAULT = IMAGENET1K_V1
 
 
+class EfficientNet_V2_S_Weights(WeightsEnum):
+    pass
+
+
+class EfficientNet_V2_M_Weights(WeightsEnum):
+    pass
+
+
+class EfficientNet_V2_L_Weights(WeightsEnum):
+    pass
+
+
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B0_Weights.IMAGENET1K_V1))
 def efficientnet_b0(
     *, weights: Optional[EfficientNet_B0_Weights] = None, progress: bool = True, **kwargs: Any
 ) -> EfficientNet:
     weights = EfficientNet_B0_Weights.verify(weights)
 
-    return _efficientnet(width_mult=1.0, depth_mult=1.0, dropout=0.2, weights=weights, progress=progress, **kwargs)
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b0", width_mult=1.0, depth_mult=1.0)
+    return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B1_Weights.IMAGENET1K_V1))
@@ -221,7 +229,8 @@ def efficientnet_b1(
 ) -> EfficientNet:
     weights = EfficientNet_B1_Weights.verify(weights)
 
-    return _efficientnet(width_mult=1.0, depth_mult=1.1, dropout=0.2, weights=weights, progress=progress, **kwargs)
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b1", width_mult=1.0, depth_mult=1.1)
+    return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B2_Weights.IMAGENET1K_V1))
@@ -230,7 +239,8 @@ def efficientnet_b2(
 ) -> EfficientNet:
     weights = EfficientNet_B2_Weights.verify(weights)
 
-    return _efficientnet(width_mult=1.1, depth_mult=1.2, dropout=0.3, weights=weights, progress=progress, **kwargs)
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b2", width_mult=1.1, depth_mult=1.2)
+    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B3_Weights.IMAGENET1K_V1))
@@ -239,7 +249,8 @@ def efficientnet_b3(
 ) -> EfficientNet:
     weights = EfficientNet_B3_Weights.verify(weights)
 
-    return _efficientnet(width_mult=1.2, depth_mult=1.4, dropout=0.3, weights=weights, progress=progress, **kwargs)
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b3", width_mult=1.2, depth_mult=1.4)
+    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B4_Weights.IMAGENET1K_V1))
@@ -248,7 +259,8 @@ def efficientnet_b4(
 ) -> EfficientNet:
     weights = EfficientNet_B4_Weights.verify(weights)
 
-    return _efficientnet(width_mult=1.4, depth_mult=1.8, dropout=0.4, weights=weights, progress=progress, **kwargs)
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b4", width_mult=1.4, depth_mult=1.8)
+    return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B5_Weights.IMAGENET1K_V1))
@@ -257,12 +269,13 @@ def efficientnet_b5(
 ) -> EfficientNet:
     weights = EfficientNet_B5_Weights.verify(weights)
 
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b5", width_mult=1.6, depth_mult=2.2)
     return _efficientnet(
-        width_mult=1.6,
-        depth_mult=2.2,
-        dropout=0.4,
-        weights=weights,
-        progress=progress,
+        inverted_residual_setting,
+        0.4,
+        last_channel,
+        weights,
+        progress,
         norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
         **kwargs,
     )
@@ -274,12 +287,13 @@ def efficientnet_b6(
 ) -> EfficientNet:
     weights = EfficientNet_B6_Weights.verify(weights)
 
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b6", width_mult=1.8, depth_mult=2.6)
     return _efficientnet(
-        width_mult=1.8,
-        depth_mult=2.6,
-        dropout=0.5,
-        weights=weights,
-        progress=progress,
+        inverted_residual_setting,
+        0.5,
+        last_channel,
+        weights,
+        progress,
         norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
         **kwargs,
     )
@@ -291,12 +305,43 @@ def efficientnet_b7(
 ) -> EfficientNet:
     weights = EfficientNet_B7_Weights.verify(weights)
 
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b7", width_mult=2.0, depth_mult=3.1)
     return _efficientnet(
-        width_mult=2.0,
-        depth_mult=3.1,
-        dropout=0.5,
-        weights=weights,
-        progress=progress,
+        inverted_residual_setting,
+        0.5,
+        last_channel,
+        weights,
+        progress,
         norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
         **kwargs,
     )
+
+
+@handle_legacy_interface(weights=("pretrained", None))
+def efficientnet_v2_s(
+    *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any
+) -> EfficientNet:
+    weights = EfficientNet_V2_S_Weights.verify(weights)
+
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
+    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
+
+
+@handle_legacy_interface(weights=("pretrained", None))
+def efficientnet_v2_m(
+    *, weights: Optional[EfficientNet_V2_M_Weights] = None, progress: bool = True, **kwargs: Any
+) -> EfficientNet:
+    weights = EfficientNet_V2_M_Weights.verify(weights)
+
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m")
+    return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)
+
+
+@handle_legacy_interface(weights=("pretrained", None))
+def efficientnet_v2_l(
+    *, weights: Optional[EfficientNet_V2_L_Weights] = None, progress: bool = True, **kwargs: Any
+) -> EfficientNet:
+    weights = EfficientNet_V2_L_Weights.verify(weights)
+
+    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l")
+    return _efficientnet(inverted_residual_setting, 0.5, last_channel, weights, progress, **kwargs)

From 931032519acf16e9833ad3881fc9a45b69d258d8 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 21 Feb 2022 19:26:53 +0000
Subject: [PATCH 03/10] Refactoring weight info.

---
 torchvision/models/efficientnet.py           |  3 +-
 torchvision/prototype/models/efficientnet.py | 43 +++++++++++++++++---
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index 82e4174cd9b..c56fac844ba 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -42,6 +42,8 @@
     "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
     "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
     "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
+    # Temporary TF weights
+    "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth",
 }
 
 
@@ -176,7 +178,6 @@ def __init__(
         cnf: FusedMBConvConfig,
         stochastic_depth_prob: float,
         norm_layer: Callable[..., nn.Module],
-        **kwargs: Any,
     ) -> None:
         super().__init__()
 
diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index 95e7d0987f3..94f871d337c 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -57,17 +57,30 @@ def _efficientnet(
     return model
 
 
-_COMMON_META_V1 = {
+_COMMON_META = {
     "task": "image_classification",
-    "architecture": "EfficientNet",
-    "publication_year": 2019,
-    "min_size": (1, 1),
     "categories": _IMAGENET_CATEGORIES,
     "interpolation": InterpolationMode.BICUBIC,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet",
 }
 
 
+_COMMON_META_V1 = {
+    **_COMMON_META,
+    "architecture": "EfficientNet",
+    "publication_year": 2019,
+    "min_size": (1, 1),
+}
+
+
+_COMMON_META_V2 = {
+    **_COMMON_META,
+    "architecture": "EfficientNetV2",
+    "publication_year": 2021,
+    "min_size": (33, 33),
+}
+
+
 class EfficientNet_B0_Weights(WeightsEnum):
     IMAGENET1K_V1 = Weights(
         url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth",
@@ -202,7 +215,25 @@ class EfficientNet_B7_Weights(WeightsEnum):
 
 
 class EfficientNet_V2_S_Weights(WeightsEnum):
-    pass
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth",
+        transforms=partial(
+            ImageNetEval,
+            crop_size=384,
+            resize_size=384,
+            interpolation=InterpolationMode.BICUBIC,
+            mean=(0.5, 0.5, 0.5),
+            std=(0.5, 0.5, 0.5),
+        ),
+        meta={
+            **_COMMON_META_V2,
+            "num_params": 21458488,
+            "size": (384, 384),
+            "acc@1": 83.152,
+            "acc@5": 96.400,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
 
 
 class EfficientNet_V2_M_Weights(WeightsEnum):
@@ -317,7 +348,7 @@ def efficientnet_b7(
     )
 
 
-@handle_legacy_interface(weights=("pretrained", None))
+@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_S_Weights.IMAGENET1K_V1))
 def efficientnet_v2_s(
     *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any
 ) -> EfficientNet:

From ebc1b654d9ec87bde0e29d0fb225005e899ed62c Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Tue, 22 Feb 2022 13:03:13 +0000
Subject: [PATCH 04/10] Update dropouts based on TF config ref

---
 torchvision/models/efficientnet.py           | 6 +++---
 torchvision/prototype/models/efficientnet.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index c56fac844ba..eab87e1dbb4 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -581,7 +581,7 @@ def efficientnet_v2_s(pretrained: bool = False, progress: bool = True, **kwargs:
     """
     arch = "efficientnet_v2_s"
     inverted_residual_setting, last_channel = _efficientnet_conf(arch)
-    return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs)
+    return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -595,7 +595,7 @@ def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs:
     """
     arch = "efficientnet_v2_m"
     inverted_residual_setting, last_channel = _efficientnet_conf(arch)
-    return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs)
+    return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs)
 
 
 def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -609,4 +609,4 @@ def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs:
     """
     arch = "efficientnet_v2_l"
     inverted_residual_setting, last_channel = _efficientnet_conf(arch)
-    return _efficientnet(arch, inverted_residual_setting, 0.5, last_channel, pretrained, progress, **kwargs)
+    return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs)
diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index 94f871d337c..8527a08ed5f 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -355,7 +355,7 @@ def efficientnet_v2_s(
     weights = EfficientNet_V2_S_Weights.verify(weights)
 
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
-    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
+    return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", None))
@@ -365,7 +365,7 @@ def efficientnet_v2_m(
     weights = EfficientNet_V2_M_Weights.verify(weights)
 
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m")
-    return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)
+    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
 
 
 @handle_legacy_interface(weights=("pretrained", None))
@@ -375,4 +375,4 @@ def efficientnet_v2_l(
     weights = EfficientNet_V2_L_Weights.verify(weights)
 
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l")
-    return _efficientnet(inverted_residual_setting, 0.5, last_channel, weights, progress, **kwargs)
+    return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)

From 7cec6a78457060523b02db254a92aedcdff95088 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Tue, 22 Feb 2022 13:19:32 +0000
Subject: [PATCH 05/10] Update BN eps on TF base_config

---
 torchvision/models/efficientnet.py           | 33 ++++++++++++++++++--
 torchvision/prototype/models/efficientnet.py | 30 ++++++++++++++++--
 2 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index eab87e1dbb4..38d99eefccc 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -581,7 +581,16 @@ def efficientnet_v2_s(pretrained: bool = False, progress: bool = True, **kwargs:
     """
     arch = "efficientnet_v2_s"
     inverted_residual_setting, last_channel = _efficientnet_conf(arch)
-    return _efficientnet(arch, inverted_residual_setting, 0.2, last_channel, pretrained, progress, **kwargs)
+    return _efficientnet(
+        arch,
+        inverted_residual_setting,
+        0.2,
+        last_channel,
+        pretrained,
+        progress,
+        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
+        **kwargs,
+    )
 
 
 def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -595,7 +604,16 @@ def efficientnet_v2_m(pretrained: bool = False, progress: bool = True, **kwargs:
     """
     arch = "efficientnet_v2_m"
     inverted_residual_setting, last_channel = _efficientnet_conf(arch)
-    return _efficientnet(arch, inverted_residual_setting, 0.3, last_channel, pretrained, progress, **kwargs)
+    return _efficientnet(
+        arch,
+        inverted_residual_setting,
+        0.3,
+        last_channel,
+        pretrained,
+        progress,
+        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
+        **kwargs,
+    )
 
 
 def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet:
@@ -609,4 +627,13 @@ def efficientnet_v2_l(pretrained: bool = False, progress: bool = True, **kwargs:
     """
     arch = "efficientnet_v2_l"
     inverted_residual_setting, last_channel = _efficientnet_conf(arch)
-    return _efficientnet(arch, inverted_residual_setting, 0.4, last_channel, pretrained, progress, **kwargs)
+    return _efficientnet(
+        arch,
+        inverted_residual_setting,
+        0.4,
+        last_channel,
+        pretrained,
+        progress,
+        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
+        **kwargs,
+    )
diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index 8527a08ed5f..bb6ec129bd7 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -355,7 +355,15 @@ def efficientnet_v2_s(
     weights = EfficientNet_V2_S_Weights.verify(weights)
 
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
-    return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting,
+        0.2,
+        last_channel,
+        weights,
+        progress,
+        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
+        **kwargs,
+    )
 
 
 @handle_legacy_interface(weights=("pretrained", None))
@@ -365,7 +373,15 @@ def efficientnet_v2_m(
     weights = EfficientNet_V2_M_Weights.verify(weights)
 
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m")
-    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting,
+        0.3,
+        last_channel,
+        weights,
+        progress,
+        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
+        **kwargs,
+    )
 
 
 @handle_legacy_interface(weights=("pretrained", None))
@@ -375,4 +391,12 @@ def efficientnet_v2_l(
     weights = EfficientNet_V2_L_Weights.verify(weights)
 
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l")
-    return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting,
+        0.4,
+        last_channel,
+        weights,
+        progress,
+        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
+        **kwargs,
+    )

From 2ff873487cec32883a6dc838eb18d8fe9fa9d746 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Fri, 25 Feb 2022 10:47:59 +0000
Subject: [PATCH 06/10] Use Conv2dNormActivation.

---
 torchvision/models/efficientnet.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index 0e42abd9a3e..fec9dd582af 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -193,7 +193,7 @@ def __init__(
         if expanded_channels != cnf.input_channels:
             # fused expand
             layers.append(
-                ConvNormActivation(
+                Conv2dNormActivation(
                     cnf.input_channels,
                     expanded_channels,
                     kernel_size=cnf.kernel,
@@ -205,13 +205,13 @@ def __init__(
 
             # project
             layers.append(
-                ConvNormActivation(
+                Conv2dNormActivation(
                     expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None
                 )
             )
         else:
             layers.append(
-                ConvNormActivation(
+                Conv2dNormActivation(
                     cnf.input_channels,
                     cnf.out_channels,
                     kernel_size=cnf.kernel,

From bf41dfb5fe367c621ba4c7f0e247bd7e552746ec Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Sun, 27 Feb 2022 13:36:44 +0000
Subject: [PATCH 07/10] Adding pre-trained weights for EfficientNetV2-s

---
 docs/source/models.rst                       | 12 +++++++++++-
 hubconf.py                                   |  3 +++
 references/classification/README.md          | 20 +++++++++++++++++++-
 torchvision/models/efficientnet.py           |  4 ++--
 torchvision/prototype/models/efficientnet.py | 13 ++++++-------
 5 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/docs/source/models.rst b/docs/source/models.rst
index 58bd0d81cd0..dbff7505d3c 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -38,7 +38,7 @@ architectures for image classification:
 -  `ResNeXt`_
 -  `Wide ResNet`_
 -  `MNASNet`_
--  `EfficientNet`_
+-  `EfficientNet`_ v1 & v2
 -  `RegNet`_
 -  `VisionTransformer`_
 -  `ConvNeXt`_
@@ -70,6 +70,9 @@ You can construct a model with random weights by calling its constructor:
     efficientnet_b5 = models.efficientnet_b5()
     efficientnet_b6 = models.efficientnet_b6()
     efficientnet_b7 = models.efficientnet_b7()
+    efficientnet_v2_s = models.efficientnet_v2_s()
+    efficientnet_v2_m = models.efficientnet_v2_m()
+    efficientnet_v2_l = models.efficientnet_v2_l()
     regnet_y_400mf = models.regnet_y_400mf()
     regnet_y_800mf = models.regnet_y_800mf()
     regnet_y_1_6gf = models.regnet_y_1_6gf()
@@ -122,6 +125,9 @@ These can be constructed by passing ``pretrained=True``:
     efficientnet_b5 = models.efficientnet_b5(pretrained=True)
     efficientnet_b6 = models.efficientnet_b6(pretrained=True)
     efficientnet_b7 = models.efficientnet_b7(pretrained=True)
+    efficientnet_v2_s = models.efficientnet_v2_s(pretrained=True)
+    efficientnet_v2_m = models.efficientnet_v2_m(pretrained=True)
+    efficientnet_v2_l = models.efficientnet_v2_l(pretrained=True)
     regnet_y_400mf = models.regnet_y_400mf(pretrained=True)
     regnet_y_800mf = models.regnet_y_800mf(pretrained=True)
     regnet_y_1_6gf = models.regnet_y_1_6gf(pretrained=True)
@@ -238,6 +244,7 @@ EfficientNet-B4                   83.384          96.594
 EfficientNet-B5                   83.444          96.628
 EfficientNet-B6                   84.008          96.916
 EfficientNet-B7                   84.122          96.908
+EfficientNetV2-s                  84.228          96.878
 regnet_x_400mf                    72.834          90.950
 regnet_x_800mf                    75.212          92.348
 regnet_x_1_6gf                    77.040          93.440
@@ -439,6 +446,9 @@ EfficientNet
     efficientnet_b5
     efficientnet_b6
     efficientnet_b7
+    efficientnet_v2_s
+    efficientnet_v2_m
+    efficientnet_v2_l
 
 RegNet
 ------------
diff --git a/hubconf.py b/hubconf.py
index 5c2ad8e9e0d..c3de4f2da9a 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -13,6 +13,9 @@
     efficientnet_b5,
     efficientnet_b6,
     efficientnet_b7,
+    efficientnet_v2_s,
+    efficientnet_v2_m,
+    efficientnet_v2_l,
 )
 from torchvision.models.googlenet import googlenet
 from torchvision.models.inception import inception_v3
diff --git a/references/classification/README.md b/references/classification/README.md
index e75336f23ca..f7fd2414f6d 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -88,7 +88,7 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
 and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
 
 
-### EfficientNet
+### EfficientNet-V1
 
 The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
 
@@ -114,6 +114,24 @@ torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --interpolation bic
       --val-resize-size 600 --val-crop-size 600 --train-crop-size 600 --test-only --pretrained
 ```
 
+
+### EfficientNet-V2
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \
+--train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \
+--ra-sampler --ra-reps 4
+```
+Here `$MODEL` is one of `efficientnet_v2_s`, `efficientnet_v2_m` and `efficientnet_v2_l`. 
+Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the other variants `384` and `480` respectively.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generatring the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch_size 16`.
+
+
 ### RegNet
 
 #### Small models
diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index fec9dd582af..86ce1b31ee5 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -42,8 +42,8 @@
     "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
     "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
     "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
-    # Temporary TF weights
-    "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth",
+    # Weights trained with TorchVision
+    "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth",
 }
 
 
diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index bb6ec129bd7..a25617f8079 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -60,7 +60,6 @@ def _efficientnet(
 _COMMON_META = {
     "task": "image_classification",
     "categories": _IMAGENET_CATEGORIES,
-    "interpolation": InterpolationMode.BICUBIC,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet",
 }
 
@@ -69,6 +68,7 @@ def _efficientnet(
     **_COMMON_META,
     "architecture": "EfficientNet",
     "publication_year": 2019,
+    "interpolation": InterpolationMode.BICUBIC,
     "min_size": (1, 1),
 }
 
@@ -77,6 +77,7 @@ def _efficientnet(
     **_COMMON_META,
     "architecture": "EfficientNetV2",
     "publication_year": 2021,
+    "interpolation": InterpolationMode.BILINEAR,
     "min_size": (33, 33),
 }
 
@@ -216,21 +217,19 @@ class EfficientNet_B7_Weights(WeightsEnum):
 
 class EfficientNet_V2_S_Weights(WeightsEnum):
     IMAGENET1K_V1 = Weights(
-        url="https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth",
+        url="https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth",
         transforms=partial(
             ImageNetEval,
             crop_size=384,
             resize_size=384,
-            interpolation=InterpolationMode.BICUBIC,
-            mean=(0.5, 0.5, 0.5),
-            std=(0.5, 0.5, 0.5),
+            interpolation=InterpolationMode.BILINEAR,
         ),
         meta={
             **_COMMON_META_V2,
             "num_params": 21458488,
             "size": (384, 384),
-            "acc@1": 83.152,
-            "acc@5": 96.400,
+            "acc@1": 84.228,
+            "acc@5": 96.878,
         },
     )
     DEFAULT = IMAGENET1K_V1

From abeac10b7139ea2cbb6db968b8824aebb8344b47 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Wed, 2 Mar 2022 09:17:51 +0000
Subject: [PATCH 08/10] Add Medium and Large weights

---
 references/classification/README.md          | 10 +++--
 torchvision/models/efficientnet.py           |  3 ++
 torchvision/prototype/models/efficientnet.py | 42 ++++++++++++++++++--
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/references/classification/README.md b/references/classification/README.md
index f7fd2414f6d..173fb454995 100644
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -124,12 +124,14 @@ torchrun --nproc_per_node=8 train.py \
 --train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \
 --ra-sampler --ra-reps 4
 ```
-Here `$MODEL` is one of `efficientnet_v2_s`, `efficientnet_v2_m` and `efficientnet_v2_l`. 
-Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the other variants `384` and `480` respectively.
+Here `$MODEL` is one of `efficientnet_v2_s` and `efficientnet_v2_m`. 
+Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the Medium variant used `384` and `480` respectively.
 
 Note that the above command corresponds to training on a single node with 8 GPUs.
-For generatring the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
-and `--batch_size 16`.
+For generating the pre-trained weights, we trained with 4 nodes, each with 8 GPUs (for a total of 32 GPUs),
+and `--batch-size 32`.
+
+The weights of the Large variant are ported from the original paper's TensorFlow implementation rather than trained from scratch. See the `EfficientNet_V2_L_Weights` entry for their exact preprocessing transforms.
 
 
 ### RegNet
diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index 86ce1b31ee5..f8238912ffd 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -44,6 +44,9 @@
     "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
     # Weights trained with TorchVision
     "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth",
+    "efficientnet_v2_m": "https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth",
+    # Weights ported from TF
+    "efficientnet_v2_l": "https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth",
 }
 
 
diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index a25617f8079..c1bec6d6ed0 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -236,11 +236,45 @@ class EfficientNet_V2_S_Weights(WeightsEnum):
 
 
 class EfficientNet_V2_M_Weights(WeightsEnum):
-    pass
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth",
+        transforms=partial(
+            ImageNetEval,
+            crop_size=480,
+            resize_size=480,
+            interpolation=InterpolationMode.BILINEAR,
+        ),
+        meta={
+            **_COMMON_META_V2,
+            "num_params": 54139356,
+            "size": (480, 480),
+            "acc@1": 85.119,
+            "acc@5": 97.151,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
 
 
 class EfficientNet_V2_L_Weights(WeightsEnum):
-    pass
+    IMAGENET1K_V1 = Weights(
+        url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth",
+        transforms=partial(
+            ImageNetEval,
+            crop_size=480,
+            resize_size=480,
+            interpolation=InterpolationMode.BICUBIC,
+            mean=(0.5, 0.5, 0.5),
+            std=(0.5, 0.5, 0.5),
+        ),
+        meta={
+            **_COMMON_META_V2,
+            "num_params": 118515272,
+            "size": (480, 480),
+            "acc@1": 85.808,
+            "acc@5": 97.788,
+        },
+    )
+    DEFAULT = IMAGENET1K_V1
 
 
 @handle_legacy_interface(weights=("pretrained", EfficientNet_B0_Weights.IMAGENET1K_V1))
@@ -365,7 +399,7 @@ def efficientnet_v2_s(
     )
 
 
-@handle_legacy_interface(weights=("pretrained", None))
+@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_M_Weights.IMAGENET1K_V1))
 def efficientnet_v2_m(
     *, weights: Optional[EfficientNet_V2_M_Weights] = None, progress: bool = True, **kwargs: Any
 ) -> EfficientNet:
@@ -383,7 +417,7 @@ def efficientnet_v2_m(
     )
 
 
-@handle_legacy_interface(weights=("pretrained", None))
+@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_L_Weights.IMAGENET1K_V1))
 def efficientnet_v2_l(
     *, weights: Optional[EfficientNet_V2_L_Weights] = None, progress: bool = True, **kwargs: Any
 ) -> EfficientNet:

From 907944e3de5a47a73cfa6320c00bbd237afe0412 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Wed, 2 Mar 2022 10:17:40 +0000
Subject: [PATCH 09/10] Update stats with single batch run.

---
 torchvision/prototype/models/efficientnet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/prototype/models/efficientnet.py b/torchvision/prototype/models/efficientnet.py
index c1bec6d6ed0..2619709764f 100644
--- a/torchvision/prototype/models/efficientnet.py
+++ b/torchvision/prototype/models/efficientnet.py
@@ -248,8 +248,8 @@ class EfficientNet_V2_M_Weights(WeightsEnum):
             **_COMMON_META_V2,
             "num_params": 54139356,
             "size": (480, 480),
-            "acc@1": 85.119,
-            "acc@5": 97.151,
+            "acc@1": 85.112,
+            "acc@5": 97.156,
         },
     )
     DEFAULT = IMAGENET1K_V1

From a2694320c12c257007d7835e9cca52f3241f6f1f Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Wed, 2 Mar 2022 10:23:12 +0000
Subject: [PATCH 10/10] Add accuracies in the docs.

---
 docs/source/models.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/source/models.rst b/docs/source/models.rst
index dbff7505d3c..84fee191a8e 100644
--- a/docs/source/models.rst
+++ b/docs/source/models.rst
@@ -245,6 +245,8 @@ EfficientNet-B5                   83.444          96.628
 EfficientNet-B6                   84.008          96.916
 EfficientNet-B7                   84.122          96.908
 EfficientNetV2-s                  84.228          96.878
+EfficientNetV2-m                  85.112          97.156
+EfficientNetV2-l                  85.808          97.788
 regnet_x_400mf                    72.834          90.950
 regnet_x_800mf                    75.212          92.348
 regnet_x_1_6gf                    77.040          93.440