From f8634742631a7c0d4f5882fc94a0bcf30c8553c1 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 15:21:59 +0100
Subject: [PATCH 01/14] feat: add target to resize for aspect ratio training

---
 doctr/transforms/modules/pytorch.py      | 26 ++++++++++++++++++++++--
 doctr/transforms/modules/tensorflow.py   | 25 +++++++++++++++++++++--
 references/detection/train_pytorch.py    |  8 ++++----
 references/detection/train_tensorflow.py |  8 ++++----
 4 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 045cc3967e..0726b9daff 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -4,9 +4,10 @@
 # See LICENSE or go to for full license details.
 
 import math
-from typing import Any, Dict, Tuple, Union
+from typing import Any, Dict, Tuple, Union, Optional
 
 import torch
+import numpy as np
 from PIL.Image import Image
 from torch.nn.functional import pad
 from torchvision.transforms import functional as F
@@ -27,7 +28,12 @@ def __init__(
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
-    def forward(self, img: torch.Tensor) -> torch.Tensor:
+    def forward(
+        self,
+        img: torch.Tensor,
+        target: Optional[np.ndarray] = None,
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
+
         target_ratio = self.size[0] / self.size[1]
         actual_ratio = img.shape[-2] / img.shape[-1]
         if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
@@ -41,11 +47,27 @@ def forward(self, img: torch.Tensor) -> torch.Tensor:
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
+            raw_shape = img.shape[-2:]
             # Pad (inverted in pytorch)
             _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
             if self.symmetric_pad:
                 half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
                 _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
+
+            # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
+            if target is not None:
+                if self.preserve_aspect_ratio:
+                    # Get absolute coords
+                    if target.shape[1:] == (4,):
+                        target[:, [0, 2]] *= raw_shape[-1] / self.output_size[1]
+                        target[:, [1, 3]] *= raw_shape[-2] / self.output_size[0]
+                    elif target.shape[1:] == (4, 2):
+                        target[..., 0] *= raw_shape[-1] / self.output_size[1]
+                        target[..., 1] *= raw_shape[-2] / self.output_size[0]
+                    else:
+                        raise AssertionError
+                return pad(img, _pad), target
+
             return pad(img, _pad)
 
     def __repr__(self) -> str:
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 7dc9bfc408..fee5e802d7 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.
 import random
-from typing import Any, Callable, Dict, Iterable, List, Tuple, Union
+from typing import Any, Callable, Dict, Iterable, List, Tuple, Union, Optional
 
 import numpy as np
 import tensorflow as tf
@@ -75,9 +75,15 @@ def extra_repr(self) -> str:
         _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
         return _repr
 
-    def __call__(self, img: tf.Tensor) -> tf.Tensor:
+    def __call__(
+        self,
+        img: tf.Tensor,
+        target: Optional[np.ndarray] = None,
+    ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
+
         input_dtype = img.dtype
         img = tf.image.resize(img, self.output_size, self.method, self.preserve_aspect_ratio)
+        raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
             # pad width
             if not self.symmetric_pad:
@@ -87,6 +93,21 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
             else:
                 offset = (int((self.output_size[0] - img.shape[0]) / 2), 0)
             img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
+
+        # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
+        if target is not None:
+            if self.preserve_aspect_ratio:
+                # Get absolute coords
+                if target.shape[1:] == (4,):
+                    target[:, [0, 2]] *= raw_shape[1] / self.output_size[1]
+                    target[:, [1, 3]] *= raw_shape[0] / self.output_size[0]
+                elif target.shape[1:] == (4, 2):
+                    target[..., 0] *= raw_shape[1] / self.output_size[1]
+                    target[..., 1] *= raw_shape[0] / self.output_size[0]
+                else:
+                    raise AssertionError
+            return tf.cast(img, dtype=input_dtype), target
+
         return tf.cast(img, dtype=input_dtype)
 
diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index d8d79b39f1..dd9c21f307 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -181,7 +181,7 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size)),
+        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
         use_polygons=args.rotation and not args.eval_straight,
     )
     val_loader = DataLoader(
@@ -243,16 +243,16 @@ def main(args):
         img_folder=os.path.join(args.train_path, 'images'),
         label_path=os.path.join(args.train_path, 'labels.json'),
         img_transforms=Compose(
-            ([T.Resize((args.input_size, args.input_size))] if not args.rotation else [])
-            + [
+            [
                 # Augmentations
                 T.RandomApply(T.ColorInversion(), .1),
                 ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
             ]
         ),
         sample_transforms=T.SampleCompose([
+            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
             T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size))),
+            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
         ]) if args.rotation else None,
         use_polygons=args.rotation,
     )
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 3bf72fd5e6..4fa18341db 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -141,7 +141,7 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size)),
+        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
         use_polygons=args.rotation and not args.eval_straight,
     )
     val_loader = DataLoader(
@@ -189,8 +189,7 @@ def main(args):
         img_folder=os.path.join(args.train_path, 'images'),
         label_path=os.path.join(args.train_path, 'labels.json'),
         img_transforms=T.Compose(
-            ([T.Resize((args.input_size, args.input_size))] if not args.rotation else [])
-            + [
+            [
                 # Augmentations
                 T.RandomApply(T.ColorInversion(), .1),
                 T.RandomJpegQuality(60),
@@ -200,8 +199,9 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose([
+            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
             T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size))),
+            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
         ]) if args.rotation else None,
         use_polygons=args.rotation,
     )

From 935386fc6067804f67828b782015e89629e46571 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 17:01:01 +0100
Subject: [PATCH 02/14] fix: tests

---
 tests/tensorflow/test_models_detection_tf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensorflow/test_models_detection_tf.py b/tests/tensorflow/test_models_detection_tf.py
index 549e987226..f27ff6cdf7 100644
--- a/tests/tensorflow/test_models_detection_tf.py
+++ b/tests/tensorflow/test_models_detection_tf.py
@@ -66,7 +66,7 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob):
         np.array([[.75, .75, .5, .5, 0], [.65, .7, .3, .4, 0]], dtype=np.float32),
     ]
     loss = model(input_tensor, target, training=True)['loss']
-    assert isinstance(loss, tf.Tensor) and ((loss - out['loss']) / loss).numpy() < 21e-2
+    assert isinstance(loss, tf.Tensor) and ((loss - out['loss']) / loss).numpy() < 25e-2
 
 
 @pytest.fixture(scope="session")

From 53130d9f20d9547d0aa3c3b4e87aaf8c8eb996f0 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 17:03:01 +0100
Subject: [PATCH 03/14] fix: sorting

---
 doctr/transforms/modules/pytorch.py    | 4 ++--
 doctr/transforms/modules/tensorflow.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 0726b9daff..7ea118a0d7 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -4,10 +4,10 @@
 # See LICENSE or go to for full license details.
 
 import math
-from typing import Any, Dict, Tuple, Union, Optional
+from typing import Any, Dict, Optional, Tuple, Union
 
-import torch
 import numpy as np
+import torch
 from PIL.Image import Image
 from torch.nn.functional import pad
 from torchvision.transforms import functional as F
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index fee5e802d7..99fd5c7216 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.
 import random
-from typing import Any, Callable, Dict, Iterable, List, Tuple, Union, Optional
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 import tensorflow as tf

From a439f7ca42805c54de12af3a3388732e818f2c0e Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 17:55:58 +0100
Subject: [PATCH 04/14] fix: typo

---
 references/detection/train_pytorch.py    | 11 ++++++-----
 references/detection/train_tensorflow.py | 11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index dd9c21f307..9e4fae3e8f 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -249,11 +249,12 @@ def main(args):
                 ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
             ]
         ),
-        sample_transforms=T.SampleCompose([
-            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
-            T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
-        ]) if args.rotation else None,
+        sample_transforms=T.SampleCompose(
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            + ([T.RandomRotate(90, expand=True),
+                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                ] if args.rotation else [])
+        ),
         use_polygons=args.rotation,
     )
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 4fa18341db..5bacf5c33c 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -198,11 +198,12 @@ def main(args):
                 T.RandomBrightness(.3),
             ]
         ),
-        sample_transforms=T.SampleCompose([
-            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
-            T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
-        ]) if args.rotation else None,
+        sample_transforms=T.SampleCompose(
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            + ([T.RandomRotate(90, expand=True),
+                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                ] if args.rotation else [])
+        ),
         use_polygons=args.rotation,
     )
     train_loader = DataLoader(

From e236956efe5c30faa466ad26a9843dfc3e677d8e Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 18:01:47 +0100
Subject: [PATCH 05/14] fix: typo

---
 doctr/transforms/modules/pytorch.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 7ea118a0d7..b0f9f1f669 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -59,11 +59,11 @@ def forward(
                 if self.preserve_aspect_ratio:
                     # Get absolute coords
                     if target.shape[1:] == (4,):
-                        target[:, [0, 2]] *= raw_shape[-1] / self.output_size[1]
-                        target[:, [1, 3]] *= raw_shape[-2] / self.output_size[0]
+                        target[:, [0, 2]] *= raw_shape[-1] / self.size[1]
+                        target[:, [1, 3]] *= raw_shape[-2] / self.size[0]
                     elif target.shape[1:] == (4, 2):
-                        target[..., 0] *= raw_shape[-1] / self.output_size[1]
-                        target[..., 1] *= raw_shape[-2] / self.output_size[0]
+                        target[..., 0] *= raw_shape[-1] / self.size[1]
+                        target[..., 1] *= raw_shape[-2] / self.size[0]
                     else:
                         raise AssertionError
                 return pad(img, _pad), target

From 8ee4d43384ebadfa9bf94f431b20485a13a9aaaf Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Thu, 17 Feb 2022 17:32:04 +0100
Subject: [PATCH 06/14] fix: symmetric padding case

---
 doctr/transforms/modules/pytorch.py      | 40 +++++++++++++++++-------
 doctr/transforms/modules/tensorflow.py   | 39 ++++++++++++++++-------
 references/detection/train_pytorch.py    |  4 +--
 references/detection/train_tensorflow.py |  4 +--
 4 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index b0f9f1f669..53898a8a18 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -23,10 +23,12 @@ def __init__(
         interpolation=F.InterpolationMode.BILINEAR,
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
+        pad: bool = True,
     ) -> None:
         super().__init__(size, interpolation)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
+        self.pad = pad
 
     def forward(
         self,
@@ -37,6 +39,8 @@ def forward(
         target_ratio = self.size[0] / self.size[1]
         actual_ratio = img.shape[-2] / img.shape[-1]
         if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
+            if target is not None:
+                return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
@@ -48,27 +52,41 @@ def forward(
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            # Pad (inverted in pytorch)
-            _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
-            if self.symmetric_pad:
-                half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
-                _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
+            if self.pad:
+                # Pad (inverted in pytorch)
+                _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
+                if self.symmetric_pad:
+                    half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
+                    _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
+                img = pad(img, _pad)
 
             # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
             if target is not None:
                 if self.preserve_aspect_ratio:
                     # Get absolute coords
                     if target.shape[1:] == (4,):
-                        target[:, [0, 2]] *= raw_shape[-1] / self.size[1]
-                        target[:, [1, 3]] *= raw_shape[-2] / self.size[0]
+                        if self.pad and self.symmetric_pad:
+                            if np.max(target) <= 1:
+                                offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
+                                target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
+                                target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2]
+                        else:
+                            target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
+                            target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                     elif target.shape[1:] == (4, 2):
-                        target[..., 0] *= raw_shape[-1] / self.size[1]
-                        target[..., 1] *= raw_shape[-2] / self.size[0]
+                        if self.pad and self.symmetric_pad:
+                            if np.max(target) <= 1:
+                                offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
+                                target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
+                                target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2]
+                        else:
+                            target[..., 0] *= raw_shape[-1] / img.shape[-1]
+                            target[..., 1] *= raw_shape[-2] / img.shape[-2]
                     else:
                         raise AssertionError
-                return pad(img, _pad), target
+                return img, target
 
-            return pad(img, _pad)
+            return img
 
     def __repr__(self) -> str:
         interpolate_str = self.interpolation.value
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 99fd5c7216..f2420a7091 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -63,11 +63,13 @@ def __init__(
         method: str = 'bilinear',
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
+        pad: bool = True
     ) -> None:
         self.output_size = output_size
         self.method = method
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
+        self.pad = pad
 
     def extra_repr(self) -> str:
         _repr = f"output_size={self.output_size}, method='{self.method}'"
@@ -85,25 +87,38 @@ def __call__(
         input_dtype = img.dtype
         img = tf.image.resize(img, self.output_size, self.method, self.preserve_aspect_ratio)
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            # pad width
-            if not self.symmetric_pad:
-                offset = (0, 0)
-            elif self.output_size[0] == img.shape[0]:
-                offset = (0, int((self.output_size[1] - img.shape[1]) / 2))
-            else:
-                offset = (int((self.output_size[0] - img.shape[0]) / 2), 0)
-            img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
+            if self.pad:
+                # pad width
+                if not self.symmetric_pad:
+                    offset = (0, 0)
+                elif self.output_size[0] == img.shape[0]:
+                    offset = (0, int((self.output_size[1] - img.shape[1]) / 2))
+                else:
+                    offset = (int((self.output_size[0] - img.shape[0]) / 2), 0)
+                img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
 
         # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
         if target is not None:
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    target[:, [0, 2]] *= raw_shape[1] / self.output_size[1]
-                    target[:, [1, 3]] *= raw_shape[0] / self.output_size[0]
+                    if self.pad and self.symmetric_pad:
+                        if np.max(target) <= 1:
+                            offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
+                            target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
+                            target[:, [1, 3]] = offset[0] + target[:, [1, 3]] * raw_shape[0] / img.shape[0]
+                    else:
+                        target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
+                        target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    target[..., 0] *= raw_shape[1] / self.output_size[1]
-                    target[..., 1] *= raw_shape[0] / self.output_size[0]
+                    if self.pad and self.symmetric_pad:
+                        if np.max(target) <= 1:
+                            offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
+                            target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]
+                            target[..., 1] = offset[0] + target[..., 1] * raw_shape[0] / img.shape[0]
+                    else:
+                        target[..., 0] *= raw_shape[1] / img.shape[1]
+                        target[..., 1] *= raw_shape[0] / img.shape[0]
                 else:
                     raise AssertionError
             return tf.cast(img, dtype=input_dtype), target
diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index 9e4fae3e8f..e54b69d4a8 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -250,9 +250,9 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
             + ([T.RandomRotate(90, expand=True),
-                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 5bacf5c33c..ce5bb863fe 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -199,9 +199,9 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
             + ([T.RandomRotate(90, expand=True),
-                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,

From 2ce8609563ccabeeb8605a0a61c004989460f850 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Fri, 18 Feb 2022 09:20:20 +0100
Subject: [PATCH 07/14] fix: unrotated case

---
 references/detection/train_pytorch.py    | 8 +++++---
 references/detection/train_tensorflow.py | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index e54b69d4a8..e7d33b2453 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -250,9 +250,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
-            + ([T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+              ] if not args.rotation else [])
+            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index ce5bb863fe..1fc953420a 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -199,9 +199,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
-            + ([T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+              ] if not args.rotation else [])
+            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,

From f9e8f5c96574b9525956bfb8068d195b936853a2 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Tue, 22 Feb 2022 12:17:06 +0100
Subject: [PATCH 08/14] fix: args of resize

---
 doctr/models/preprocessor/pytorch.py     | 11 +++++--
 doctr/models/preprocessor/tensorflow.py  | 12 ++++++--
 doctr/transforms/modules/pytorch.py      | 37 +++++++++++++++---------
 doctr/transforms/modules/tensorflow.py   | 21 +++++++++-----
 references/detection/train_pytorch.py    |  6 ++--
 references/detection/train_tensorflow.py |  6 ++--
 6 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py
index 52549bde36..dded20281c 100644
--- a/doctr/models/preprocessor/pytorch.py
+++ b/doctr/models/preprocessor/pytorch.py
@@ -105,8 +105,15 @@ def __call__(
         elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
             raise TypeError("unsupported data type for torch.Tensor")
         # Resizing
-        if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
-            x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
+        if isinstance(self.resize.size, int):
+            if x.shape[-2] != self.resize.size or x.shape[-1] != self.resize.size:
+                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
+        elif isinstance(self.resize.size, tuple):
+            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
+                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
+        else:
+            raise AssertionError("resize size must be a tuple or an int")
+
         # Data type
         if x.dtype == torch.uint8:
             x = x.to(dtype=torch.float32).div(255).clip(0, 1)
diff --git a/doctr/models/preprocessor/tensorflow.py b/doctr/models/preprocessor/tensorflow.py
index 642568b166..38d705b506 100644
--- a/doctr/models/preprocessor/tensorflow.py
+++ b/doctr/models/preprocessor/tensorflow.py
@@ -108,8 +108,16 @@ def __call__(
         if x.dtype == tf.uint8:
             x = tf.image.convert_image_dtype(x, dtype=tf.float32)
         # Resizing
-        if x.shape[1] != self.resize.output_size[0] or x.shape[2] != self.resize.output_size[1]:
-            x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
+        if isinstance(self.resize.output_size, int):
+            if x.shape[1] != self.resize.output_size or x.shape[2] != self.resize.output_size:
+                x = tf.image.resize(
+                    x, (self.resize.output_size, self.resize.output_size), method=self.resize.method
+                )
+        elif isinstance(self.resize.output_size, tuple):
+            if x.shape[1] != self.resize.output_size[0] or x.shape[2] != self.resize.output_size[1]:
+                x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
+        else:
+            raise AssertionError("resize output size must be an int or a tuple")
 
         batches = [x]
 
diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 53898a8a18..86340f0ee0 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -19,16 +19,14 @@ class Resize(T.Resize):
     def __init__(
         self,
-        size: Tuple[int, int],
+        size: Union[int, Tuple[int, int]],
         interpolation=F.InterpolationMode.BILINEAR,
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
-        pad: bool = True,
     ) -> None:
         super().__init__(size, interpolation)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
-        self.pad = pad
 
     def forward(
         self,
@@ -36,23 +34,36 @@ def forward(
         target: Optional[np.ndarray] = None,
     ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
 
-        target_ratio = self.size[0] / self.size[1]
-        actual_ratio = img.shape[-2] / img.shape[-1]
-        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
+        if isinstance(self.size, int):
+            target_ratio = img.shape[-2] / img.shape[-1]
+            actual_ratio = img.shape[-2] / img.shape[-1]
+        elif isinstance(self.size, tuple):
+            target_ratio = self.size[0] / self.size[1]
+            actual_ratio = img.shape[-2] / img.shape[-1]
+        else:
+            raise AssertionError("size should be either a tuple or an int")
+
+        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and isinstance(self.size, tuple)):
             if target is not None:
                 return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
-            if actual_ratio > target_ratio:
-                tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
-            else:
-                tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
+            if isinstance(self.size, tuple):
+                if actual_ratio > target_ratio:
+                    tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
+                else:
+                    tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
+            elif isinstance(self.size, int):  # self.size is the longest side, infer the other
+                if img.shape[-2] <= img.shape[-1]:
+                    tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
+                else:
+                    tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            if self.pad:
+            if isinstance(self.size, tuple):
                 # Pad (inverted in pytorch)
                 _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
                 if self.symmetric_pad:
@@ -65,7 +76,7 @@ def forward(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
@@ -74,7 +85,7 @@ def forward(
                             target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                             target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                 elif target.shape[1:] == (4, 2):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index f2420a7091..77014c652a 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -59,17 +59,15 @@ class Resize(NestedObject):
     """
     def __init__(
         self,
-        output_size: Tuple[int, int],
+        output_size: Union[int, Tuple[int, int]],
         method: str = 'bilinear',
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
-        pad: bool = True
     ) -> None:
         self.output_size = output_size
         self.method = method
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
-        self.pad = pad
 
     def extra_repr(self) -> str:
         _repr = f"output_size={self.output_size}, method='{self.method}'"
@@ -84,11 +82,18 @@ def __call__(
     ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
 
         input_dtype = img.dtype
-        img = tf.image.resize(img, self.output_size, self.method, self.preserve_aspect_ratio)
+        if isinstance(self.output_size, int):
+            wanted_size = (self.output_size, self.output_size)
+        elif isinstance(self.output_size, tuple):
+            wanted_size = self.output_size
+        else:
+            raise AssertionError("Output size should be either a Tuple or an int")
+        img = tf.image.resize(img, wanted_size, self.method, self.preserve_aspect_ratio)
+        # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            if self.pad:
-                # pad width
+            if isinstance(self.output_size, tuple):
+                # In that case we need to pad because we want to enforce both width and height
                 if not self.symmetric_pad:
                     offset = (0, 0)
                 elif self.output_size[0] == img.shape[0]:
@@ -102,7 +107,7 @@ def __call__(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
@@ -111,7 +116,7 @@ def __call__(
                             target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                             target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]
diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index e7d33b2453..f59ad60384 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -250,11 +250,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
               ] if not args.rotation else [])
-            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),
                 T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 1fc953420a..f20f931f3b 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -199,11 +199,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
              ] if not args.rotation else [])
-            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
                 T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,

From d4b93506079570d91cd656aa91f9a572af751eca Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Tue, 22 Feb 2022 17:43:14 +0100
Subject: [PATCH 09/14] fix: tests

---
 doctr/transforms/modules/pytorch.py    | 14 ++++++++------
 doctr/transforms/modules/tensorflow.py | 14 +++++++++-----
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 86340f0ee0..3f3c8e0498 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -37,19 +37,21 @@ def forward(
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
             actual_ratio = img.shape[-2] / img.shape[-1]
-        elif isinstance(self.size, tuple):
+        elif isinstance(self.size, tuple) or isinstance(self.size, list):
             target_ratio = self.size[0] / self.size[1]
             actual_ratio = img.shape[-2] / img.shape[-1]
         else:
             raise AssertionError("size should be either a tuple or an int")
 
-        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and isinstance(self.size, tuple)):
+        if not self.preserve_aspect_ratio or (
+            target_ratio == actual_ratio and (isinstance(self.size, tuple) or isinstance(self.size, list))
+        ):
             if target is not None:
                 return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
-            if isinstance(self.size, tuple):
+            if isinstance(self.size, tuple) or isinstance(self.size, list):
                 if actual_ratio > target_ratio:
                     tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
                 else:
@@ -63,7 +65,7 @@ def forward(
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            if isinstance(self.size, tuple):
+            if isinstance(self.size, tuple) or isinstance(self.size, list):
                 # Pad (inverted in pytorch)
                 _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
                 if self.symmetric_pad:
@@ -76,7 +78,7 @@ def forward(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if isinstance(self.size, tuple) and self.symmetric_pad:
+                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
@@ -85,7 +87,7 @@ def forward(
                             target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                             target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                 elif target.shape[1:] == (4, 2):
-                    if isinstance(self.size, tuple) and self.symmetric_pad:
+                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 77014c652a..4bfdc9ddfc 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -84,15 +84,15 @@ def __call__(
         input_dtype = img.dtype
         if isinstance(self.output_size, int):
             wanted_size = (self.output_size, self.output_size)
-        elif isinstance(self.output_size, tuple):
+        elif isinstance(self.output_size, tuple) or isinstance(self.output_size, list):
            wanted_size = self.output_size
         else:
-            raise AssertionError("Output size should be either a Tuple or an int")
+            raise AssertionError("Output size should be either a list, a tuple or an int")
         img = tf.image.resize(img, wanted_size, self.method, self.preserve_aspect_ratio)
         # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            if isinstance(self.output_size, tuple):
+            if (isinstance(self.output_size, tuple) or isinstance(self.output_size, list)):
                 # In that case we need to pad because we want to enforce both width and height
                 if not self.symmetric_pad:
                     offset = (0, 0)
@@ -107,7 +107,9 @@ def __call__(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
+                    if (
+                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
+                    ) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
@@ -116,7 +118,9 @@ def __call__(
                             target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                             target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
+                    if (
+                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
+                    ) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]

From a029e47a15f9c3c585e308787ce43a64c9d74a1e Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Fri, 25 Feb 2022 14:51:20 +0100
Subject: [PATCH 10/14] fix: preprocessor files

---
 doctr/models/preprocessor/pytorch.py    | 11 ++---------
 doctr/models/preprocessor/tensorflow.py | 12 ++----------
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py
index dded20281c..52549bde36 100644
--- a/doctr/models/preprocessor/pytorch.py
+++ b/doctr/models/preprocessor/pytorch.py
@@ -105,15 +105,8 @@ def __call__(
         elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
             raise TypeError("unsupported data type for torch.Tensor")
         # Resizing
-        if isinstance(self.resize.size, int):
-            if x.shape[-2] != self.resize.size or x.shape[-1] != self.resize.size:
-                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
-        elif isinstance(self.resize.size, tuple):
-            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
-                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
-        else:
-            raise AssertionError("resize size must be a tuple or an int")
-
+        if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
+            x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
         # Data type
         if x.dtype == torch.uint8:
             x = x.to(dtype=torch.float32).div(255).clip(0, 1)
diff --git a/doctr/models/preprocessor/tensorflow.py b/doctr/models/preprocessor/tensorflow.py
index 38d705b506..6d72965c74 100644
--- a/doctr/models/preprocessor/tensorflow.py
+++ b/doctr/models/preprocessor/tensorflow.py
@@ -108,16 +108,8 @@ def __call__(
         if x.dtype == tf.uint8:
             x = tf.image.convert_image_dtype(x, dtype=tf.float32)
         # Resizing
-        if isinstance(self.resize.output_size, int):
-            if x.shape[1] != self.resize.output_size or x.shape[2] != self.resize.output_size:
-                x = tf.image.resize(
-                    x, (self.resize.output_size, self.resize.output_size), method=self.resize.method
-                )
-        elif isinstance(self.resize.output_size, tuple):
-            if x.shape[1] != self.resize.output_size[0] or x.shape[2] != self.resize.output_size[1]:
-                x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
-        else:
-            raise AssertionError("resize output size must be an int or a tuple")
+        if (x.shape[1], x.shape[2]) != self.resize.output_size:  # type: ignore[index]
+            x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
 
         batches = [x]

From de09f2030157ca1718eb2156fefcf7da2348827a Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Fri, 25 Feb 2022 18:32:31 +0100
Subject: [PATCH 11/14] fix: validation set aspect ratio

---
 references/detection/train_pytorch.py    | 9 ++++++++-
 references/detection/train_tensorflow.py | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index f59ad60384..fccd2dab00 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -181,7 +181,14 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
+        sample_transforms=T.SampleCompose(
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+              ] if not args.rotation or args.eval_straight else [])
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+                ] if args.rotation and not args.eval_straight else [])
+        ),
         use_polygons=args.rotation and not args.eval_straight,
     )
     val_loader = DataLoader(
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index f20f931f3b..315bba15ba 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -141,7 +141,14 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
+        sample_transforms=T.SampleCompose(
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+              ] if not args.rotation or args.eval_straight else [])
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+                ] if args.rotation and not args.eval_straight else [])
+        ),
        use_polygons=args.rotation and not args.eval_straight,
    )
    val_loader = DataLoader(

From 4c7dc530a3b28ba1d8c9f5d3650944c479f5fdff Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 9 Mar 2022 12:20:11 +0100
Subject: [PATCH 12/14] fix: requested changes

---
 doctr/transforms/modules/pytorch.py    | 14 ++++++++------
 doctr/transforms/modules/tensorflow.py | 26 ++++++++++++--------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index d4e23f588a..5ebee7c311 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -39,21 +39,23 @@ def forward(
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
             actual_ratio = img.shape[-2] / img.shape[-1]
-        elif isinstance(self.size, tuple) or isinstance(self.size, list):
+        elif isinstance(self.size, (tuple, list)):
             target_ratio = self.size[0] / self.size[1]
             actual_ratio = img.shape[-2] / img.shape[-1]
         else:
             raise AssertionError("size should be either a tuple or an int")
 
         if not self.preserve_aspect_ratio or (
-            target_ratio == actual_ratio and (isinstance(self.size, tuple) or isinstance(self.size, list))
+            target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))
         ):
+            # If we don't preserve the aspect ratio or the wanted aspect ratio is the same than the original one
+            # We can use with the regular resize
             if target is not None:
                 return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
-            if isinstance(self.size, tuple) or isinstance(self.size, list):
+            if isinstance(self.size, (tuple, list)):
                 if actual_ratio > target_ratio:
                     tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
                 else:
@@ -67,7 +69,7 @@ def forward(
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            if isinstance(self.size, tuple) or isinstance(self.size, list):
+            if isinstance(self.size, (tuple, list)):
                 # Pad (inverted in pytorch)
                 _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
                 if self.symmetric_pad:
@@ -80,7 +82,7 @@ def forward(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
+                    if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
@@ -89,7 +91,7 @@ def forward(
                             target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                             target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                 elif target.shape[1:] == (4, 2):
-                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
+                    if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 7c213a0319..e45e236855 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -71,6 +71,13 @@ def __init__(
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
+        if isinstance(self.output_size, int):
+            self.wanted_size = (self.output_size, self.output_size)
+        elif isinstance(self.output_size, (tuple, list)):
+            self.wanted_size = self.output_size
+        else:
+            raise AssertionError("Output size should be either a list, a tuple or an int")
+
     def extra_repr(self) -> str:
         _repr = f"output_size={self.output_size}, method='{self.method}'"
         if self.preserve_aspect_ratio:
@@ -84,17 +91,12 @@ def __call__(
     ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
 
         input_dtype = img.dtype
-        if isinstance(self.output_size, int):
-            wanted_size = (self.output_size, self.output_size)
-        elif isinstance(self.output_size, tuple) or isinstance(self.output_size, list):
-            wanted_size = self.output_size
-        else:
-            raise AssertionError("Output size should be either a list, a tuple or an int")
-        img = tf.image.resize(img, wanted_size, self.method, self.preserve_aspect_ratio)
+
+        img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio)
         # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            if (isinstance(self.output_size, tuple) or isinstance(self.output_size, list)):
+            if isinstance(self.output_size, (tuple, list)):
                 # In that case we need to pad because we want to enforce both width and height
                 if not self.symmetric_pad:
                     offset = (0, 0)
@@ -109,9 +111,7 @@ def __call__(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if (
-                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
-                    ) and self.symmetric_pad:
+                    if isinstance(self.output_size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
@@ -120,9 +120,7 @@ def __call__(
                             target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                             target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    if (
-                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
-                    ) and self.symmetric_pad:
+                    if isinstance(self.output_size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]

From a40ca1cddee7db268ce08575928183c51fbbdc66 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 9 Mar 2022 14:50:15 +0100
Subject: [PATCH 13/14] fix: requested changes

---
 doctr/transforms/modules/pytorch.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 5ebee7c311..07752b266b 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -30,6 +30,9 @@ def __init__(
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
+        if not isinstance(self.size, (int, tuple, list)):
+            raise AssertionError("size should be either a tuple, a list or an int")
+
     def forward(
         self,
         img: torch.Tensor,
@@ -38,13 +41,10 @@ def forward(
 
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
-            actual_ratio = img.shape[-2] / img.shape[-1]
-        elif isinstance(self.size, (tuple, list)):
+        else:
             target_ratio = self.size[0] / self.size[1]
-            actual_ratio = img.shape[-2] / img.shape[-1]
-        else:
-            raise AssertionError("size should be either a tuple or an int")
-
+        actual_ratio = img.shape[-2] / img.shape[-1]
+
         if not self.preserve_aspect_ratio or (
             target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))
         ):

From b8dfb341390a6112543f1354fd1ce3d1ed3bc7ae Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 9 Mar 2022 15:51:53 +0100
Subject: [PATCH 14/14] fix: style

---
 doctr/transforms/modules/pytorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 07752b266b..dd6ef572fa 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -44,7 +44,7 @@ def forward(
         else:
             target_ratio = self.size[0] / self.size[1]
-        actual_ratio = img.shape[-2] / img.shape[-1]
-
+        actual_ratio = img.shape[-2] / img.shape[-1]
+
         if not self.preserve_aspect_ratio or (
             target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))
         ):
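
The series above leaves `Resize` with two calling conventions: a tuple size enforces both dimensions (optionally preserving the aspect ratio and padding, with relative-coordinate targets shifted by the symmetric-padding offset), while an int size only constrains the longest side and never pads. A minimal sketch of both behaviours on the TensorFlow backend, assuming a doctr build that includes these patches (the image and box values below are illustrative, not taken from the patches):

    import numpy as np
    import tensorflow as tf
    from doctr import transforms as T

    # A 60x120 image with one relative-coordinate box covering the whole page
    img = tf.ones((60, 120, 3), dtype=tf.float32)
    boxes = np.array([[0., 0., 1., 1.]], dtype=np.float32)  # (xmin, ymin, xmax, ymax)

    # Tuple size: enforce 128x128, keep the aspect ratio, pad symmetrically
    out_img, out_boxes = T.Resize((128, 128), preserve_aspect_ratio=True, symmetric_pad=True)(img, boxes)
    print(out_img.shape)  # (128, 128, 3): the page sits in a 64px-high central band
    print(out_boxes)      # ~[[0., 0.25, 1., 0.75]]: the box follows the padded page

    # Int size: only the longest side is constrained, and no padding is applied
    out_img = T.Resize(128, preserve_aspect_ratio=True)(img)
    print(out_img.shape)  # (64, 128, 3)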
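That distinction is what the final three-step pipeline in the reference scripts relies on: resize the longest side first without padding (so relative boxes stay valid), rotate with an expanding canvas, then enforce the square input size with symmetric padding. A sketch of that rotated branch on the PyTorch backend, under the same assumptions, with 1024 standing in for args.input_size:

    import numpy as np
    import torch
    from doctr import transforms as T

    rotated_branch = T.SampleCompose([
        # 1. Longest side to 1024, aspect ratio kept; int size means no padding
        T.Resize(1024, preserve_aspect_ratio=True),
        # 2. Random rotation, expanding the canvas so no corner is cropped
        T.RandomRotate(90, expand=True),
        # 3. Enforce the square input, padding symmetrically around the page
        T.Resize((1024, 1024), preserve_aspect_ratio=True, symmetric_pad=True),
    ])

    img = torch.rand(3, 512, 768)
    # One relative (N, 4, 2) polygon, as use_polygons=True produces
    polys = np.array([[[.1, .1], [.9, .1], [.9, .2], [.1, .2]]], dtype=np.float32)
    out_img, out_polys = rotated_branch(img, polys)
    print(out_img.shape)  # torch.Size([3, 1024, 1024])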