From f8634742631a7c0d4f5882fc94a0bcf30c8553c1 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 15:21:59 +0100
Subject: [PATCH 01/14] feat: add target to resize for aspect ratio training

---
 doctr/transforms/modules/pytorch.py      | 26 ++++++++++++++++++++++--
 doctr/transforms/modules/tensorflow.py   | 25 +++++++++++++++++++++--
 references/detection/train_pytorch.py    |  8 ++++----
 references/detection/train_tensorflow.py |  8 ++++----
 4 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 045cc3967e..0726b9daff 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -4,9 +4,10 @@
 # See LICENSE or go to for full license details.
 
 import math
-from typing import Any, Dict, Tuple, Union
+from typing import Any, Dict, Tuple, Union, Optional
 
 import torch
+import numpy as np
 from PIL.Image import Image
 from torch.nn.functional import pad
 from torchvision.transforms import functional as F
@@ -27,7 +28,12 @@ def __init__(
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
-    def forward(self, img: torch.Tensor) -> torch.Tensor:
+    def forward(
+        self,
+        img: torch.Tensor,
+        target: Optional[np.ndarray] = None,
+    ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
+
         target_ratio = self.size[0] / self.size[1]
         actual_ratio = img.shape[-2] / img.shape[-1]
         if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
@@ -41,11 +47,27 @@ def forward(self, img: torch.Tensor) -> torch.Tensor:
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
+            raw_shape = img.shape[-2:]
             # Pad (inverted in pytorch)
             _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
             if self.symmetric_pad:
                 half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
                 _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
+
+            # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
+            if target is not None:
+                if self.preserve_aspect_ratio:
+                    # Get absolute coords
+                    if target.shape[1:] == (4,):
+                        target[:, [0, 2]] *= raw_shape[-1] / self.output_size[1]
+                        target[:, [1, 3]] *= raw_shape[-2] / self.output_size[0]
+                    elif target.shape[1:] == (4, 2):
+                        target[..., 0] *= raw_shape[-1] / self.output_size[1]
+                        target[..., 1] *= raw_shape[-2] / self.output_size[0]
+                    else:
+                        raise AssertionError
+                return pad(img, _pad), target
+
             return pad(img, _pad)
 
     def __repr__(self) -> str:
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 7dc9bfc408..fee5e802d7 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.
 import random
-from typing import Any, Callable, Dict, Iterable, List, Tuple, Union
+from typing import Any, Callable, Dict, Iterable, List, Tuple, Union, Optional
 
 import numpy as np
 import tensorflow as tf
@@ -75,9 +75,15 @@ def extra_repr(self) -> str:
         _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
         return _repr
 
-    def __call__(self, img: tf.Tensor) -> tf.Tensor:
+    def __call__(
+        self,
+        img: tf.Tensor,
+        target: Optional[np.ndarray] = None,
+    ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
+
         input_dtype = img.dtype
         img = tf.image.resize(img, self.output_size, self.method, self.preserve_aspect_ratio)
+        raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
             # pad width
             if not self.symmetric_pad:
@@ -87,6 +93,21 @@ def __call__(self, img: tf.Tensor) -> tf.Tensor:
             else:
                 offset = (int((self.output_size[0] - img.shape[0]) / 2), 0)
             img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
+
+        # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
+        if target is not None:
+            if self.preserve_aspect_ratio:
+                # Get absolute coords
+                if target.shape[1:] == (4,):
+                    target[:, [0, 2]] *= raw_shape[1] / self.output_size[1]
+                    target[:, [1, 3]] *= raw_shape[0] / self.output_size[0]
+                elif target.shape[1:] == (4, 2):
+                    target[..., 0] *= raw_shape[1] / self.output_size[1]
+                    target[..., 1] *= raw_shape[0] / self.output_size[0]
+                else:
+                    raise AssertionError
+            return tf.cast(img, dtype=input_dtype), target
+
         return tf.cast(img, dtype=input_dtype)
 
diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index d8d79b39f1..dd9c21f307 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -181,7 +181,7 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size)),
+        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
         use_polygons=args.rotation and not args.eval_straight,
     )
     val_loader = DataLoader(
@@ -243,16 +243,16 @@ def main(args):
         img_folder=os.path.join(args.train_path, 'images'),
         label_path=os.path.join(args.train_path, 'labels.json'),
         img_transforms=Compose(
-            ([T.Resize((args.input_size, args.input_size))] if not args.rotation else [])
-            + [
+            [
                 # Augmentations
                 T.RandomApply(T.ColorInversion(), .1),
                 ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
             ]
         ),
         sample_transforms=T.SampleCompose([
+            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
             T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size))),
+            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
         ]) if args.rotation else None,
         use_polygons=args.rotation,
     )
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 3bf72fd5e6..4fa18341db 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -141,7 +141,7 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size)),
+        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
         use_polygons=args.rotation and not args.eval_straight,
     )
     val_loader = DataLoader(
@@ -189,8 +189,7 @@ def main(args):
         img_folder=os.path.join(args.train_path, 'images'),
         label_path=os.path.join(args.train_path, 'labels.json'),
         img_transforms=T.Compose(
-            ([T.Resize((args.input_size, args.input_size))] if not args.rotation else [])
-            + [
+            [
                 # Augmentations
                 T.RandomApply(T.ColorInversion(), .1),
                 T.RandomJpegQuality(60),
@@ -200,8 +199,9 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose([
+            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
             T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size))),
+            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
         ]) if args.rotation else None,
         use_polygons=args.rotation,
     )

From 935386fc6067804f67828b782015e89629e46571 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 17:01:01 +0100
Subject: [PATCH 02/14] fix: tests

---
 tests/tensorflow/test_models_detection_tf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tensorflow/test_models_detection_tf.py b/tests/tensorflow/test_models_detection_tf.py
index 549e987226..f27ff6cdf7 100644
--- a/tests/tensorflow/test_models_detection_tf.py
+++ b/tests/tensorflow/test_models_detection_tf.py
@@ -66,7 +66,7 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob):
         np.array([[.75, .75, .5, .5, 0], [.65, .7, .3, .4, 0]], dtype=np.float32),
     ]
     loss = model(input_tensor, target, training=True)['loss']
-    assert isinstance(loss, tf.Tensor) and ((loss - out['loss']) / loss).numpy() < 21e-2
+    assert isinstance(loss, tf.Tensor) and ((loss - out['loss']) / loss).numpy() < 25e-2
 
 
 @pytest.fixture(scope="session")

From 53130d9f20d9547d0aa3c3b4e87aaf8c8eb996f0 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 17:03:01 +0100
Subject: [PATCH 03/14] fix: sorting

---
 doctr/transforms/modules/pytorch.py    | 4 ++--
 doctr/transforms/modules/tensorflow.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 0726b9daff..7ea118a0d7 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -4,10 +4,10 @@
 # See LICENSE or go to for full license details.
 
 import math
-from typing import Any, Dict, Tuple, Union, Optional
+from typing import Any, Dict, Optional, Tuple, Union
 
-import torch
 import numpy as np
+import torch
 from PIL.Image import Image
 from torch.nn.functional import pad
 from torchvision.transforms import functional as F
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index fee5e802d7..99fd5c7216 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to for full license details.
 import random
-from typing import Any, Callable, Dict, Iterable, List, Tuple, Union, Optional
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 import tensorflow as tf

From a439f7ca42805c54de12af3a3388732e818f2c0e Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 17:55:58 +0100
Subject: [PATCH 04/14] fix: typo

---
 references/detection/train_pytorch.py    | 11 ++++++-----
 references/detection/train_tensorflow.py | 11 ++++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index dd9c21f307..9e4fae3e8f 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -249,11 +249,12 @@ def main(args):
                 ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
             ]
         ),
-        sample_transforms=T.SampleCompose([
-            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
-            T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
-        ]) if args.rotation else None,
+        sample_transforms=T.SampleCompose(
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            + ([T.RandomRotate(90, expand=True),
+                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                ] if args.rotation else [])
+        ),
         use_polygons=args.rotation,
     )
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 4fa18341db..5bacf5c33c 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -198,11 +198,12 @@ def main(args):
                 T.RandomBrightness(.3),
             ]
         ),
-        sample_transforms=T.SampleCompose([
-            T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
-            T.RandomRotate(90, expand=True),
-            T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)),
-        ]) if args.rotation else None,
+        sample_transforms=T.SampleCompose(
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            + ([T.RandomRotate(90, expand=True),
+                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                ] if args.rotation else [])
+        ),
         use_polygons=args.rotation,
     )
     train_loader = DataLoader(

From e236956efe5c30faa466ad26a9843dfc3e677d8e Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 16 Feb 2022 18:01:47 +0100
Subject: [PATCH 05/14] fix: typo

---
 doctr/transforms/modules/pytorch.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 7ea118a0d7..b0f9f1f669 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -59,11 +59,11 @@ def forward(
                 if self.preserve_aspect_ratio:
                     # Get absolute coords
                     if target.shape[1:] == (4,):
-                        target[:, [0, 2]] *= raw_shape[-1] / self.output_size[1]
-                        target[:, [1, 3]] *= raw_shape[-2] / self.output_size[0]
+                        target[:, [0, 2]] *= raw_shape[-1] / self.size[1]
+                        target[:, [1, 3]] *= raw_shape[-2] / self.size[0]
                     elif target.shape[1:] == (4, 2):
-                        target[..., 0] *= raw_shape[-1] / self.output_size[1]
-                        target[..., 1] *= raw_shape[-2] / self.output_size[0]
+                        target[..., 0] *= raw_shape[-1] / self.size[1]
+                        target[..., 1] *= raw_shape[-2] / self.size[0]
                     else:
                         raise AssertionError
                 return pad(img, _pad), target

From 8ee4d43384ebadfa9bf94f431b20485a13a9aaaf Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Thu, 17 Feb 2022 17:32:04 +0100
Subject: [PATCH 06/14] fix: symmetric padding case

---
 doctr/transforms/modules/pytorch.py      | 40 +++++++++++++++++-------
 doctr/transforms/modules/tensorflow.py   | 39 ++++++++++++++++-------
 references/detection/train_pytorch.py    |  4 +--
 references/detection/train_tensorflow.py |  4 +--
 4 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index b0f9f1f669..53898a8a18 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -23,10 +23,12 @@ def __init__(
         interpolation=F.InterpolationMode.BILINEAR,
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
+        pad: bool = True,
     ) -> None:
         super().__init__(size, interpolation)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
+        self.pad = pad
 
     def forward(
         self,
@@ -37,6 +39,8 @@ def forward(
         target_ratio = self.size[0] / self.size[1]
         actual_ratio = img.shape[-2] / img.shape[-1]
         if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
+            if target is not None:
+                return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
@@ -48,27 +52,41 @@ def forward(
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            # Pad (inverted in pytorch)
-            _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
-            if self.symmetric_pad:
-                half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
-                _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
+            if self.pad:
+                # Pad (inverted in pytorch)
+                _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
+                if self.symmetric_pad:
+                    half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2))
+                    _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1])
+                img = pad(img, _pad)
 
             # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
             if target is not None:
                 if self.preserve_aspect_ratio:
                     # Get absolute coords
                     if target.shape[1:] == (4,):
-                        target[:, [0, 2]] *= raw_shape[-1] / self.size[1]
-                        target[:, [1, 3]] *= raw_shape[-2] / self.size[0]
+                        if self.pad and self.symmetric_pad:
+                            if np.max(target) <= 1:
+                                offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
+                                target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
+                                target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2]
+                        else:
+                            target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
+                            target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                     elif target.shape[1:] == (4, 2):
-                        target[..., 0] *= raw_shape[-1] / self.size[1]
-                        target[..., 1] *= raw_shape[-2] / self.size[0]
+                        if self.pad and self.symmetric_pad:
+                            if np.max(target) <= 1:
+                                offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
+                                target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
+                                target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2]
+                        else:
+                            target[..., 0] *= raw_shape[-1] / img.shape[-1]
+                            target[..., 1] *= raw_shape[-2] / img.shape[-2]
                     else:
                         raise AssertionError
-                return pad(img, _pad), target
+                return img, target
 
-            return pad(img, _pad)
+            return img
 
     def __repr__(self) -> str:
         interpolate_str = self.interpolation.value
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 99fd5c7216..f2420a7091 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -63,11 +63,13 @@ def __init__(
         method: str = 'bilinear',
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
+        pad: bool = True
     ) -> None:
         self.output_size = output_size
         self.method = method
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
+        self.pad = pad
 
     def extra_repr(self) -> str:
         _repr = f"output_size={self.output_size}, method='{self.method}'"
@@ -85,25 +87,38 @@ def __call__(
         input_dtype = img.dtype
         img = tf.image.resize(img, self.output_size, self.method, self.preserve_aspect_ratio)
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            # pad width
-            if not self.symmetric_pad:
-                offset = (0, 0)
-            elif self.output_size[0] == img.shape[0]:
-                offset = (0, int((self.output_size[1] - img.shape[1]) / 2))
-            else:
-                offset = (int((self.output_size[0] - img.shape[0]) / 2), 0)
-            img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
+            if self.pad:
+                # pad width
+                if not self.symmetric_pad:
+                    offset = (0, 0)
+                elif self.output_size[0] == img.shape[0]:
+                    offset = (0, int((self.output_size[1] - img.shape[1]) / 2))
+                else:
+                    offset = (int((self.output_size[0] - img.shape[0]) / 2), 0)
+                img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
 
         # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
         if target is not None:
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    target[:, [0, 2]] *= raw_shape[1] / self.output_size[1]
-                    target[:, [1, 3]] *= raw_shape[0] / self.output_size[0]
+                    if self.pad and self.symmetric_pad:
+                        if np.max(target) <= 1:
+                            offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
+                            target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
+                            target[:, [1, 3]] = offset[0] + target[:, [1, 3]] * raw_shape[0] / img.shape[0]
+                    else:
+                        target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
+                        target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    target[..., 0] *= raw_shape[1] / self.output_size[1]
-                    target[..., 1] *= raw_shape[0] / self.output_size[0]
+                    if self.pad and self.symmetric_pad:
+                        if np.max(target) <= 1:
+                            offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
+                            target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]
+                            target[..., 1] = offset[0] + target[..., 1] * raw_shape[0] / img.shape[0]
+                    else:
+                        target[..., 0] *= raw_shape[1] / img.shape[1]
+                        target[..., 1] *= raw_shape[0] / img.shape[0]
                 else:
                     raise AssertionError
             return tf.cast(img, dtype=input_dtype), target
diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index 9e4fae3e8f..e54b69d4a8 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -250,9 +250,9 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
             + ([T.RandomRotate(90, expand=True),
-                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 5bacf5c33c..ce5bb863fe 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -199,9 +199,9 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True)]
+            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
             + ([T.RandomRotate(90, expand=True),
-                T.ImageTransform(T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True))
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,

From 2ce8609563ccabeeb8605a0a61c004989460f850 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Fri, 18 Feb 2022 09:20:20 +0100
Subject: [PATCH 07/14] fix: unrotated case

---
 references/detection/train_pytorch.py    | 8 +++++---
 references/detection/train_tensorflow.py | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index e54b69d4a8..e7d33b2453 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -250,9 +250,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
-            + ([T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+              ] if not args.rotation else [])
+            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index ce5bb863fe..1fc953420a 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -199,9 +199,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            [T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, pad=False)]
-            + ([T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=False, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+              ] if not args.rotation else [])
+            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,

From f9e8f5c96574b9525956bfb8068d195b936853a2 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Tue, 22 Feb 2022 12:17:06 +0100
Subject: [PATCH 08/14] fix: args of resize

---
 doctr/models/preprocessor/pytorch.py     | 11 +++++--
 doctr/models/preprocessor/tensorflow.py  | 12 ++++++--
 doctr/transforms/modules/pytorch.py      | 37 +++++++++++++++---------
 doctr/transforms/modules/tensorflow.py   | 21 +++++++++-----
 references/detection/train_pytorch.py    |  6 ++--
 references/detection/train_tensorflow.py |  6 ++--
 6 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py
index 52549bde36..dded20281c 100644
--- a/doctr/models/preprocessor/pytorch.py
+++ b/doctr/models/preprocessor/pytorch.py
@@ -105,8 +105,15 @@ def __call__(
         elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
             raise TypeError("unsupported data type for torch.Tensor")
         # Resizing
-        if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
-            x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
+        if isinstance(self.resize.size, int):
+            if x.shape[-2] != self.resize.size or x.shape[-1] != self.resize.size:
+                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
+        elif isinstance(self.resize.size, tuple):
+            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
+                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
+        else:
+            raise AssertionError("resize size must be a tuple or an int")
+
         # Data type
         if x.dtype == torch.uint8:
             x = x.to(dtype=torch.float32).div(255).clip(0, 1)
diff --git a/doctr/models/preprocessor/tensorflow.py b/doctr/models/preprocessor/tensorflow.py
index 642568b166..38d705b506 100644
--- a/doctr/models/preprocessor/tensorflow.py
+++ b/doctr/models/preprocessor/tensorflow.py
@@ -108,8 +108,16 @@ def __call__(
         if x.dtype == tf.uint8:
             x = tf.image.convert_image_dtype(x, dtype=tf.float32)
         # Resizing
-        if x.shape[1] != self.resize.output_size[0] or x.shape[2] != self.resize.output_size[1]:
-            x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
+        if isinstance(self.resize.output_size, int):
+            if x.shape[1] != self.resize.output_size or x.shape[2] != self.resize.output_size:
+                x = tf.image.resize(
+                    x, (self.resize.output_size, self.resize.output_size), method=self.resize.method
+                )
+        elif isinstance(self.resize.output_size, tuple):
+            if x.shape[1] != self.resize.output_size[0] or x.shape[2] != self.resize.output_size[1]:
+                x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
+        else:
+            raise AssertionError("resize output size must be an int or a tuple")
 
         batches = [x]
 
diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 53898a8a18..86340f0ee0 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -19,16 +19,14 @@ class Resize(T.Resize):
     def __init__(
         self,
-        size: Tuple[int, int],
+        size: Union[int, Tuple[int, int]],
         interpolation=F.InterpolationMode.BILINEAR,
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
-        pad: bool = True,
     ) -> None:
         super().__init__(size, interpolation)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
-        self.pad = pad
 
     def forward(
         self,
@@ -36,23 +34,36 @@ def forward(
         target: Optional[np.ndarray] = None,
     ) -> Union[torch.Tensor, Tuple[torch.Tensor, np.ndarray]]:
 
-        target_ratio = self.size[0] / self.size[1]
-        actual_ratio = img.shape[-2] / img.shape[-1]
-        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
+        if isinstance(self.size, int):
+            target_ratio = img.shape[-2] / img.shape[-1]
+            actual_ratio = img.shape[-2] / img.shape[-1]
+        elif isinstance(self.size, tuple):
+            target_ratio = self.size[0] / self.size[1]
+            actual_ratio = img.shape[-2] / img.shape[-1]
+        else:
+            raise AssertionError("size should be either a tuple or an int")
+
+        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and isinstance(self.size, tuple)):
             if target is not None:
                 return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
-            if actual_ratio > target_ratio:
-                tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
-            else:
-                tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
+            if isinstance(self.size, tuple):
+                if actual_ratio > target_ratio:
+                    tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
+                else:
+                    tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
+            elif isinstance(self.size, int):  # self.size is the longest side, infer the other
+                if img.shape[-2] <= img.shape[-1]:
+                    tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
+                else:
+                    tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            if self.pad:
+            if isinstance(self.size, tuple):
                 # Pad (inverted in pytorch)
                 _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
                 if self.symmetric_pad:
@@ -65,7 +76,7 @@ def forward(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
@@ -74,7 +85,7 @@ def forward(
                             target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                             target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                 elif target.shape[1:] == (4, 2):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index f2420a7091..77014c652a 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -59,17 +59,15 @@ class Resize(NestedObject):
     """
     def __init__(
         self,
-        output_size: Tuple[int, int],
+        output_size: Union[int, Tuple[int, int]],
         method: str = 'bilinear',
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
-        pad: bool = True
     ) -> None:
         self.output_size = output_size
         self.method = method
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
-        self.pad = pad
 
     def extra_repr(self) -> str:
         _repr = f"output_size={self.output_size}, method='{self.method}'"
@@ -84,11 +82,18 @@ def __call__(
     ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
 
         input_dtype = img.dtype
-        img = tf.image.resize(img, self.output_size, self.method, self.preserve_aspect_ratio)
+        if isinstance(self.output_size, int):
+            wanted_size = (self.output_size, self.output_size)
+        elif isinstance(self.output_size, tuple):
+            wanted_size = self.output_size
+        else:
+            raise AssertionError("Output size should be either a Tuple or an int")
+        img = tf.image.resize(img, wanted_size, self.method, self.preserve_aspect_ratio)
+        # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            if self.pad:
-                # pad width
+            if isinstance(self.output_size, tuple):
+                # In that case we need to pad because we want to enforce both width and height
                 if not self.symmetric_pad:
                     offset = (0, 0)
                 elif self.output_size[0] == img.shape[0]:
@@ -102,7 +107,7 @@ def __call__(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
@@ -111,7 +116,7 @@ def __call__(
                             target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                             target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    if self.pad and self.symmetric_pad:
+                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]
diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index e7d33b2453..f59ad60384 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -250,11 +250,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
               ] if not args.rotation else [])
-            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),
                 T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index 1fc953420a..f20f931f3b 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -199,11 +199,11 @@ def main(args):
             ]
         ),
         sample_transforms=T.SampleCompose(
-            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
              ] if not args.rotation else [])
-            + ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, pad=False),
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
                 T.RandomRotate(90, expand=True),
-                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True, pad=True)
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
                 ] if args.rotation else [])
         ),
         use_polygons=args.rotation,

From d4b93506079570d91cd656aa91f9a572af751eca Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Tue, 22 Feb 2022 17:43:14 +0100
Subject: [PATCH 09/14] fix: tests

---
 doctr/transforms/modules/pytorch.py    | 14 ++++++++------
 doctr/transforms/modules/tensorflow.py | 14 +++++++++-----
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 86340f0ee0..3f3c8e0498 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -37,19 +37,21 @@ def forward(
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
             actual_ratio = img.shape[-2] / img.shape[-1]
-        elif isinstance(self.size, tuple):
+        elif isinstance(self.size, tuple) or isinstance(self.size, list):
             target_ratio = self.size[0] / self.size[1]
             actual_ratio = img.shape[-2] / img.shape[-1]
         else:
             raise AssertionError("size should be either a tuple or an int")
 
-        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and isinstance(self.size, tuple)):
+        if not self.preserve_aspect_ratio or (
+            target_ratio == actual_ratio and (isinstance(self.size, tuple) or isinstance(self.size, list))
+        ):
             if target is not None:
                 return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
-            if isinstance(self.size, tuple):
+            if isinstance(self.size, tuple) or isinstance(self.size, list):
                 if actual_ratio > target_ratio:
                     tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
                 else:
@@ -63,7 +65,7 @@ def forward(
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            if isinstance(self.size, tuple):
+            if isinstance(self.size, tuple) or isinstance(self.size, list):
                 # Pad (inverted in pytorch)
                 _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
                 if self.symmetric_pad:
@@ -76,7 +78,7 @@ def forward(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if isinstance(self.size, tuple) and self.symmetric_pad:
+                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
@@ -85,7 +87,7 @@ def forward(
                             target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                             target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                 elif target.shape[1:] == (4, 2):
-                    if isinstance(self.size, tuple) and self.symmetric_pad:
+                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 77014c652a..4bfdc9ddfc 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -84,15 +84,15 @@ def __call__(
         input_dtype = img.dtype
         if isinstance(self.output_size, int):
             wanted_size = (self.output_size, self.output_size)
-        elif isinstance(self.output_size, tuple):
+        elif isinstance(self.output_size, tuple) or isinstance(self.output_size, list):
            wanted_size = self.output_size
         else:
-            raise AssertionError("Output size should be either a Tuple or an int")
+            raise AssertionError("Output size should be either a list, a tuple or an int")
         img = tf.image.resize(img, wanted_size, self.method, self.preserve_aspect_ratio)
         # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            if isinstance(self.output_size, tuple):
+            if (isinstance(self.output_size, tuple) or isinstance(self.output_size, list)):
                 # In that case we need to pad because we want to enforce both width and height
                 if not self.symmetric_pad:
                     offset = (0, 0)
@@ -107,7 +107,9 @@ def __call__(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
+                    if (
+                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
+                    ) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
@@ -116,7 +118,9 @@ def __call__(
                             target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                             target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    if isinstance(self.output_size, tuple) and self.symmetric_pad:
+                    if (
+                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
+                    ) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]

From a029e47a15f9c3c585e308787ce43a64c9d74a1e Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Fri, 25 Feb 2022 14:51:20 +0100
Subject: [PATCH 10/14] fix: preprocessor files

---
 doctr/models/preprocessor/pytorch.py    | 11 ++---------
 doctr/models/preprocessor/tensorflow.py | 12 ++----------
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py
index dded20281c..52549bde36 100644
--- a/doctr/models/preprocessor/pytorch.py
+++ b/doctr/models/preprocessor/pytorch.py
@@ -105,15 +105,8 @@ def __call__(
         elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
             raise TypeError("unsupported data type for torch.Tensor")
         # Resizing
-        if isinstance(self.resize.size, int):
-            if x.shape[-2] != self.resize.size or x.shape[-1] != self.resize.size:
-                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
-        elif isinstance(self.resize.size, tuple):
-            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
-                x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
-        else:
-            raise AssertionError("resize size must be a tuple or an int")
-
+        if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
+            x = F.resize(x, self.resize.size, interpolation=self.resize.interpolation)
         # Data type
         if x.dtype == torch.uint8:
             x = x.to(dtype=torch.float32).div(255).clip(0, 1)
diff --git a/doctr/models/preprocessor/tensorflow.py b/doctr/models/preprocessor/tensorflow.py
index 38d705b506..6d72965c74 100644
--- a/doctr/models/preprocessor/tensorflow.py
+++ b/doctr/models/preprocessor/tensorflow.py
@@ -108,16 +108,8 @@ def __call__(
         if x.dtype == tf.uint8:
             x = tf.image.convert_image_dtype(x, dtype=tf.float32)
         # Resizing
-        if isinstance(self.resize.output_size, int):
-            if x.shape[1] != self.resize.output_size or x.shape[2] != self.resize.output_size:
-                x = tf.image.resize(
-                    x, (self.resize.output_size, self.resize.output_size), method=self.resize.method
-                )
-        elif isinstance(self.resize.output_size, tuple):
-            if x.shape[1] != self.resize.output_size[0] or x.shape[2] != self.resize.output_size[1]:
-                x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
-        else:
-            raise AssertionError("resize output size must be an int or a tuple")
+        if (x.shape[1], x.shape[2]) != self.resize.output_size:  # type: ignore[index]
+            x = tf.image.resize(x, self.resize.output_size, method=self.resize.method)
 
         batches = [x]

From de09f2030157ca1718eb2156fefcf7da2348827a Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Fri, 25 Feb 2022 18:32:31 +0100
Subject: [PATCH 11/14] fix: validation set aspect ratio

---
 references/detection/train_pytorch.py    | 9 ++++++++-
 references/detection/train_tensorflow.py | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/references/detection/train_pytorch.py b/references/detection/train_pytorch.py
index f59ad60384..fccd2dab00 100644
--- a/references/detection/train_pytorch.py
+++ b/references/detection/train_pytorch.py
@@ -181,7 +181,14 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
+        sample_transforms=T.SampleCompose(
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+              ] if not args.rotation or args.eval_straight else [])
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+                ] if args.rotation and not args.eval_straight else [])
+        ),
         use_polygons=args.rotation and not args.eval_straight,
     )
     val_loader = DataLoader(
diff --git a/references/detection/train_tensorflow.py b/references/detection/train_tensorflow.py
index f20f931f3b..315bba15ba 100644
--- a/references/detection/train_tensorflow.py
+++ b/references/detection/train_tensorflow.py
@@ -141,7 +141,14 @@ def main(args):
     val_set = DetectionDataset(
         img_folder=os.path.join(args.val_path, 'images'),
         label_path=os.path.join(args.val_path, 'labels.json'),
-        img_transforms=T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True),
+        sample_transforms=T.SampleCompose(
+            ([T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+              ] if not args.rotation or args.eval_straight else [])
+            + ([T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
+                T.RandomRotate(90, expand=True),
+                T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True)
+                ] if args.rotation and not args.eval_straight else [])
+        ),
        use_polygons=args.rotation and not args.eval_straight,
    )
    val_loader = DataLoader(

From 4c7dc530a3b28ba1d8c9f5d3650944c479f5fdff Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 9 Mar 2022 12:20:11 +0100
Subject: [PATCH 12/14] fix: requested changes

---
 doctr/transforms/modules/pytorch.py    | 14 ++++++++------
 doctr/transforms/modules/tensorflow.py | 26 ++++++++++++--------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index d4e23f588a..5ebee7c311 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -39,21 +39,23 @@ def forward(
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
             actual_ratio = img.shape[-2] / img.shape[-1]
-        elif isinstance(self.size, tuple) or isinstance(self.size, list):
+        elif isinstance(self.size, (tuple, list)):
             target_ratio = self.size[0] / self.size[1]
             actual_ratio = img.shape[-2] / img.shape[-1]
         else:
             raise AssertionError("size should be either a tuple or an int")
 
         if not self.preserve_aspect_ratio or (
-            target_ratio == actual_ratio and (isinstance(self.size, tuple) or isinstance(self.size, list))
+            target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))
         ):
+            # If we don't preserve the aspect ratio or the wanted aspect ratio is the same than the original one
+            # We can use with the regular resize
             if target is not None:
                 return super().forward(img), target
             return super().forward(img)
         else:
             # Resize
-            if isinstance(self.size, tuple) or isinstance(self.size, list):
+            if isinstance(self.size, (tuple, list)):
                 if actual_ratio > target_ratio:
                     tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
                 else:
@@ -67,7 +69,7 @@ def forward(
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation)
             raw_shape = img.shape[-2:]
-            if isinstance(self.size, tuple) or isinstance(self.size, list):
+            if isinstance(self.size, (tuple, list)):
                 # Pad (inverted in pytorch)
                 _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2])
                 if self.symmetric_pad:
@@ -80,7 +82,7 @@ def forward(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
+                    if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
@@ -89,7 +91,7 @@ def forward(
                             target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                             target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
                 elif target.shape[1:] == (4, 2):
-                    if (isinstance(self.size, tuple) or isinstance(self.size, list)) and self.symmetric_pad:
+                    if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2]
                             target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
diff --git a/doctr/transforms/modules/tensorflow.py b/doctr/transforms/modules/tensorflow.py
index 7c213a0319..e45e236855 100644
--- a/doctr/transforms/modules/tensorflow.py
+++ b/doctr/transforms/modules/tensorflow.py
@@ -71,6 +71,13 @@ def __init__(
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
+        if isinstance(self.output_size, int):
+            self.wanted_size = (self.output_size, self.output_size)
+        elif isinstance(self.output_size, (tuple, list)):
+            self.wanted_size = self.output_size
+        else:
+            raise AssertionError("Output size should be either a list, a tuple or an int")
+
     def extra_repr(self) -> str:
         _repr = f"output_size={self.output_size}, method='{self.method}'"
         if self.preserve_aspect_ratio:
@@ -84,17 +91,12 @@ def __call__(
     ) -> Union[tf.Tensor, Tuple[tf.Tensor, np.ndarray]]:
 
         input_dtype = img.dtype
-        if isinstance(self.output_size, int):
-            wanted_size = (self.output_size, self.output_size)
-        elif isinstance(self.output_size, tuple) or isinstance(self.output_size, list):
-            wanted_size = self.output_size
-        else:
-            raise AssertionError("Output size should be either a list, a tuple or an int")
-        img = tf.image.resize(img, wanted_size, self.method, self.preserve_aspect_ratio)
+
+        img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio)
         # It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
         raw_shape = img.shape[:2]
         if self.preserve_aspect_ratio:
-            if (isinstance(self.output_size, tuple) or isinstance(self.output_size, list)):
+            if isinstance(self.output_size, (tuple, list)):
                 # In that case we need to pad because we want to enforce both width and height
                 if not self.symmetric_pad:
                     offset = (0, 0)
@@ -109,9 +111,7 @@ def __call__(
             if self.preserve_aspect_ratio:
                 # Get absolute coords
                 if target.shape[1:] == (4,):
-                    if (
-                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
-                    ) and self.symmetric_pad:
+                    if isinstance(self.output_size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
@@ -120,9 +120,7 @@ def __call__(
                             target[:, [0, 2]] *= raw_shape[1] / img.shape[1]
                             target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
                 elif target.shape[1:] == (4, 2):
-                    if (
-                        isinstance(self.output_size, tuple) or isinstance(self.output_size, list)
-                    ) and self.symmetric_pad:
+                    if isinstance(self.output_size, (tuple, list)) and self.symmetric_pad:
                         if np.max(target) <= 1:
                             offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
                             target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]

From a40ca1cddee7db268ce08575928183c51fbbdc66 Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 9 Mar 2022 14:50:15 +0100
Subject: [PATCH 13/14] fix: requested changes

---
 doctr/transforms/modules/pytorch.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 5ebee7c311..07752b266b 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -30,6 +30,9 @@ def __init__(
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
+        if not isinstance(self.size, (int, tuple, list)):
+            raise AssertionError("size should be either a tuple, a list or an int")
+
     def forward(
         self,
         img: torch.Tensor,
@@ -38,13 +41,10 @@ def forward(
 
         if isinstance(self.size, int):
             target_ratio = img.shape[-2] / img.shape[-1]
-            actual_ratio = img.shape[-2] / img.shape[-1]
-        elif isinstance(self.size, (tuple, list)):
+        else:
             target_ratio = self.size[0] / self.size[1]
-            actual_ratio = img.shape[-2] / img.shape[-1]
-        else:
-            raise AssertionError("size should be either a tuple or an int")
-
+        actual_ratio = img.shape[-2] / img.shape[-1]
+
         if not self.preserve_aspect_ratio or (
             target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))
         ):

From b8dfb341390a6112543f1354fd1ce3d1ed3bc7ae Mon Sep 17 00:00:00 2001
From: charlesmindee
Date: Wed, 9 Mar 2022 15:51:53 +0100
Subject: [PATCH 14/14] fix: style

---
 doctr/transforms/modules/pytorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py
index 07752b266b..dd6ef572fa 100644
--- a/doctr/transforms/modules/pytorch.py
+++ b/doctr/transforms/modules/pytorch.py
@@ -44,7 +44,7 @@ def forward(
         else:
             target_ratio = self.size[0] / self.size[1]
-        actual_ratio = img.shape[-2] / img.shape[-1]
-
+        actual_ratio = img.shape[-2] / img.shape[-1]
+
         if not self.preserve_aspect_ratio or (
             target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))
         ):
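
The series above leaves `Resize` with two calling conventions: a tuple size enforces both dimensions (optionally preserving the aspect ratio and padding, with relative-coordinate targets shifted by the symmetric-padding offset), while an int size only constrains the longest side and never pads. A minimal sketch of both behaviours on the TensorFlow backend, assuming a doctr build that includes these patches (the image and box values below are illustrative, not taken from the patches):

    import numpy as np
    import tensorflow as tf
    from doctr import transforms as T

    # A 60x120 image with one relative-coordinate box covering the whole page
    img = tf.ones((60, 120, 3), dtype=tf.float32)
    boxes = np.array([[0., 0., 1., 1.]], dtype=np.float32)  # (xmin, ymin, xmax, ymax)

    # Tuple size: enforce 128x128, keep the aspect ratio, pad symmetrically
    out_img, out_boxes = T.Resize((128, 128), preserve_aspect_ratio=True, symmetric_pad=True)(img, boxes)
    print(out_img.shape)  # (128, 128, 3): the page sits in a 64px-high central band
    print(out_boxes)      # ~[[0., 0.25, 1., 0.75]]: the box follows the padded page

    # Int size: only the longest side is constrained, and no padding is applied
    out_img = T.Resize(128, preserve_aspect_ratio=True)(img)
    print(out_img.shape)  # (64, 128, 3)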
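That distinction is what the final three-step pipeline in the reference scripts relies on: resize the longest side first without padding (so relative boxes stay valid), rotate with an expanding canvas, then enforce the square input size with symmetric padding. A sketch of that rotated branch on the PyTorch backend, under the same assumptions, with 1024 standing in for args.input_size:

    import numpy as np
    import torch
    from doctr import transforms as T

    rotated_branch = T.SampleCompose([
        # 1. Longest side to 1024, aspect ratio kept; int size means no padding
        T.Resize(1024, preserve_aspect_ratio=True),
        # 2. Random rotation, expanding the canvas so no corner is cropped
        T.RandomRotate(90, expand=True),
        # 3. Enforce the square input, padding symmetrically around the page
        T.Resize((1024, 1024), preserve_aspect_ratio=True, symmetric_pad=True),
    ])

    img = torch.rand(3, 512, 768)
    # One relative (N, 4, 2) polygon, as use_polygons=True produces
    polys = np.array([[[.1, .1], [.9, .1], [.9, .2], [.1, .2]]], dtype=np.float32)
    out_img, out_polys = rotated_branch(img, polys)
    print(out_img.shape)  # torch.Size([3, 1024, 1024])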