From 1af0fc163b0cf502e6d10f6c9de51384d58bbae2 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Mon, 19 Jul 2021 15:38:16 +0200 Subject: [PATCH 01/53] feat: integrate image rotation before using predictor --- doctr/models/_utils.py | 7 ++++++- doctr/models/core.py | 11 ++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index ef5599f8c9..ddd6264155 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -103,8 +103,13 @@ def rotate_page( Returns: Rotated array or tf.Tensor, padded by 0 by default. """ - if abs(angle) < min_angle or abs(angle) > 90 - min_angle: + angle = - angle #TODO : the angles given by estimate_orientation are not correct and should be negative + if abs(angle) < min_angle: return image + elif abs(angle) > 45: + # if image is closer to portrait mode, go for a full 90° rotation + image = cv2.rotate(image, cv2.cv2.ROTATE_90_CLOCKWISE) + angle += 90 # track the rotation by updating angle height, width = image.shape[:2] center = (height / 2, width / 2) diff --git a/doctr/models/core.py b/doctr/models/core.py index 5f75719e20..e9704bf03b 100644 --- a/doctr/models/core.py +++ b/doctr/models/core.py @@ -9,7 +9,7 @@ from typing import List, Any, Tuple, Dict from .detection import DetectionPredictor from .recognition import RecognitionPredictor -from ._utils import extract_crops, extract_rcrops, rotate_page, rotate_boxes +from ._utils import estimate_orientation, extract_crops, extract_rcrops, rotate_page, rotate_boxes from doctr.documents.elements import Word, Line, Block, Page, Document from doctr.utils.repr import NestedObject from doctr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox @@ -42,6 +42,7 @@ def __init__( def __call__( self, pages: List[np.ndarray], + rotate_document: bool = False, **kwargs: Any, ) -> Document: @@ -49,6 +50,14 @@ def __call__( if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") + # Detect document rotation and rotate pages + if rotate_document: + page_angles = [] + for i, page in enumerate(pages): + page_angle = estimate_orientation(page) + page_angles.append(page_angle) + pages[i] = rotate_page(page, page_angle) + # Localize text elements boxes = self.det_predictor(pages, **kwargs) # Crop images, rotate page if necessary From c2b18d742bda3ca33990445558868c9617e6049c Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 22 Sep 2021 13:58:41 +0200 Subject: [PATCH 02/53] feat: add rotate_document functionality --- Dockerfile | 4 ++++ doctr/models/_utils.py | 2 +- doctr/models/predictor/tensorflow.py | 12 ++++++++++-- test/common/test_models.py | 9 ++++++++- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 994f49f244..4740771d7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,3 +17,7 @@ RUN apt-get update \ && apt-get autoremove -y \ && rm -rf /var/lib/apt/lists/* \ && rm -rf /root/.cache/pip + +COPY ./demo . +RUN pip install -r requirements.txt +CMD [ "streamlit", "run", "app.py" ] \ No newline at end of file diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 5fbf58fcca..4b2ee61019 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -116,7 +116,7 @@ def rotate_page( Returns: Rotated array or tf.Tensor, padded by 0 by default. """ - angle = - angle #TODO : the angles given by estimate_orientation are not correct and should be negative + angle = - angle if abs(angle) < min_angle: return image elif abs(angle) > 45: diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 7004f776e3..37e315a5e5 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -13,8 +13,7 @@ from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor from doctr.utils.geometry import rotate_image, rotate_boxes -from .._utils import extract_crops, extract_rcrops - +from .._utils import extract_crops, extract_rcrops, estimate_orientation, rotate_page __all__ = ['OCRPredictor'] @@ -43,6 +42,7 @@ def __init__( def __call__( self, pages: List[Union[np.ndarray, tf.Tensor]], + rotate_document: bool = False, **kwargs: Any, ) -> Document: @@ -50,6 +50,14 @@ def __call__( if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") + # Detect document rotation and rotate pages + if rotate_document: + page_angles = [] + for i, page in enumerate(pages): + page_angle = estimate_orientation(page) + page_angles.append(page_angle) + pages[i] = rotate_page(page, page_angle) + # Localize text elements boxes = self.det_predictor(pages, **kwargs) # Crop images, rotate page if necessary diff --git a/test/common/test_models.py b/test/common/test_models.py index 94de5efb2c..689f10a554 100644 --- a/test/common/test_models.py +++ b/test/common/test_models.py @@ -5,7 +5,7 @@ import cv2 from doctr.io import reader, DocumentFile -from doctr.models._utils import extract_crops, extract_rcrops, get_bitmap_angle, estimate_orientation +from doctr.models._utils import extract_crops, extract_rcrops, get_bitmap_angle, estimate_orientation, rotate_page def test_extract_crops(mock_pdf): # noqa: F811 @@ -92,3 +92,10 @@ def test_get_bitmap_angle(mock_bitmap): def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. + + +def test_rotate_page(mock_image): + angle = estimate_orientation(mock_image) + rotated = rotate_page(mock_image, angle) + angle_rotated = estimate_orientation(rotated) + assert abs(angle_rotated - 0.) < 1. From 312179feb5fe6908dc313eca918aa276aa5b5d16 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 22 Sep 2021 14:08:26 +0200 Subject: [PATCH 03/53] fix: remove min_angle from rotate_page --- doctr/models/_utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 4b2ee61019..578716a612 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -104,22 +104,18 @@ def extract_rcrops( def rotate_page( image: np.ndarray, angle: float = 0., - min_angle: float = 1. ) -> np.ndarray: """Rotate an image counterclockwise by an ange alpha (negative angle to go clockwise). Args: image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 - min_angle: min. angle in degrees to rotate a page Returns: Rotated array or tf.Tensor, padded by 0 by default. """ angle = - angle - if abs(angle) < min_angle: - return image - elif abs(angle) > 45: + if abs(angle) > 45: # if image is closer to portrait mode, go for a full 90° rotation image = cv2.rotate(image, cv2.cv2.ROTATE_90_CLOCKWISE) angle += 90 # track the rotation by updating angle From 6fe1bc62c033f787d26a17c877932c958ea49375 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 30 Sep 2021 11:00:39 +0200 Subject: [PATCH 04/53] fix: correct models.predictor.tensorflow --- Dockerfile | 6 +----- doctr/models/_utils.py | 25 ------------------------- doctr/models/predictor/tensorflow.py | 13 +++++++------ 3 files changed, 8 insertions(+), 36 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4740771d7b..678d12f6e0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,8 +16,4 @@ RUN apt-get update \ && pip cache purge \ && apt-get autoremove -y \ && rm -rf /var/lib/apt/lists/* \ - && rm -rf /root/.cache/pip - -COPY ./demo . -RUN pip install -r requirements.txt -CMD [ "streamlit", "run", "app.py" ] \ No newline at end of file + && rm -rf /root/.cache/pip \ No newline at end of file diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 578716a612..36a8d6f9f4 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -101,31 +101,6 @@ def extract_rcrops( return crops -def rotate_page( - image: np.ndarray, - angle: float = 0., -) -> np.ndarray: - """Rotate an image counterclockwise by an ange alpha (negative angle to go clockwise). - - Args: - image: numpy tensor to rotate - angle: rotation angle in degrees, between -90 and +90 - - Returns: - Rotated array or tf.Tensor, padded by 0 by default. - """ - angle = - angle - if abs(angle) > 45: - # if image is closer to portrait mode, go for a full 90° rotation - image = cv2.rotate(image, cv2.cv2.ROTATE_90_CLOCKWISE) - angle += 90 # track the rotation by updating angle - - height, width = image.shape[:2] - center = (height / 2, width / 2) - rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0) - return cv2.warpAffine(image, rot_mat, (width, height)) - - def get_max_width_length_ratio(contour: np.ndarray) -> float: """ Get the maximum shape ratio of a contour. diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 79c7e2c2a1..40531fc8f9 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -8,16 +8,19 @@ from typing import List, Any, Union from doctr.io.elements import Document +from doctr.utils.geometry import rotate_image from doctr.utils.repr import NestedObject from doctr.models.builder import DocumentBuilder from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor +from doctr.models._utils import estimate_orientation from .base import _OCRPredictor __all__ = ['OCRPredictor'] + class OCRPredictor(NestedObject, _OCRPredictor): """Implements an object able to localize and identify text elements in a set of documents @@ -41,7 +44,7 @@ def __init__( def __call__( self, pages: List[Union[np.ndarray, tf.Tensor]], - rotate_document: bool = False, + straighten_pages: bool = False, **kwargs: Any, ) -> Document: @@ -50,12 +53,10 @@ def __call__( raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") # Detect document rotation and rotate pages - if rotate_document: + if straighten_pages: page_angles = [] - for i, page in enumerate(pages): - page_angle = estimate_orientation(page) - page_angles.append(page_angle) - pages[i] = rotate_page(page, page_angle) + page_orientations = [estimate_orientation(page) for page in pages] + pages = [rotate_image(page, angle) for page, angle in zip(pages, page_orientations)] # Localize text elements loc_preds = self.det_predictor(pages, **kwargs) From a6f2ff1e85e265183aa8e2bf1aa9b973cd04a4a3 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 1 Oct 2021 10:14:08 +0200 Subject: [PATCH 05/53] fix: minor corrections --- Dockerfile | 2 +- doctr/models/predictor/tensorflow.py | 1 - test/common/test_models.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 678d12f6e0..994f49f244 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,4 +16,4 @@ RUN apt-get update \ && pip cache purge \ && apt-get autoremove -y \ && rm -rf /var/lib/apt/lists/* \ - && rm -rf /root/.cache/pip \ No newline at end of file + && rm -rf /root/.cache/pip diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 40531fc8f9..c1f8704406 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -54,7 +54,6 @@ def __call__( # Detect document rotation and rotate pages if straighten_pages: - page_angles = [] page_orientations = [estimate_orientation(page) for page in pages] pages = [rotate_image(page, angle) for page, angle in zip(pages, page_orientations)] diff --git a/test/common/test_models.py b/test/common/test_models.py index 689f10a554..6906f81a48 100644 --- a/test/common/test_models.py +++ b/test/common/test_models.py @@ -94,7 +94,7 @@ def test_estimate_orientation(mock_image): assert abs(angle - 30.) < 1. -def test_rotate_page(mock_image): +def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) rotated = rotate_page(mock_image, angle) angle_rotated = estimate_orientation(rotated) From ccda4d036bba530a4713758126c8faf0057ef20d Mon Sep 17 00:00:00 2001 From: Rob192 Date: Mon, 4 Oct 2021 17:21:57 +0200 Subject: [PATCH 06/53] feat: Rotate back images and boxes after straightening --- doctr/models/_utils.py | 52 +++++++++++++++++++++-- doctr/models/predictor/tensorflow.py | 24 ++++++++--- doctr/utils/common_types.py | 2 +- doctr/utils/geometry.py | 62 ++++++++++++++++++++++++---- doctr/utils/visualization.py | 2 +- test/common/test_models.py | 27 +++++++++++- test/common/test_utils_geometry.py | 33 ++++++++++++++- 7 files changed, 178 insertions(+), 24 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 36a8d6f9f4..578a506d4c 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -5,11 +5,13 @@ import numpy as np import cv2 -from math import floor -from typing import List +from math import floor, ceil +from typing import List, Optional, Tuple from statistics import median_low -__all__ = ['estimate_orientation', 'extract_crops', 'extract_rcrops', 'get_bitmap_angle'] +from doctr.utils import compute_expanded_shape + +__all__ = ['estimate_orientation', 'extract_crops', 'extract_rcrops', 'get_bitmap_angle', 'rotate_image'] def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]: @@ -188,3 +190,47 @@ def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.) -> angle = 90 + angle return angle + + +def rotate_image( + image: np.ndarray, + angle: float, + expand: bool = False, + mask_shape: Optional[Tuple[int, int]] = None +) -> np.ndarray: + """Rotate an image counterclockwise by an given angle. + + Args: + image: numpy tensor to rotate + angle: rotation angle in degrees, between -90 and +90 + expand: whether the image should be padded before the rotation + mask_shape: applies a mask on the image of the specified shape given in absolute pixels + + Returns: + Rotated array, padded by 0 by default. + """ + + # Compute the expanded padding + if expand: + exp_shape = compute_expanded_shape(image.shape[:-1], angle) + h_pad, w_pad = int(max(0,ceil(exp_shape[0] - image.shape[0]))), int(max(0,ceil(exp_shape[1] - image.shape[1]))) + exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) + else: + exp_img = image + + height, width = exp_img.shape[:2] + rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) + rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) + + if mask_shape is not None: + if len(mask_shape) != 2: + raise ValueError(f"Mask length should be 2, was found at: {len(mask_shape)}") + h_crop, w_crop = int(height - ceil(mask_shape[0])), int(ceil(width - mask_shape[1])) + if h_crop > 0 and w_crop > 0: + rot_img = rot_img[h_crop // 2: - h_crop // 2, w_crop // 2: - w_crop // 2] + elif w_crop <= 0: + rot_img = rot_img[h_crop // 2: - h_crop // 2, ] + elif h_crop <= 0: + rot_img = rot_img[:, w_crop // 2: - w_crop // 2] + + return rot_img diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index c1f8704406..32435b882d 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -8,12 +8,12 @@ from typing import List, Any, Union from doctr.io.elements import Document -from doctr.utils.geometry import rotate_image +from doctr.utils.geometry import rotate_boxes from doctr.utils.repr import NestedObject from doctr.models.builder import DocumentBuilder from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor -from doctr.models._utils import estimate_orientation +from doctr.models._utils import estimate_orientation, rotate_image from .base import _OCRPredictor @@ -33,18 +33,19 @@ def __init__( self, det_predictor: DetectionPredictor, reco_predictor: RecognitionPredictor, - rotated_bbox: bool = False + rotated_bbox: bool = False, + straighten_pages: bool = False, ) -> None: super().__init__() self.det_predictor = det_predictor self.reco_predictor = reco_predictor + self.straighten_pages = straighten_pages self.doc_builder = DocumentBuilder(rotated_bbox=rotated_bbox) def __call__( self, pages: List[Union[np.ndarray, tf.Tensor]], - straighten_pages: bool = False, **kwargs: Any, ) -> Document: @@ -53,9 +54,10 @@ def __call__( raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") # Detect document rotation and rotate pages - if straighten_pages: + if self.straighten_pages: page_orientations = [estimate_orientation(page) for page in pages] - pages = [rotate_image(page, angle) for page, angle in zip(pages, page_orientations)] + page_shapes = [page.shape[:-1] for page in pages] + pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, page_orientations)] # Localize text elements loc_preds = self.det_predictor(pages, **kwargs) @@ -65,5 +67,15 @@ def __call__( word_preds = self.reco_predictor([crop for page_crops in crops for crop in page_crops], **kwargs) boxes, text_preds = self._process_predictions(loc_preds, word_preds, self.doc_builder.rotated_bbox) + + # Rotate back pages and boxes while keeping original image size + if self.straighten_pages: + pages = [rotate_image(page, angle, expand=True, mask_shape=mask) for page, angle, mask in + zip(pages, page_orientations, page_shapes)] + rboxes = [rotate_boxes(page_boxes, angle, expand=True, orig_shape=page.shape[:2], mask_shape=mask) for + page_boxes, page, angle, mask in zip(boxes, pages, page_orientations, page_shapes)] + boxes = rboxes + self.doc_builder = DocumentBuilder(rotated_bbox=True) #override the current doc_builder + out = self.doc_builder(boxes, text_preds, [page.shape[:2] for page in pages]) # type: ignore[misc] return out diff --git a/doctr/utils/common_types.py b/doctr/utils/common_types.py index 8f603e496b..dc9ed70676 100644 --- a/doctr/utils/common_types.py +++ b/doctr/utils/common_types.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Tuple, List, Union -__all__ = ['Point2D', 'BoundingBox', 'RotatedBbox', 'Polygon4P', 'Polygon'] +__all__ = ['Point2D', 'BoundingBox', 'RotatedBbox', 'Polygon4P', 'Polygon', 'Bbox'] Point2D = Tuple[float, float] diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 11aa8674c7..bedd1e8559 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -4,10 +4,10 @@ # See LICENSE or go to for full license details. import math -from typing import List, Union, Tuple +from typing import List, Union, Tuple, Optional import numpy as np import cv2 -from .common_types import BoundingBox, Polygon4P, RotatedBbox +from .common_types import BoundingBox, Polygon4P, RotatedBbox, Bbox __all__ = ['rbbox_to_polygon', 'bbox_to_polygon', 'polygon_to_bbox', 'polygon_to_rbbox', 'resolve_enclosing_bbox', 'resolve_enclosing_bbox', 'fit_rbbox', 'rotate_boxes', 'rotate_abs_boxes', @@ -38,6 +38,10 @@ def polygon_to_rbbox(polygon: Polygon4P) -> RotatedBbox: return fit_rbbox(cnt) +def bbox_to_rbbox(bbox: Bbox) -> RotatedBbox: + return (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2, bbox[2] - bbox[0], bbox[3] - bbox[1], 0 + + def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]: """Compute enclosing bbox either from: @@ -129,10 +133,41 @@ def rotate_abs_boxes(boxes: np.ndarray, angle: float, img_shape: Tuple[int, int] return rotated_boxes +def remap_boxes(boxes: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray: + """ Remaps a batch of RotatedBbox (x, y, w, h, alpha) expressed for an origin_shape to a destination_shape, + This does not impact the absolute shape of the boxes + + Args: + boxes: (N, 5) array of RELATIVE RotatedBbox (x, y, w, h, alpha) + orig_shape: shape of the origin image + dest_shape: shape of the destination image + + Returns: + A batch of rotated boxes (N, 5): (x, y, w, h, alpha) expressed in the destination referencial + + """ + + if len(dest_shape) != 2: + raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}") + if len(orig_shape) != 2: + raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}") + orig_width, orig_height = orig_shape + dest_width, dest_height = dest_shape + mboxes = boxes.copy() + mboxes[:, 0] = ((boxes[:, 0] * orig_height) + (dest_height - orig_height) / 2) / dest_height + mboxes[:, 1] = ((boxes[:, 1] * orig_width) + (dest_width - orig_width) / 2) / dest_width + mboxes[:, 2] = boxes[:, 2] * orig_height / dest_height + mboxes[:, 3] = boxes[:, 3] * orig_width / dest_width + return mboxes + + def rotate_boxes( boxes: np.ndarray, angle: float = 0., - min_angle: float = 1. + min_angle: float = 1., + expand: bool = False, + orig_shape: Optional[Tuple[int, int]] = None, + mask_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax) of an angle, if angle > min_angle, around the center of the page. @@ -141,28 +176,37 @@ def rotate_boxes( boxes: (N, 4) array of RELATIVE boxes angle: angle between -90 and +90 degrees min_angle: minimum angle to rotate boxes + expand: whether the image should be padded before the rotation + orig_shape: shape of the origin image + mask_shape: shape of the mask if the image is cropped after the rotation Returns: A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes """ + # Change format of the boxes to rotated boxes + boxes = np.apply_along_axis(bbox_to_rbbox, 1, boxes) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return boxes + if expand: + exp_shape = compute_expanded_shape(orig_shape, angle) + boxes = remap_boxes(boxes, orig_shape=orig_shape, dest_shape=exp_shape) + orig_shape = exp_shape #in case a mask is used afterwards # Compute rotation matrix angle_rad = angle * np.pi / 180. # compute radian angle for np functions rotation_mat = np.array([ [np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)] ], dtype=boxes.dtype) - # Compute unrotated boxes - x_unrotated, y_unrotated = (boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2 - width, height = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] # Rotate centers - centers = np.stack((x_unrotated, y_unrotated), axis=-1) - rotated_centers = .5 + np.matmul(centers - .5, np.transpose(rotation_mat)) + centers = np.stack((boxes[:, 0], boxes[:, 1]), axis=-1) + rotated_centers = .5 + np.matmul(centers - .5, rotation_mat) x_center, y_center = rotated_centers[:, 0], rotated_centers[:, 1] # Compute rotated boxes - rotated_boxes = np.stack((x_center, y_center, width, height, angle * np.ones_like(boxes[:, 0])), axis=1) + rotated_boxes = np.stack((x_center, y_center, boxes[:, 2], boxes[:, 3], angle * np.ones_like(boxes[:, 0])), axis=1) + # Apply a mask if requested + if mask_shape is not None: + rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=mask_shape) return rotated_boxes diff --git a/doctr/utils/visualization.py b/doctr/utils/visualization.py index fd8b30165c..02540f2a37 100644 --- a/doctr/utils/visualization.py +++ b/doctr/utils/visualization.py @@ -99,7 +99,7 @@ def polygon_patch( # Switch to absolute coords x, w = x * width, w * width y, h = y * height, h * height - points = cv2.boxPoints(((x, y), (w, h), a)) + points = cv2.boxPoints(((x, y), (w, h), -a)) return patches.Polygon( points, diff --git a/test/common/test_models.py b/test/common/test_models.py index 6906f81a48..54882869e7 100644 --- a/test/common/test_models.py +++ b/test/common/test_models.py @@ -5,7 +5,7 @@ import cv2 from doctr.io import reader, DocumentFile -from doctr.models._utils import extract_crops, extract_rcrops, get_bitmap_angle, estimate_orientation, rotate_page +from doctr.models._utils import extract_crops, extract_rcrops, get_bitmap_angle, estimate_orientation, rotate_image def test_extract_crops(mock_pdf): # noqa: F811 @@ -89,6 +89,29 @@ def test_get_bitmap_angle(mock_bitmap): assert abs(angle - 30.) < 1. +def test_rotate_image(): + img = np.ones((32, 64, 3), dtype=np.float32) + rotated = rotate_image(img, 30.) + assert rotated.shape[:-1] == (32, 64) + assert rotated[0, 0, 0] == 0 + assert rotated[0, :, 0].sum() > 1 + + # Expand + rotated = rotate_image(img, 30., expand=True) + assert rotated.shape[:-1] == (60, 72) + assert rotated[0, :, 0].sum() <= 1 + + # Expand with 90° rotation + rotated = rotate_image(img, 90., expand=True) + assert rotated.shape[:-1] == (64, 64) + assert rotated[0, :, 0].sum() <= 1 + + # Expand with mask + rotated = rotate_image(img, 30., expand=True, mask_shape=(40, 72)) + assert rotated.shape[:-1] == (40, 72) + assert rotated[0, :, 0].sum() > 1 + + def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. @@ -96,6 +119,6 @@ def test_estimate_orientation(mock_image): def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) - rotated = rotate_page(mock_image, angle) + rotated = rotate_image(mock_image, -angle) angle_rotated = estimate_orientation(rotated) assert abs(angle_rotated - 0.) < 1. diff --git a/test/common/test_utils_geometry.py b/test/common/test_utils_geometry.py index 621a4e43de..bd97366546 100644 --- a/test/common/test_utils_geometry.py +++ b/test/common/test_utils_geometry.py @@ -28,25 +28,54 @@ def test_polygon_to_rbbox(): assert all(abs(i - j) <= 1e-7 for (i, j) in zip(pred, target)) +def test_bbox_to_rbbox(): + pred = geometry.bbox_to_rbbox((0, 0, 0.6, 0.4)) + target = (0.3, 0.2, 0.6, 0.4, 0) + assert all(abs(i - j) <= 1e-7 for (i, j) in zip(pred, target)) + + def test_resolve_enclosing_rbbox(): pred = geometry.resolve_enclosing_rbbox([(.2, .2, .05, .05, 0), (.2, .2, .2, .2, 0)])[:4] target = (.2, .2, .2, .2) assert all(abs(i - j) <= 1e-7 for (i, j) in zip(pred, target)) +def test_remap_boxes(): + pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 20)) + target = np.array([[0.5, 0.5, 0.05, 0.05, 0.]]) + assert pred.all() == target.all() + + pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 10)) + target = np.array([[0.5, 0.5, 0.1, 0.05, 0.]]) + assert pred.all() == target.all() + + pred = geometry.remap_boxes(np.array([[0.25, 0.5, 0.5, 0.33, 0.]]), (80, 30), (160, 30)) + target = np.array([[0.375, 0.5, 0.25, 0.1, 0.]]) + assert pred.all() == target.all() + + def test_rotate_boxes(): boxes = np.array([[0.1, 0.1, 0.8, 0.3]]) + rboxes = np.apply_along_axis(geometry.bbox_to_rbbox,1,boxes) # Angle = 0 rotated = geometry.rotate_boxes(boxes, angle=0.) - assert rotated.all() == boxes.all() + assert rotated.all() == rboxes.all() # Angle < 1: rotated = geometry.rotate_boxes(boxes, angle=0.5) - assert rotated.all() == boxes.all() + assert rotated.all() == rboxes.all() # Angle = 30 rotated = geometry.rotate_boxes(boxes, angle=30) assert rotated.shape == (1, 5) assert rotated[0, 4] == 30. + boxes = np.array([[0., 0., 0.6, 0.2]]) + # Angle = -90: + rotated = geometry.rotate_boxes(boxes, angle=-90, min_angle=0) + assert rotated.all() == np.array([[0.1, 0.7, 0.6, 0.2, -90.]]).all() + # Angle = 90 + rotated = geometry.rotate_boxes(boxes, angle=+90, min_angle=0) + assert rotated.all() == np.array([[0.9, 0.3, 0.6, 0.2, 90.]]).all() + def test_rotate_image(): img = np.ones((32, 64, 3), dtype=np.float32) From 7d4ed75671c9fa17b9e8c675e2095ee0a6f9880e Mon Sep 17 00:00:00 2001 From: Rob192 Date: Mon, 4 Oct 2021 17:28:38 +0200 Subject: [PATCH 07/53] fix: correct typo --- doctr/models/_utils.py | 2 +- doctr/models/predictor/tensorflow.py | 3 +-- doctr/utils/geometry.py | 2 +- test/common/test_models.py | 2 -- test/common/test_utils_geometry.py | 8 ++++---- 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 578a506d4c..20b1b13be8 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -213,7 +213,7 @@ def rotate_image( # Compute the expanded padding if expand: exp_shape = compute_expanded_shape(image.shape[:-1], angle) - h_pad, w_pad = int(max(0,ceil(exp_shape[0] - image.shape[0]))), int(max(0,ceil(exp_shape[1] - image.shape[1]))) + h_pad, w_pad = int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))) exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) else: exp_img = image diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 32435b882d..8b76115e2e 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -20,7 +20,6 @@ __all__ = ['OCRPredictor'] - class OCRPredictor(NestedObject, _OCRPredictor): """Implements an object able to localize and identify text elements in a set of documents @@ -75,7 +74,7 @@ def __call__( rboxes = [rotate_boxes(page_boxes, angle, expand=True, orig_shape=page.shape[:2], mask_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, page_orientations, page_shapes)] boxes = rboxes - self.doc_builder = DocumentBuilder(rotated_bbox=True) #override the current doc_builder + self.doc_builder = DocumentBuilder(rotated_bbox=True) # override the current doc_builder out = self.doc_builder(boxes, text_preds, [page.shape[:2] for page in pages]) # type: ignore[misc] return out diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index bedd1e8559..1d53819a71 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -191,7 +191,7 @@ def rotate_boxes( if expand: exp_shape = compute_expanded_shape(orig_shape, angle) boxes = remap_boxes(boxes, orig_shape=orig_shape, dest_shape=exp_shape) - orig_shape = exp_shape #in case a mask is used afterwards + orig_shape = exp_shape # in case a mask is used afterwards # Compute rotation matrix angle_rad = angle * np.pi / 180. # compute radian angle for np functions rotation_mat = np.array([ diff --git a/test/common/test_models.py b/test/common/test_models.py index 54882869e7..70c1d281b5 100644 --- a/test/common/test_models.py +++ b/test/common/test_models.py @@ -116,8 +116,6 @@ def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. - -def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) rotated = rotate_image(mock_image, -angle) angle_rotated = estimate_orientation(rotated) diff --git a/test/common/test_utils_geometry.py b/test/common/test_utils_geometry.py index bd97366546..c725117caf 100644 --- a/test/common/test_utils_geometry.py +++ b/test/common/test_utils_geometry.py @@ -42,21 +42,21 @@ def test_resolve_enclosing_rbbox(): def test_remap_boxes(): pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 20)) - target = np.array([[0.5, 0.5, 0.05, 0.05, 0.]]) + target = np.array([[0.5, 0.5, 0.05, 0.05, 0.]]) assert pred.all() == target.all() pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 10)) - target = np.array([[0.5, 0.5, 0.1, 0.05, 0.]]) + target = np.array([[0.5, 0.5, 0.1, 0.05, 0.]]) assert pred.all() == target.all() pred = geometry.remap_boxes(np.array([[0.25, 0.5, 0.5, 0.33, 0.]]), (80, 30), (160, 30)) - target = np.array([[0.375, 0.5, 0.25, 0.1, 0.]]) + target = np.array([[0.375, 0.5, 0.25, 0.1, 0.]]) assert pred.all() == target.all() def test_rotate_boxes(): boxes = np.array([[0.1, 0.1, 0.8, 0.3]]) - rboxes = np.apply_along_axis(geometry.bbox_to_rbbox,1,boxes) + rboxes = np.apply_along_axis(geometry.bbox_to_rbbox, 1, boxes) # Angle = 0 rotated = geometry.rotate_boxes(boxes, angle=0.) assert rotated.all() == rboxes.all() From a303b230367803f47ab078febe694ae47c37cfca Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 5 Oct 2021 13:52:20 +0200 Subject: [PATCH 08/53] fix: merge two functions rotate_image --- doctr/models/_utils.py | 44 ---------------------------- doctr/models/predictor/tensorflow.py | 4 +-- doctr/utils/geometry.py | 25 ++++++++++++---- test/common/test_models.py | 23 --------------- test/common/test_utils_geometry.py | 15 ++++++++++ 5 files changed, 37 insertions(+), 74 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 20b1b13be8..ea9cf2883c 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -190,47 +190,3 @@ def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.) -> angle = 90 + angle return angle - - -def rotate_image( - image: np.ndarray, - angle: float, - expand: bool = False, - mask_shape: Optional[Tuple[int, int]] = None -) -> np.ndarray: - """Rotate an image counterclockwise by an given angle. - - Args: - image: numpy tensor to rotate - angle: rotation angle in degrees, between -90 and +90 - expand: whether the image should be padded before the rotation - mask_shape: applies a mask on the image of the specified shape given in absolute pixels - - Returns: - Rotated array, padded by 0 by default. - """ - - # Compute the expanded padding - if expand: - exp_shape = compute_expanded_shape(image.shape[:-1], angle) - h_pad, w_pad = int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))) - exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) - else: - exp_img = image - - height, width = exp_img.shape[:2] - rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) - rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) - - if mask_shape is not None: - if len(mask_shape) != 2: - raise ValueError(f"Mask length should be 2, was found at: {len(mask_shape)}") - h_crop, w_crop = int(height - ceil(mask_shape[0])), int(ceil(width - mask_shape[1])) - if h_crop > 0 and w_crop > 0: - rot_img = rot_img[h_crop // 2: - h_crop // 2, w_crop // 2: - w_crop // 2] - elif w_crop <= 0: - rot_img = rot_img[h_crop // 2: - h_crop // 2, ] - elif h_crop <= 0: - rot_img = rot_img[:, w_crop // 2: - w_crop // 2] - - return rot_img diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 8b76115e2e..def8e2f537 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -8,12 +8,12 @@ from typing import List, Any, Union from doctr.io.elements import Document -from doctr.utils.geometry import rotate_boxes +from doctr.utils.geometry import rotate_boxes, rotate_image from doctr.utils.repr import NestedObject from doctr.models.builder import DocumentBuilder from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor -from doctr.models._utils import estimate_orientation, rotate_image +from doctr.models._utils import estimate_orientation from .base import _OCRPredictor diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 1d53819a71..146bb0bfd6 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -3,7 +3,7 @@ # This program is licensed under the Apache License version 2. # See LICENSE or go to for full license details. -import math +from math import ceil from typing import List, Union, Tuple, Optional import numpy as np import cv2 @@ -11,7 +11,7 @@ __all__ = ['rbbox_to_polygon', 'bbox_to_polygon', 'polygon_to_bbox', 'polygon_to_rbbox', 'resolve_enclosing_bbox', 'resolve_enclosing_bbox', 'fit_rbbox', 'rotate_boxes', 'rotate_abs_boxes', - 'compute_expanded_shape', 'rotate_image'] + 'compute_expanded_shape'] def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P: @@ -213,7 +213,9 @@ def rotate_boxes( def rotate_image( image: np.ndarray, angle: float, - expand=False, + expand: bool = False, + keep_original_size: bool = False, + mask_shape: Optional[Tuple[int, int]] = None ) -> np.ndarray: """Rotate an image counterclockwise by an given angle. @@ -221,6 +223,8 @@ def rotate_image( image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation + keep_original_size: whether the image should be resized to the original image size after the rotation + mask_shape: applies a mask on the image of the specified shape given in absolute pixels after the rotation Returns: Rotated array, padded by 0 by default. @@ -229,7 +233,7 @@ def rotate_image( # Compute the expanded padding if expand: exp_shape = compute_expanded_shape(image.shape[:-1], angle) - h_pad, w_pad = int(math.ceil(exp_shape[0] - image.shape[0])), int(math.ceil(exp_shape[1] - image.shape[1])) + h_pad, w_pad = int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))) exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) else: exp_img = image @@ -237,7 +241,7 @@ def rotate_image( height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) - if expand: + if keep_original_size: # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): # Pad width @@ -250,4 +254,15 @@ def rotate_image( # rescale rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) + if mask_shape is not None: + if len(mask_shape) != 2: + raise ValueError(f"Mask length should be 2, was found at: {len(mask_shape)}") + h_crop, w_crop = int(height - ceil(mask_shape[0])), int(ceil(width - mask_shape[1])) + if h_crop > 0 and w_crop > 0: + rot_img = rot_img[h_crop // 2: - h_crop // 2, w_crop // 2: - w_crop // 2] + elif w_crop <= 0: + rot_img = rot_img[h_crop // 2: - h_crop // 2, ] + elif h_crop <= 0: + rot_img = rot_img[:, w_crop // 2: - w_crop // 2] + return rot_img diff --git a/test/common/test_models.py b/test/common/test_models.py index 70c1d281b5..979b1bcab6 100644 --- a/test/common/test_models.py +++ b/test/common/test_models.py @@ -89,29 +89,6 @@ def test_get_bitmap_angle(mock_bitmap): assert abs(angle - 30.) < 1. -def test_rotate_image(): - img = np.ones((32, 64, 3), dtype=np.float32) - rotated = rotate_image(img, 30.) - assert rotated.shape[:-1] == (32, 64) - assert rotated[0, 0, 0] == 0 - assert rotated[0, :, 0].sum() > 1 - - # Expand - rotated = rotate_image(img, 30., expand=True) - assert rotated.shape[:-1] == (60, 72) - assert rotated[0, :, 0].sum() <= 1 - - # Expand with 90° rotation - rotated = rotate_image(img, 90., expand=True) - assert rotated.shape[:-1] == (64, 64) - assert rotated[0, :, 0].sum() <= 1 - - # Expand with mask - rotated = rotate_image(img, 30., expand=True, mask_shape=(40, 72)) - assert rotated.shape[:-1] == (40, 72) - assert rotated[0, :, 0].sum() > 1 - - def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. diff --git a/test/common/test_utils_geometry.py b/test/common/test_utils_geometry.py index c725117caf..45e9f6bf4b 100644 --- a/test/common/test_utils_geometry.py +++ b/test/common/test_utils_geometry.py @@ -86,5 +86,20 @@ def test_rotate_image(): # Expand rotated = geometry.rotate_image(img, 30., expand=True) + assert rotated.shape[:-1] == (60, 72) + assert rotated[0, :, 0].sum() <= 1 + + # Expand + rotated = geometry.rotate_image(img, 30., expand=True, keep_original_size=True) assert rotated.shape[:-1] == (32, 64) assert rotated[0, :, 0].sum() <= 1 + + # Expand with 90° rotation + rotated = geometry.rotate_image(img, 90., expand=True) + assert rotated.shape[:-1] == (64, 64) + assert rotated[0, :, 0].sum() <= 1 + + # Expand with mask + rotated = geometry.rotate_image(img, 30., expand=True, mask_shape=(40, 72)) + assert rotated.shape[:-1] == (40, 72) + assert rotated[0, :, 0].sum() > 1 From 7a782635edb5e3db9b8227ad1f789cb5b7be65cb Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 5 Oct 2021 14:45:55 +0200 Subject: [PATCH 09/53] fix: do not rotate back pages but only boxes --- doctr/models/predictor/tensorflow.py | 7 +++---- doctr/models/zoo.py | 4 ++-- test/common/test_models.py | 5 +++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index def8e2f537..b4008f075c 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -52,10 +52,11 @@ def __call__( if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") + page_shapes = [page.shape[:-1] for page in pages] + # Detect document rotation and rotate pages if self.straighten_pages: page_orientations = [estimate_orientation(page) for page in pages] - page_shapes = [page.shape[:-1] for page in pages] pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, page_orientations)] # Localize text elements @@ -69,12 +70,10 @@ def __call__( # Rotate back pages and boxes while keeping original image size if self.straighten_pages: - pages = [rotate_image(page, angle, expand=True, mask_shape=mask) for page, angle, mask in - zip(pages, page_orientations, page_shapes)] rboxes = [rotate_boxes(page_boxes, angle, expand=True, orig_shape=page.shape[:2], mask_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, page_orientations, page_shapes)] boxes = rboxes self.doc_builder = DocumentBuilder(rotated_bbox=True) # override the current doc_builder - out = self.doc_builder(boxes, text_preds, [page.shape[:2] for page in pages]) # type: ignore[misc] + out = self.doc_builder(boxes, text_preds, page_shapes) # type: ignore[misc] return out diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py index ad84a39fa2..cea0c79701 100644 --- a/doctr/models/zoo.py +++ b/doctr/models/zoo.py @@ -12,7 +12,7 @@ __all__ = ["ocr_predictor"] -def _predictor(det_arch: str, reco_arch: str, pretrained: bool, det_bs=2, reco_bs=128) -> OCRPredictor: +def _predictor(det_arch: str, reco_arch: str, pretrained: bool, det_bs=2, reco_bs=128, *kwargs) -> OCRPredictor: # Detection det_predictor = detection_predictor(det_arch, pretrained=pretrained, batch_size=det_bs) @@ -20,7 +20,7 @@ def _predictor(det_arch: str, reco_arch: str, pretrained: bool, det_bs=2, reco_b # Recognition reco_predictor = recognition_predictor(reco_arch, pretrained=pretrained, batch_size=reco_bs) - return OCRPredictor(det_predictor, reco_predictor) + return OCRPredictor(det_predictor, reco_predictor, *kwargs) def ocr_predictor( diff --git a/test/common/test_models.py b/test/common/test_models.py index 979b1bcab6..124ba08a82 100644 --- a/test/common/test_models.py +++ b/test/common/test_models.py @@ -5,7 +5,8 @@ import cv2 from doctr.io import reader, DocumentFile -from doctr.models._utils import extract_crops, extract_rcrops, get_bitmap_angle, estimate_orientation, rotate_image +from doctr.utils import geometry +from doctr.models._utils import extract_crops, extract_rcrops, get_bitmap_angle, estimate_orientation def test_extract_crops(mock_pdf): # noqa: F811 @@ -94,6 +95,6 @@ def test_estimate_orientation(mock_image): assert abs(angle - 30.) < 1. angle = estimate_orientation(mock_image) - rotated = rotate_image(mock_image, -angle) + rotated = geometry.rotate_image(mock_image, -angle) angle_rotated = estimate_orientation(rotated) assert abs(angle_rotated - 0.) < 1. From eb341acbb40a9b8533c4bec5118c629ff6c59382 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 6 Oct 2021 11:39:22 +0200 Subject: [PATCH 10/53] fix: typos --- doctr/models/zoo.py | 4 ++-- doctr/utils/geometry.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py index cea0c79701..ad84a39fa2 100644 --- a/doctr/models/zoo.py +++ b/doctr/models/zoo.py @@ -12,7 +12,7 @@ __all__ = ["ocr_predictor"] -def _predictor(det_arch: str, reco_arch: str, pretrained: bool, det_bs=2, reco_bs=128, *kwargs) -> OCRPredictor: +def _predictor(det_arch: str, reco_arch: str, pretrained: bool, det_bs=2, reco_bs=128) -> OCRPredictor: # Detection det_predictor = detection_predictor(det_arch, pretrained=pretrained, batch_size=det_bs) @@ -20,7 +20,7 @@ def _predictor(det_arch: str, reco_arch: str, pretrained: bool, det_bs=2, reco_b # Recognition reco_predictor = recognition_predictor(reco_arch, pretrained=pretrained, batch_size=reco_bs) - return OCRPredictor(det_predictor, reco_predictor, *kwargs) + return OCRPredictor(det_predictor, reco_predictor) def ocr_predictor( diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 146bb0bfd6..1ea7ec5482 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -11,7 +11,7 @@ __all__ = ['rbbox_to_polygon', 'bbox_to_polygon', 'polygon_to_bbox', 'polygon_to_rbbox', 'resolve_enclosing_bbox', 'resolve_enclosing_bbox', 'fit_rbbox', 'rotate_boxes', 'rotate_abs_boxes', - 'compute_expanded_shape'] + 'compute_expanded_shape', 'rotate_image'] def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P: @@ -151,13 +151,13 @@ def remap_boxes(boxes: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tupl raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}") if len(orig_shape) != 2: raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}") - orig_width, orig_height = orig_shape - dest_width, dest_height = dest_shape + orig_height, orig_width = orig_shape + dest_height, dest_width = dest_shape mboxes = boxes.copy() - mboxes[:, 0] = ((boxes[:, 0] * orig_height) + (dest_height - orig_height) / 2) / dest_height - mboxes[:, 1] = ((boxes[:, 1] * orig_width) + (dest_width - orig_width) / 2) / dest_width - mboxes[:, 2] = boxes[:, 2] * orig_height / dest_height - mboxes[:, 3] = boxes[:, 3] * orig_width / dest_width + mboxes[:, 0] = ((boxes[:, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width + mboxes[:, 1] = ((boxes[:, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height + mboxes[:, 2] = boxes[:, 2] * orig_width / dest_width + mboxes[:, 3] = boxes[:, 3] * orig_height / dest_height return mboxes From eeff2d61bbaca8226428d099edb29153d138d5fd Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 6 Oct 2021 14:33:20 +0200 Subject: [PATCH 11/53] =?UTF-8?q?fix:=20add=20more=20testing=20for=20remap?= =?UTF-8?q?=5Fboxes=20in=20cases=20of=20boxes=20with=20an=20angle=20of=204?= =?UTF-8?q?5=C2=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/common/test_utils_geometry.py | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/test/common/test_utils_geometry.py b/test/common/test_utils_geometry.py index 45e9f6bf4b..ae6fef07c7 100644 --- a/test/common/test_utils_geometry.py +++ b/test/common/test_utils_geometry.py @@ -1,5 +1,8 @@ -from doctr.utils import geometry +import cv2 import numpy as np +from math import hypot + +from doctr.utils import geometry def test_bbox_to_polygon(): @@ -53,6 +56,34 @@ def test_remap_boxes(): target = np.array([[0.375, 0.5, 0.25, 0.1, 0.]]) assert pred.all() == target.all() + orig_dimension = (100, 100) + dest_dimensions = (100, 200) + orig_box = np.array([[0.5, 0.5, 0.2, 0., 45]]) + # Unpack + height_o, width_o = orig_dimension + height_d, width_d = dest_dimensions + pred = geometry.remap_boxes(orig_box, orig_dimension, dest_dimensions) + + x, y, w, h, a = orig_box[0] + # Switch to absolute coords + x, w = x * width_o, w * width_o + y, h = y * height_o, h * height_o + orig = cv2.boxPoints(((x, y), (w, h), a)) + + x, y, w, h, a = pred[0] + # Switch to absolute coords + x, w = x * width_d, w * width_d + y, h = y * height_d, h * height_d + dest = cv2.boxPoints(((x, y), (w, h), a)) + + len_orig = hypot(orig[0][0] - orig[2][0], orig[0][1] - orig[2][1]) + len_dest = hypot(dest[0][0] - dest[2][0], dest[0][1] - dest[2][1]) + assert len_orig == len_dest + + alpha_orig = np.rad2deg(np.arctan((orig[0][1] - orig[2][1])/ (orig[0][0] - orig[2][0]))) + alpha_dest = np.rad2deg(np.arctan((dest[0][1] - dest[2][1])/ (dest[0][0] - dest[2][0]))) + assert alpha_orig == alpha_dest + def test_rotate_boxes(): boxes = np.array([[0.1, 0.1, 0.8, 0.3]]) From 16f3489159ebb59df68d5752eeb3d550b9a7ded3 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 14 Oct 2021 09:27:33 +0200 Subject: [PATCH 12/53] fix: remove the cropping after rotation of the image --- doctr/utils/geometry.py | 15 +-------------- test/common/test_utils_geometry.py | 4 ---- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 1ea7ec5482..ba23d685a7 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -214,8 +214,7 @@ def rotate_image( image: np.ndarray, angle: float, expand: bool = False, - keep_original_size: bool = False, - mask_shape: Optional[Tuple[int, int]] = None + keep_original_size: bool = False ) -> np.ndarray: """Rotate an image counterclockwise by an given angle. @@ -224,7 +223,6 @@ def rotate_image( angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation keep_original_size: whether the image should be resized to the original image size after the rotation - mask_shape: applies a mask on the image of the specified shape given in absolute pixels after the rotation Returns: Rotated array, padded by 0 by default. @@ -254,15 +252,4 @@ def rotate_image( # rescale rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) - if mask_shape is not None: - if len(mask_shape) != 2: - raise ValueError(f"Mask length should be 2, was found at: {len(mask_shape)}") - h_crop, w_crop = int(height - ceil(mask_shape[0])), int(ceil(width - mask_shape[1])) - if h_crop > 0 and w_crop > 0: - rot_img = rot_img[h_crop // 2: - h_crop // 2, w_crop // 2: - w_crop // 2] - elif w_crop <= 0: - rot_img = rot_img[h_crop // 2: - h_crop // 2, ] - elif h_crop <= 0: - rot_img = rot_img[:, w_crop // 2: - w_crop // 2] - return rot_img diff --git a/test/common/test_utils_geometry.py b/test/common/test_utils_geometry.py index ae6fef07c7..b7b8e7def1 100644 --- a/test/common/test_utils_geometry.py +++ b/test/common/test_utils_geometry.py @@ -130,7 +130,3 @@ def test_rotate_image(): assert rotated.shape[:-1] == (64, 64) assert rotated[0, :, 0].sum() <= 1 - # Expand with mask - rotated = geometry.rotate_image(img, 30., expand=True, mask_shape=(40, 72)) - assert rotated.shape[:-1] == (40, 72) - assert rotated[0, :, 0].sum() > 1 From f7fcf907b21164b3fa1aff9332d6a52be2b4d490 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Mon, 25 Oct 2021 15:00:30 +0200 Subject: [PATCH 13/53] fix: correct model/_utils.py --- doctr/models/_utils.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index ea9cf2883c..36a8d6f9f4 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -5,13 +5,11 @@ import numpy as np import cv2 -from math import floor, ceil -from typing import List, Optional, Tuple +from math import floor +from typing import List from statistics import median_low -from doctr.utils import compute_expanded_shape - -__all__ = ['estimate_orientation', 'extract_crops', 'extract_rcrops', 'get_bitmap_angle', 'rotate_image'] +__all__ = ['estimate_orientation', 'extract_crops', 'extract_rcrops', 'get_bitmap_angle'] def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]: From cf9ab0dd2c46608e741a1aebdb22a4fe26a1dbdd Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 28 Oct 2021 17:36:27 +0200 Subject: [PATCH 14/53] fix: do not use resolve_lines and resolve_boxes as it does not work with rotated boxes --- doctr/models/predictor/tensorflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index b4008f075c..9b9f036222 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -73,7 +73,7 @@ def __call__( rboxes = [rotate_boxes(page_boxes, angle, expand=True, orig_shape=page.shape[:2], mask_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, page_orientations, page_shapes)] boxes = rboxes - self.doc_builder = DocumentBuilder(rotated_bbox=True) # override the current doc_builder + self.doc_builder = DocumentBuilder(rotated_bbox=True, resolve_lines=False, resolve_blocks=False) # override the current doc_builder out = self.doc_builder(boxes, text_preds, page_shapes) # type: ignore[misc] return out From 8a310140ea5c2001923fa3b72ce27d9dfc8ba8b9 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 29 Oct 2021 10:44:32 +0200 Subject: [PATCH 15/53] fix: remove expand in geometry.rotate_boxes --- doctr/models/predictor/tensorflow.py | 12 ++++++------ doctr/utils/geometry.py | 4 ---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 9b9f036222..9bcf27c333 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -52,12 +52,12 @@ def __call__( if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") - page_shapes = [page.shape[:-1] for page in pages] + origin_page_shapes = [page.shape[:-1] for page in pages] # Detect document rotation and rotate pages if self.straighten_pages: - page_orientations = [estimate_orientation(page) for page in pages] - pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, page_orientations)] + origin_page_orientations = [estimate_orientation(page) for page in pages] + pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)] # Localize text elements loc_preds = self.det_predictor(pages, **kwargs) @@ -70,10 +70,10 @@ def __call__( # Rotate back pages and boxes while keeping original image size if self.straighten_pages: - rboxes = [rotate_boxes(page_boxes, angle, expand=True, orig_shape=page.shape[:2], mask_shape=mask) for - page_boxes, page, angle, mask in zip(boxes, pages, page_orientations, page_shapes)] + rboxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for + page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] boxes = rboxes self.doc_builder = DocumentBuilder(rotated_bbox=True, resolve_lines=False, resolve_blocks=False) # override the current doc_builder - out = self.doc_builder(boxes, text_preds, page_shapes) # type: ignore[misc] + out = self.doc_builder(boxes, text_preds, origin_page_shapes) # type: ignore[misc] return out diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 1d85fb04ae..bc65dd12e4 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -202,10 +202,6 @@ def rotate_boxes( # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return boxes - if expand: - exp_shape = compute_expanded_shape(orig_shape, angle) - boxes = remap_boxes(boxes, orig_shape=orig_shape, dest_shape=exp_shape) - orig_shape = exp_shape # in case a mask is used afterwards # Compute rotation matrix angle_rad = angle * np.pi / 180. # compute radian angle for np functions rotation_mat = np.array([ From e457cf08febbf0c4719584dd623eae3bcef5f990 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 29 Oct 2021 10:46:33 +0200 Subject: [PATCH 16/53] fix: reformat code --- doctr/models/predictor/tensorflow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 9bcf27c333..d37220fb1d 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -70,10 +70,10 @@ def __call__( # Rotate back pages and boxes while keeping original image size if self.straighten_pages: - rboxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for + boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] - boxes = rboxes - self.doc_builder = DocumentBuilder(rotated_bbox=True, resolve_lines=False, resolve_blocks=False) # override the current doc_builder + # override the current doc_builder with rotated_bbox + self.doc_builder = DocumentBuilder(rotated_bbox=True, resolve_lines=False, resolve_blocks=False) out = self.doc_builder(boxes, text_preds, origin_page_shapes) # type: ignore[misc] return out From 9975c82a0d48ff008bb028630aa5fe1a7b0f5ecb Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 29 Oct 2021 10:59:00 +0200 Subject: [PATCH 17/53] fix: reformat expand from function signature --- doctr/utils/geometry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index bc65dd12e4..51bae4190f 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -179,7 +179,6 @@ def rotate_boxes( boxes: np.ndarray, angle: float = 0., min_angle: float = 1., - expand: bool = False, orig_shape: Optional[Tuple[int, int]] = None, mask_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: @@ -190,7 +189,6 @@ def rotate_boxes( boxes: (N, 4) array of RELATIVE boxes angle: angle between -90 and +90 degrees min_angle: minimum angle to rotate boxes - expand: whether the image should be padded before the rotation orig_shape: shape of the origin image mask_shape: shape of the mask if the image is cropped after the rotation From 32d53e44225090f9b2664d263a28ea193031dfa4 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 29 Oct 2021 10:59:31 +0200 Subject: [PATCH 18/53] fix: rename keep_original_size to preserve_aspect_ratio --- doctr/utils/geometry.py | 6 +++--- test/common/test_utils_geometry.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 51bae4190f..102fc4908b 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -222,7 +222,7 @@ def rotate_image( image: np.ndarray, angle: float, expand: bool = False, - keep_original_size: bool = False + preserve_aspect_ratio: bool = False ) -> np.ndarray: """Rotate an image counterclockwise by an given angle. @@ -230,7 +230,7 @@ def rotate_image( image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation - keep_original_size: whether the image should be resized to the original image size after the rotation + preserve_aspect_ratio: whether the image should be resized to the original image size after the rotation Returns: Rotated array, padded by 0 by default. @@ -247,7 +247,7 @@ def rotate_image( height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) - if keep_original_size: + if preserve_aspect_ratio: # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): # Pad width diff --git a/test/common/test_utils_geometry.py b/test/common/test_utils_geometry.py index b7b8e7def1..445f3ee6f9 100644 --- a/test/common/test_utils_geometry.py +++ b/test/common/test_utils_geometry.py @@ -121,7 +121,7 @@ def test_rotate_image(): assert rotated[0, :, 0].sum() <= 1 # Expand - rotated = geometry.rotate_image(img, 30., expand=True, keep_original_size=True) + rotated = geometry.rotate_image(img, 30., expand=True, preserve_aspect_ratio=True) assert rotated.shape[:-1] == (32, 64) assert rotated[0, :, 0].sum() <= 1 From 6821442265e9020de0169b6615e57448ac5fa5a6 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 29 Oct 2021 11:16:09 +0200 Subject: [PATCH 19/53] fix: vectorize box transformation --- doctr/utils/geometry.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 102fc4908b..d76292c129 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -38,10 +38,6 @@ def polygon_to_rbbox(polygon: Polygon4P) -> RotatedBbox: return fit_rbbox(cnt) -def bbox_to_rbbox(bbox: Bbox) -> RotatedBbox: - return (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2, bbox[2] - bbox[0], bbox[3] - bbox[1], 0 - - def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]: """Compute enclosing bbox either from: @@ -196,7 +192,11 @@ def rotate_boxes( A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes """ # Change format of the boxes to rotated boxes - boxes = np.apply_along_axis(bbox_to_rbbox, 1, boxes) + boxes = np.column_stack(((boxes[:, 0] + boxes[:, 2]) / 2, + (boxes[:, 1] + boxes[:, 3]) / 2, + boxes[:, 2] - boxes[:, 0], + boxes[:, 3] - boxes[:, 1], + np.zeros(boxes.shape[0]))) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return boxes From 573f13fbc62557af77eab7be00323a452e438842 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 16 Nov 2021 16:15:00 +0100 Subject: [PATCH 20/53] fix: minor modifications + remove test_bbox_to_rbbox --- doctr/utils/geometry.py | 11 ++++++----- tests/common/test_utils_geometry.py | 8 +------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index e487d7dfe7..d733de9c14 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -146,8 +146,9 @@ def rotate_abs_boxes(boxes: np.ndarray, angle: float, img_shape: Tuple[int, int] def remap_boxes(boxes: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray: - """ Remaps a batch of RotatedBbox (x, y, w, h, alpha) expressed for an origin_shape to a destination_shape, - This does not impact the absolute shape of the boxes + """ Remaps a batch of RotatedBbox (x, y, w, h, alpha) expressed for an origin_shape to a destination_shape. + This does not impact the absolute shape of the boxes, but allow to calculate the new relative RotatedBbox + coordinates after a resizing of the image. Args: boxes: (N, 5) array of RELATIVE RotatedBbox (x, y, w, h, alpha) @@ -227,7 +228,7 @@ def rotate_image( image: np.ndarray, angle: float, expand: bool = False, - preserve_aspect_ratio: bool = False + preserve_origin_shape: bool = False, ) -> np.ndarray: """Rotate an image counterclockwise by an given angle. @@ -235,7 +236,7 @@ def rotate_image( image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation - preserve_aspect_ratio: whether the image should be resized to the original image size after the rotation + preserve_origin_shape: whether the image should be resized to the original image size after the rotation Returns: Rotated array, padded by 0 by default. @@ -252,7 +253,7 @@ def rotate_image( height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img.astype(np.float32), rot_mat, (width, height)) - if preserve_aspect_ratio: + if preserve_origin_shape: # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): # Pad width diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index 445f3ee6f9..612b815ed8 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -31,12 +31,6 @@ def test_polygon_to_rbbox(): assert all(abs(i - j) <= 1e-7 for (i, j) in zip(pred, target)) -def test_bbox_to_rbbox(): - pred = geometry.bbox_to_rbbox((0, 0, 0.6, 0.4)) - target = (0.3, 0.2, 0.6, 0.4, 0) - assert all(abs(i - j) <= 1e-7 for (i, j) in zip(pred, target)) - - def test_resolve_enclosing_rbbox(): pred = geometry.resolve_enclosing_rbbox([(.2, .2, .05, .05, 0), (.2, .2, .2, .2, 0)])[:4] target = (.2, .2, .2, .2) @@ -121,7 +115,7 @@ def test_rotate_image(): assert rotated[0, :, 0].sum() <= 1 # Expand - rotated = geometry.rotate_image(img, 30., expand=True, preserve_aspect_ratio=True) + rotated = geometry.rotate_image(img, 30., expand=True, preserve_origin_shape=True) assert rotated.shape[:-1] == (32, 64) assert rotated[0, :, 0].sum() <= 1 From 290e8ed470a9dc84707ce6ccad01419d4a943a5c Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 19 Nov 2021 11:09:29 +0100 Subject: [PATCH 21/53] fix: add the straighten_pages to the latest codebase --- doctr/models/builder.py | 8 +++---- doctr/models/predictor/base.py | 5 ++++- doctr/models/predictor/tensorflow.py | 6 ++--- doctr/utils/geometry.py | 33 ++++++++++++++-------------- tests/common/test_models.py | 2 +- tests/common/test_utils_geometry.py | 14 +++++++----- 6 files changed, 36 insertions(+), 32 deletions(-) diff --git a/doctr/models/builder.py b/doctr/models/builder.py index b7fcabc48b..838ec69ec4 100644 --- a/doctr/models/builder.py +++ b/doctr/models/builder.py @@ -215,7 +215,7 @@ def _build_blocks(self, boxes: np.ndarray, word_preds: List[Tuple[str, float]]) # Decide whether we try to form lines if self.resolve_lines: - lines = self._resolve_lines(boxes[:, :-1]) + lines = self._resolve_lines(boxes) # Decide whether we try to form blocks if self.resolve_blocks and len(lines) > 1: _blocks = self._resolve_blocks(boxes[:, :-1], lines) @@ -223,7 +223,7 @@ def _build_blocks(self, boxes: np.ndarray, word_preds: List[Tuple[str, float]]) _blocks = [lines] else: # Sort bounding boxes, one line for all boxes, one block for the line - lines = [self._sort_boxes(boxes[:, :-1])] + lines = [self._sort_boxes(boxes)] _blocks = [lines] blocks = [ @@ -233,7 +233,7 @@ def _build_blocks(self, boxes: np.ndarray, word_preds: List[Tuple[str, float]]) Word( *word_preds[idx], (boxes[idx, 0], boxes[idx, 1], boxes[idx, 2], boxes[idx, 3], boxes[idx, 4]) - ) if boxes.shape[1] == 6 else + ) if boxes.shape[1] == 5 else Word( *word_preds[idx], ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3])) @@ -273,7 +273,7 @@ def __call__( if self.export_as_straight_boxes and len(boxes) > 0: # If boxes are already straight OK, else fit a bounding rect - if boxes[0].shape[-1] == 6: + if boxes[0].shape[-1] == 5: straight_boxes = [] # Iterate over pages for page_boxes in boxes: diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py index 7a13951490..58d9e545b8 100644 --- a/doctr/models/predictor/base.py +++ b/doctr/models/predictor/base.py @@ -79,7 +79,10 @@ def _process_predictions( boxes, angles = zip(*loc_preds) # Rotate back boxes if necessary if allow_rotated_boxes: - boxes = [rotate_boxes(page_boxes, angle) for page_boxes, angle in zip(boxes, angles)] + boxes = [rotate_boxes(page_boxes[:, :-1], angle) for page_boxes, angle in zip(boxes, angles)] + # if straight boxes, convert to bbox + else: + boxes = [page_boxes[:, :-1] for page_boxes in boxes] # Text _idx = 0 for page_boxes in boxes: diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 269f1eed10..3934502394 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -9,12 +9,12 @@ import tensorflow as tf from doctr.io.elements import Document -from doctr.utils.geometry import rotate_boxes, rotate_image +from doctr.models._utils import estimate_orientation from doctr.models.builder import DocumentBuilder from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor +from doctr.utils.geometry import rotate_boxes, rotate_image from doctr.utils.repr import NestedObject -from doctr.models._utils import estimate_orientation from .base import _OCRPredictor @@ -80,8 +80,6 @@ def __call__( if self.straighten_pages: boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] - # override the current doc_builder with rotated_bbox - self.doc_builder = DocumentBuilder(export_as_straight_boxes=False, resolve_lines=False, resolve_blocks=False) out = self.doc_builder(boxes, text_preds, origin_page_shapes) # type: ignore[misc] return out diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index d733de9c14..94d367fc32 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -4,7 +4,7 @@ # See LICENSE or go to for full license details. from math import ceil -from typing import List, Union, Tuple, Optional +from typing import List, Optional, Tuple, Union import cv2 import numpy as np @@ -181,11 +181,11 @@ def rotate_boxes( orig_shape: Optional[Tuple[int, int]] = None, mask_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: - """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax) of an angle, - if angle > min_angle, around the center of the page. + """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax) or rotated bounding boxes + (x, y, w, h, alpha) of an angle, if angle > min_angle, around the center of the page. Args: - boxes: (N, 4) array of RELATIVE boxes + boxes: (N, 4) or (N, 5) array of RELATIVE boxes angle: angle between -90 and +90 degrees min_angle: minimum angle to rotate boxes orig_shape: shape of the origin image @@ -195,11 +195,12 @@ def rotate_boxes( A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes """ # Change format of the boxes to rotated boxes - boxes = np.column_stack(((boxes[:, 0] + boxes[:, 2]) / 2, - (boxes[:, 1] + boxes[:, 3]) / 2, - boxes[:, 2] - boxes[:, 0], - boxes[:, 3] - boxes[:, 1], - np.zeros(boxes.shape[0]))) + if boxes.shape[1] == 4: + boxes = np.column_stack(((boxes[:, 0] + boxes[:, 2]) / 2, + (boxes[:, 1] + boxes[:, 3]) / 2, + boxes[:, 2] - boxes[:, 0], + boxes[:, 3] - boxes[:, 1], + np.zeros(boxes.shape[0]))) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return boxes @@ -209,17 +210,14 @@ def rotate_boxes( [np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)] ], dtype=boxes.dtype) - # Compute unrotated boxes - x_unrotated, y_unrotated = (boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2 - width, height = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] # Rotate centers - centers = np.stack((x_unrotated, y_unrotated), axis=-1) + centers = np.stack((boxes[:, 0], boxes[:, 1]), axis=-1) rotated_centers = .5 + np.matmul(centers - .5, rotation_mat) x_center, y_center = rotated_centers[:, 0], rotated_centers[:, 1] # Compute rotated boxes - rotated_boxes = np.stack((x_center, y_center, width, height, angle * np.ones_like(boxes[:, 0])), axis=1) + rotated_boxes = np.stack((x_center, y_center, boxes[:, 2], boxes[:, 3], angle * np.ones_like(boxes[:, 0])), axis=1) # Apply a mask if requested - if mask_shape is not None: + if mask_shape is not None and orig_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=mask_shape) return rotated_boxes @@ -245,14 +243,15 @@ def rotate_image( # Compute the expanded padding if expand: exp_shape = compute_expanded_shape(image.shape[:-1], angle) - h_pad, w_pad = int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))) + h_pad, w_pad = int(max(0, ceil(exp_shape[0] - image.shape[0]))), int( + max(0, ceil(exp_shape[1] - image.shape[1]))) exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) else: exp_img = image height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) - rot_img = cv2.warpAffine(exp_img.astype(np.float32), rot_mat, (width, height)) + rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) if preserve_origin_shape: # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): diff --git a/tests/common/test_models.py b/tests/common/test_models.py index c0158415c5..8364bfd91b 100644 --- a/tests/common/test_models.py +++ b/tests/common/test_models.py @@ -6,8 +6,8 @@ import requests from doctr.io import DocumentFile, reader -from doctr.utils import geometry from doctr.models._utils import estimate_orientation, extract_crops, extract_rcrops, get_bitmap_angle +from doctr.utils import geometry def test_extract_crops(mock_pdf): # noqa: F811 diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index 612b815ed8..b297010508 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -1,6 +1,7 @@ +from math import hypot + import cv2 import numpy as np -from math import hypot from doctr.utils import geometry @@ -74,14 +75,18 @@ def test_remap_boxes(): len_dest = hypot(dest[0][0] - dest[2][0], dest[0][1] - dest[2][1]) assert len_orig == len_dest - alpha_orig = np.rad2deg(np.arctan((orig[0][1] - orig[2][1])/ (orig[0][0] - orig[2][0]))) - alpha_dest = np.rad2deg(np.arctan((dest[0][1] - dest[2][1])/ (dest[0][0] - dest[2][0]))) + alpha_orig = np.rad2deg(np.arctan((orig[0][1] - orig[2][1]) / (orig[0][0] - orig[2][0]))) + alpha_dest = np.rad2deg(np.arctan((dest[0][1] - dest[2][1]) / (dest[0][0] - dest[2][0]))) assert alpha_orig == alpha_dest def test_rotate_boxes(): boxes = np.array([[0.1, 0.1, 0.8, 0.3]]) - rboxes = np.apply_along_axis(geometry.bbox_to_rbbox, 1, boxes) + rboxes = np.column_stack(((boxes[:, 0] + boxes[:, 2]) / 2, + (boxes[:, 1] + boxes[:, 3]) / 2, + boxes[:, 2] - boxes[:, 0], + boxes[:, 3] - boxes[:, 1], + np.zeros(boxes.shape[0]))) # Angle = 0 rotated = geometry.rotate_boxes(boxes, angle=0.) assert rotated.all() == rboxes.all() @@ -123,4 +128,3 @@ def test_rotate_image(): rotated = geometry.rotate_image(img, 90., expand=True) assert rotated.shape[:-1] == (64, 64) assert rotated[0, :, 0].sum() <= 1 - From 1775ebfa90684f993d4e5ae957859df1802cc687 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 19 Nov 2021 11:29:02 +0100 Subject: [PATCH 22/53] feat: add the straighten_pages to the pytorch predictor --- doctr/models/predictor/pytorch.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index de466ec234..fcbf8b24e3 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -10,9 +10,11 @@ from torch import nn from doctr.io.elements import Document +from doctr.models._utils import estimate_orientation from doctr.models.builder import DocumentBuilder from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor +from doctr.utils.geometry import rotate_boxes, rotate_image from .base import _OCRPredictor @@ -33,6 +35,7 @@ def __init__( reco_predictor: RecognitionPredictor, assume_straight_pages: bool = True, export_as_straight_boxes: bool = False, + straighten_pages: bool = False, ) -> None: super().__init__() @@ -40,6 +43,7 @@ def __init__( self.reco_predictor = reco_predictor.eval() # type: ignore[attr-defined] self.doc_builder = DocumentBuilder(export_as_straight_boxes=export_as_straight_boxes) self.assume_straight_pages = assume_straight_pages + self.straighten_pages = straighten_pages @torch.no_grad() def forward( @@ -52,6 +56,13 @@ def forward( if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") + origin_page_shapes = [page.shape[:2] for page in pages] + + # Detect document rotation and rotate pages + if self.straighten_pages: + origin_page_orientations = [estimate_orientation(page) for page in pages] + pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)] + # Localize text elements loc_preds = self.det_predictor(pages, **kwargs) # Check whether crop mode should be switched to channels first @@ -67,6 +78,11 @@ def forward( loc_preds, word_preds, allow_rotated_boxes=not self.doc_builder.export_as_straight_boxes ) + # Rotate back pages and boxes while keeping original image size + if self.straighten_pages: + boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for + page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] + out = self.doc_builder( boxes, text_preds, From f199044d167f1a58f5297ee97afb7afe020278d6 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 19 Nov 2021 14:55:04 +0100 Subject: [PATCH 23/53] feat: add testing for the straighten_pages parameter --- doctr/models/predictor/pytorch.py | 3 +++ doctr/models/predictor/tensorflow.py | 3 +++ tests/tensorflow/test_models_zoo_tf.py | 13 +++++++++++++ 3 files changed, 19 insertions(+) diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index fcbf8b24e3..ddd2964d94 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -27,6 +27,9 @@ class OCRPredictor(nn.Module, _OCRPredictor): Args: det_predictor: detection module reco_predictor: recognition module + straighten_pages: if True, evaluates the page general orientation based on the median of each line orientation. + Then, rotates page before using Detection and Recognition Predictors. Then rotates page back to original + orientation. This improves the Detection and Recognition Predictors predictions. """ def __init__( diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 3934502394..c2c7c9077a 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -27,6 +27,9 @@ class OCRPredictor(NestedObject, _OCRPredictor): Args: det_predictor: detection module reco_predictor: recognition module + straighten_pages: if True, evaluates the page general orientation based on the median of each line orientation. + Then, rotates page before using Detection and Recognition Predictors. Then rotates page back to original + orientation. This improves the Detection and Recognition Predictors predictions. """ _children_names = ['det_predictor', 'reco_predictor'] diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py index 2042ece3b9..af6130d5f0 100644 --- a/tests/tensorflow/test_models_zoo_tf.py +++ b/tests/tensorflow/test_models_zoo_tf.py @@ -16,24 +16,37 @@ def test_ocrpredictor( test_detectionpredictor, test_recognitionpredictor, assume_straight_pages=True, + straighten_pages=False, ) r_predictor = OCRPredictor( test_rotated_detectionpredictor, test_recognitionpredictor, assume_straight_pages=False, + straighten_pages=False, + ) + + s_predictor = OCRPredictor( + test_rotated_detectionpredictor, + test_recognitionpredictor, + assume_straight_pages=False, + straighten_pages=True, ) doc = DocumentFile.from_pdf(mock_pdf).as_images() out = predictor(doc) r_out = r_predictor(doc) + s_out = s_predictor(doc) # Document assert isinstance(out, Document) assert isinstance(r_out, Document) + assert isinstance(s_out, Document) # The input PDF has 8 pages assert len(out.pages) == 8 + assert len(r_out.pages) == 8 + assert len(s_out.pages) == 8 # Dimension check with pytest.raises(ValueError): input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) From 887ed25ff06362829098b9b0212e5d8ddde22a81 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 19 Nov 2021 14:59:22 +0100 Subject: [PATCH 24/53] fix: in case no angle is found in estimate_orientation return 0 --- doctr/models/_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index 514ce9c71b..af82ca5124 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -150,7 +150,11 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li angles.append(angle) elif w / h < 1 / ratio_threshold_for_lines: # if lines are vertical, substract 90 degree angles.append(angle - 90) - return -median_low(angles) + + if len(angles) == 0: + return 0 # in case no angles is found + else: + return -median_low(angles) def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.) -> float: From 239c508dfd19e9fdc8e90476244832ce30c64d95 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 15:39:19 +0100 Subject: [PATCH 25/53] fix: make sure boxes are outputted from _process_predictions --- doctr/models/builder.py | 2 +- doctr/models/predictor/base.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doctr/models/builder.py b/doctr/models/builder.py index 838ec69ec4..cfe9150f11 100644 --- a/doctr/models/builder.py +++ b/doctr/models/builder.py @@ -218,7 +218,7 @@ def _build_blocks(self, boxes: np.ndarray, word_preds: List[Tuple[str, float]]) lines = self._resolve_lines(boxes) # Decide whether we try to form blocks if self.resolve_blocks and len(lines) > 1: - _blocks = self._resolve_blocks(boxes[:, :-1], lines) + _blocks = self._resolve_blocks(boxes, lines) else: _blocks = [lines] else: diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py index c97e29131b..3b3f0183a4 100644 --- a/doctr/models/predictor/base.py +++ b/doctr/models/predictor/base.py @@ -65,12 +65,13 @@ def _process_predictions( word_preds: List[Tuple[str, float]], ) -> Tuple[List[np.ndarray], List[List[Tuple[str, float]]]]: - text_preds = [] + boxes, text_preds = [], [] if len(loc_preds) > 0: # Text _idx = 0 for page_boxes in loc_preds: + boxes.append(page_boxes[:, :-1]) text_preds.append(word_preds[_idx: _idx + page_boxes.shape[0]]) _idx += page_boxes.shape[0] - return loc_preds, text_preds + return boxes, text_preds From 52461dc7a47be21629d704334bde6889c2f92525 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 15:55:51 +0100 Subject: [PATCH 26/53] fix: update docstrings in OCRPredictor --- doctr/models/predictor/pytorch.py | 6 +++--- doctr/models/predictor/tensorflow.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index 08726ff11f..e804f7c08d 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -31,9 +31,9 @@ class OCRPredictor(nn.Module, _OCRPredictor): without rotated textual elements. export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions (potentially rotated) as straight bounding boxes. - straighten_pages: if True, evaluates the page general orientation based on the median of each line orientation. - Then, rotates page before using Detection and Recognition Predictors. Then rotates page back to original - orientation. This improves the Detection and Recognition Predictors predictions. + straighten_pages: if True, estimates the page general orientation based on the median line orientation. + Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped + accordingly. Doing so will improve performances for documents with page-uniform rotations. """ diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index ac261c2734..a9c1dde9f5 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -31,9 +31,9 @@ class OCRPredictor(NestedObject, _OCRPredictor): without rotated textual elements. export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions (potentially rotated) as straight bounding boxes. - straighten_pages: if True, evaluates the page general orientation based on the median of each line orientation. - Then, rotates page before using Detection and Recognition Predictors. Then rotates page back to original - orientation. This improves the Detection and Recognition Predictors predictions. + straighten_pages: if True, estimates the page general orientation based on the median line orientation. + Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped + accordingly. Doing so will improve performances for documents with page-uniform rotations. """ _children_names = ['det_predictor', 'reco_predictor'] From b6f8ccaf0e44b0ec412202fc8f4522cc0e4401ee Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 15:56:15 +0100 Subject: [PATCH 27/53] fix: create a copy of boxes inside rotate_boxes --- doctr/utils/geometry.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 94d367fc32..67671a9707 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -195,27 +195,28 @@ def rotate_boxes( A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes """ # Change format of the boxes to rotated boxes - if boxes.shape[1] == 4: - boxes = np.column_stack(((boxes[:, 0] + boxes[:, 2]) / 2, - (boxes[:, 1] + boxes[:, 3]) / 2, - boxes[:, 2] - boxes[:, 0], - boxes[:, 3] - boxes[:, 1], - np.zeros(boxes.shape[0]))) + _boxes = boxes.copy() + if _boxes.shape[1] == 4: + _boxes = np.column_stack(((_boxes[:, 0] + _boxes[:, 2]) / 2, + (_boxes[:, 1] + _boxes[:, 3]) / 2, + _boxes[:, 2] - _boxes[:, 0], + _boxes[:, 3] - _boxes[:, 1], + np.zeros(_boxes.shape[0]))) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: - return boxes + return _boxes # Compute rotation matrix angle_rad = angle * np.pi / 180. # compute radian angle for np functions rotation_mat = np.array([ [np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)] - ], dtype=boxes.dtype) + ], dtype=_boxes.dtype) # Rotate centers - centers = np.stack((boxes[:, 0], boxes[:, 1]), axis=-1) + centers = np.stack((_boxes[:, 0], _boxes[:, 1]), axis=-1) rotated_centers = .5 + np.matmul(centers - .5, rotation_mat) x_center, y_center = rotated_centers[:, 0], rotated_centers[:, 1] # Compute rotated boxes - rotated_boxes = np.stack((x_center, y_center, boxes[:, 2], boxes[:, 3], angle * np.ones_like(boxes[:, 0])), axis=1) + rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0])), axis=1) # Apply a mask if requested if mask_shape is not None and orig_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=mask_shape) From a9f3d6e56052e50e8ea4b0a7c50232ca45053a78 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 16:05:34 +0100 Subject: [PATCH 28/53] fix: update docstring for rotate_image --- doctr/utils/geometry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 67671a9707..0335bf6a8e 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -235,7 +235,8 @@ def rotate_image( image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation - preserve_origin_shape: whether the image should be resized to the original image size after the rotation + preserve_origin_shape: whether the image should be resized to the original image size after the rotation. + Only useful if expand is True. Returns: Rotated array, padded by 0 by default. From ea69de630a09a62881c75d4848effc2faa0d939a Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 16:15:33 +0100 Subject: [PATCH 29/53] fix: add comments inside remap_boxes --- doctr/utils/geometry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 0335bf6a8e..37c89c41fb 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -167,8 +167,10 @@ def remap_boxes(boxes: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tupl orig_height, orig_width = orig_shape dest_height, dest_width = dest_shape mboxes = boxes.copy() + # remaps position of the box center for the destination image shape mboxes[:, 0] = ((boxes[:, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width mboxes[:, 1] = ((boxes[:, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height + # remaps box dimension for the destination image shape mboxes[:, 2] = boxes[:, 2] * orig_width / dest_width mboxes[:, 3] = boxes[:, 3] * orig_height / dest_height return mboxes From 1a72a8c3cf80d0e52c949ad7c33ee8f1875a53ae Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 16:28:05 +0100 Subject: [PATCH 30/53] fix: change testing in test_estimate_orientation --- tests/common/test_models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/common/test_models.py b/tests/common/test_models.py index 8364bfd91b..1e514859fe 100644 --- a/tests/common/test_models.py +++ b/tests/common/test_models.py @@ -95,7 +95,6 @@ def test_estimate_orientation(mock_image): angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. - angle = estimate_orientation(mock_image) rotated = geometry.rotate_image(mock_image, -angle) angle_rotated = estimate_orientation(rotated) assert abs(angle_rotated - 0.) < 1. From d658be4cadabfc59f0a4fc209a236fff43820acd Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 24 Nov 2021 16:29:51 +0100 Subject: [PATCH 31/53] fix: change testing in test_estimate_orientation --- tests/common/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/test_models.py b/tests/common/test_models.py index 1e514859fe..98ab9c553f 100644 --- a/tests/common/test_models.py +++ b/tests/common/test_models.py @@ -97,4 +97,4 @@ def test_estimate_orientation(mock_image): rotated = geometry.rotate_image(mock_image, -angle) angle_rotated = estimate_orientation(rotated) - assert abs(angle_rotated - 0.) < 1. + assert abs(angle_rotated) < 1. From 9a6c658a3b43a0621144eef63f9fed834f656995 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Mon, 29 Nov 2021 09:35:59 +0100 Subject: [PATCH 32/53] fix: delete imports not used --- doctr/models/predictor/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py index 3b3f0183a4..254e4ea2e8 100644 --- a/doctr/models/predictor/base.py +++ b/doctr/models/predictor/base.py @@ -7,7 +7,6 @@ from typing import List, Tuple from doctr.models.builder import DocumentBuilder -from doctr.utils.geometry import rotate_boxes, rotate_image from .._utils import extract_crops, extract_rcrops __all__ = ['_OCRPredictor'] From a9cbe048771f1751c6555ea23a9a79571680e923 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Mon, 29 Nov 2021 10:04:00 +0100 Subject: [PATCH 33/53] fix: styling --- doctr/utils/geometry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 37c89c41fb..c318e5f151 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -218,7 +218,8 @@ def rotate_boxes( rotated_centers = .5 + np.matmul(centers - .5, rotation_mat) x_center, y_center = rotated_centers[:, 0], rotated_centers[:, 1] # Compute rotated boxes - rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0])), axis=1) + rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0])), + axis=1) # Apply a mask if requested if mask_shape is not None and orig_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=mask_shape) From ebdc320a2c6a3a47f5d6998df2dc7fd665c0221f Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 30 Nov 2021 09:44:41 +0100 Subject: [PATCH 34/53] fix: change assertion in test_utils_geometry.py --- tests/common/test_utils_geometry.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index b297010508..f72210257f 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -41,15 +41,15 @@ def test_resolve_enclosing_rbbox(): def test_remap_boxes(): pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 20)) target = np.array([[0.5, 0.5, 0.05, 0.05, 0.]]) - assert pred.all() == target.all() + assert np.all(pred == target) pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 10)) target = np.array([[0.5, 0.5, 0.1, 0.05, 0.]]) - assert pred.all() == target.all() + assert np.all(pred == target) pred = geometry.remap_boxes(np.array([[0.25, 0.5, 0.5, 0.33, 0.]]), (80, 30), (160, 30)) target = np.array([[0.375, 0.5, 0.25, 0.1, 0.]]) - assert pred.all() == target.all() + assert np.all(pred == target) orig_dimension = (100, 100) dest_dimensions = (100, 200) @@ -89,10 +89,10 @@ def test_rotate_boxes(): np.zeros(boxes.shape[0]))) # Angle = 0 rotated = geometry.rotate_boxes(boxes, angle=0.) - assert rotated.all() == rboxes.all() + assert np.all(rotated == rboxes) # Angle < 1: rotated = geometry.rotate_boxes(boxes, angle=0.5) - assert rotated.all() == rboxes.all() + assert np.all(rotated == rboxes) # Angle = 30 rotated = geometry.rotate_boxes(boxes, angle=30) assert rotated.shape == (1, 5) @@ -101,10 +101,10 @@ def test_rotate_boxes(): boxes = np.array([[0., 0., 0.6, 0.2]]) # Angle = -90: rotated = geometry.rotate_boxes(boxes, angle=-90, min_angle=0) - assert rotated.all() == np.array([[0.1, 0.7, 0.6, 0.2, -90.]]).all() + assert np.all(rotated == np.array([[0.9, 0.3, 0.6, 0.2, -90.]])) # Angle = 90 rotated = geometry.rotate_boxes(boxes, angle=+90, min_angle=0) - assert rotated.all() == np.array([[0.9, 0.3, 0.6, 0.2, 90.]]).all() + assert np.all(rotated == np.array([[0.9, 0.3, 0.6, 0.2, -90.]])) def test_rotate_image(): From d16fba3c443f33114c53c9747f02cfc8ad66a21b Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 30 Nov 2021 09:46:04 +0100 Subject: [PATCH 35/53] fix: keep check with if expand in rotate_image --- doctr/utils/geometry.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index c318e5f151..5cc84d0046 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -257,17 +257,18 @@ def rotate_image( height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) - if preserve_origin_shape: - # Pad to get the same aspect ratio - if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): - # Pad width - if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): - h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) - # Pad height - else: - h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 - rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) - # rescale - rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) + if expand: + if preserve_origin_shape: + # Pad to get the same aspect ratio + if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): + # Pad width + if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): + h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) + # Pad height + else: + h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 + rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) + # rescale + rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) return rot_img From e344e5ce13015af4ef95273c7f81a98419761d10 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 30 Nov 2021 13:57:30 +0100 Subject: [PATCH 36/53] fix: change rotate_boxes signature --- doctr/models/predictor/pytorch.py | 2 +- doctr/models/predictor/tensorflow.py | 2 +- doctr/utils/geometry.py | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index e804f7c08d..2ce43f4277 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -86,7 +86,7 @@ def forward( # Rotate back pages and boxes while keeping original image size if self.straighten_pages: - boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for + boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], target_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] out = self.doc_builder( diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index a9c1dde9f5..cd5fdafdd7 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -83,7 +83,7 @@ def __call__( # Rotate back pages and boxes while keeping original image size if self.straighten_pages: - boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], mask_shape=mask) for + boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], target_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] out = self.doc_builder(boxes, text_preds, origin_page_shapes) # type: ignore[misc] diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 5cc84d0046..d1af425e2c 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -181,17 +181,19 @@ def rotate_boxes( angle: float = 0., min_angle: float = 1., orig_shape: Optional[Tuple[int, int]] = None, - mask_shape: Optional[Tuple[int, int]] = None, + target_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax) or rotated bounding boxes (x, y, w, h, alpha) of an angle, if angle > min_angle, around the center of the page. + If orig_shape and target_shape are specified, the boxes are remapped to the target shape after the rotation. This + is done to remove the padding that is created by rotate_page(expand=True) Args: boxes: (N, 4) or (N, 5) array of RELATIVE boxes angle: angle between -90 and +90 degrees min_angle: minimum angle to rotate boxes orig_shape: shape of the origin image - mask_shape: shape of the mask if the image is cropped after the rotation + target_shape: shape of the target image Returns: A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes @@ -221,8 +223,8 @@ def rotate_boxes( rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0])), axis=1) # Apply a mask if requested - if mask_shape is not None and orig_shape is not None: - rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=mask_shape) + if target_shape is not None and orig_shape is not None: + rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape) return rotated_boxes From 2e11dc87328adc57d13796ce325fa4a23d56fb1d Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 30 Nov 2021 14:47:19 +0100 Subject: [PATCH 37/53] fix: use loc_preds instead of boxes --- doctr/models/builder.py | 10 ++++---- doctr/models/predictor/base.py | 5 ++-- doctr/utils/geometry.py | 37 +++++++++++++++-------------- tests/common/test_utils_geometry.py | 27 +++++++++++---------- 4 files changed, 40 insertions(+), 39 deletions(-) diff --git a/doctr/models/builder.py b/doctr/models/builder.py index cfe9150f11..b7fcabc48b 100644 --- a/doctr/models/builder.py +++ b/doctr/models/builder.py @@ -215,15 +215,15 @@ def _build_blocks(self, boxes: np.ndarray, word_preds: List[Tuple[str, float]]) # Decide whether we try to form lines if self.resolve_lines: - lines = self._resolve_lines(boxes) + lines = self._resolve_lines(boxes[:, :-1]) # Decide whether we try to form blocks if self.resolve_blocks and len(lines) > 1: - _blocks = self._resolve_blocks(boxes, lines) + _blocks = self._resolve_blocks(boxes[:, :-1], lines) else: _blocks = [lines] else: # Sort bounding boxes, one line for all boxes, one block for the line - lines = [self._sort_boxes(boxes)] + lines = [self._sort_boxes(boxes[:, :-1])] _blocks = [lines] blocks = [ @@ -233,7 +233,7 @@ def _build_blocks(self, boxes: np.ndarray, word_preds: List[Tuple[str, float]]) Word( *word_preds[idx], (boxes[idx, 0], boxes[idx, 1], boxes[idx, 2], boxes[idx, 3], boxes[idx, 4]) - ) if boxes.shape[1] == 5 else + ) if boxes.shape[1] == 6 else Word( *word_preds[idx], ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3])) @@ -273,7 +273,7 @@ def __call__( if self.export_as_straight_boxes and len(boxes) > 0: # If boxes are already straight OK, else fit a bounding rect - if boxes[0].shape[-1] == 5: + if boxes[0].shape[-1] == 6: straight_boxes = [] # Iterate over pages for page_boxes in boxes: diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py index 254e4ea2e8..bef116bcde 100644 --- a/doctr/models/predictor/base.py +++ b/doctr/models/predictor/base.py @@ -64,13 +64,12 @@ def _process_predictions( word_preds: List[Tuple[str, float]], ) -> Tuple[List[np.ndarray], List[List[Tuple[str, float]]]]: - boxes, text_preds = [], [] + text_preds = [] if len(loc_preds) > 0: # Text _idx = 0 for page_boxes in loc_preds: - boxes.append(page_boxes[:, :-1]) text_preds.append(word_preds[_idx: _idx + page_boxes.shape[0]]) _idx += page_boxes.shape[0] - return boxes, text_preds + return loc_preds, text_preds diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index d1af425e2c..984da4a7da 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -145,18 +145,18 @@ def rotate_abs_boxes(boxes: np.ndarray, angle: float, img_shape: Tuple[int, int] return rotated_boxes -def remap_boxes(boxes: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray: - """ Remaps a batch of RotatedBbox (x, y, w, h, alpha) expressed for an origin_shape to a destination_shape. +def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray: + """ Remaps a batch of rotated locpred (x, y, w, h, alpha, c) expressed for an origin_shape to a destination_shape. This does not impact the absolute shape of the boxes, but allow to calculate the new relative RotatedBbox coordinates after a resizing of the image. Args: - boxes: (N, 5) array of RELATIVE RotatedBbox (x, y, w, h, alpha) + loc_preds: (N, 6) array of RELATIVE locpred (x, y, w, h, alpha, c) orig_shape: shape of the origin image dest_shape: shape of the destination image Returns: - A batch of rotated boxes (N, 5): (x, y, w, h, alpha) expressed in the destination referencial + A batch of rotated loc_preds (N, 6): (x, y, w, h, alpha, c) expressed in the destination referencial """ @@ -166,30 +166,30 @@ def remap_boxes(boxes: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tupl raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}") orig_height, orig_width = orig_shape dest_height, dest_width = dest_shape - mboxes = boxes.copy() + mboxes = loc_preds.copy() # remaps position of the box center for the destination image shape - mboxes[:, 0] = ((boxes[:, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width - mboxes[:, 1] = ((boxes[:, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height + mboxes[:, 0] = ((loc_preds[:, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width + mboxes[:, 1] = ((loc_preds[:, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height # remaps box dimension for the destination image shape - mboxes[:, 2] = boxes[:, 2] * orig_width / dest_width - mboxes[:, 3] = boxes[:, 3] * orig_height / dest_height + mboxes[:, 2] = loc_preds[:, 2] * orig_width / dest_width + mboxes[:, 3] = loc_preds[:, 3] * orig_height / dest_height return mboxes def rotate_boxes( - boxes: np.ndarray, + loc_preds: np.ndarray, angle: float = 0., min_angle: float = 1., orig_shape: Optional[Tuple[int, int]] = None, target_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: - """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax) or rotated bounding boxes - (x, y, w, h, alpha) of an angle, if angle > min_angle, around the center of the page. + """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes + (x, y, w, h, alpha, c) of an angle, if angle > min_angle, around the center of the page. If orig_shape and target_shape are specified, the boxes are remapped to the target shape after the rotation. This is done to remove the padding that is created by rotate_page(expand=True) Args: - boxes: (N, 4) or (N, 5) array of RELATIVE boxes + loc_preds: (N, 5) or (N, 6) array of RELATIVE boxes angle: angle between -90 and +90 degrees min_angle: minimum angle to rotate boxes orig_shape: shape of the origin image @@ -199,13 +199,14 @@ def rotate_boxes( A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes """ # Change format of the boxes to rotated boxes - _boxes = boxes.copy() - if _boxes.shape[1] == 4: + _boxes = loc_preds.copy() + if _boxes.shape[1] == 5: _boxes = np.column_stack(((_boxes[:, 0] + _boxes[:, 2]) / 2, (_boxes[:, 1] + _boxes[:, 3]) / 2, _boxes[:, 2] - _boxes[:, 0], _boxes[:, 3] - _boxes[:, 1], - np.zeros(_boxes.shape[0]))) + np.zeros(_boxes.shape[0]), + _boxes[:, 4])) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return _boxes @@ -220,8 +221,8 @@ def rotate_boxes( rotated_centers = .5 + np.matmul(centers - .5, rotation_mat) x_center, y_center = rotated_centers[:, 0], rotated_centers[:, 1] # Compute rotated boxes - rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0])), - axis=1) + rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0]), + _boxes[:, 5]), axis=1) # Apply a mask if requested if target_shape is not None and orig_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape) diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index f72210257f..052381d043 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -39,12 +39,12 @@ def test_resolve_enclosing_rbbox(): def test_remap_boxes(): - pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 20)) - target = np.array([[0.5, 0.5, 0.05, 0.05, 0.]]) + pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0., 0.5]]), (10, 10), (20, 20)) + target = np.array([[0.5, 0.5, 0.05, 0.05, 0., 0.5]]) assert np.all(pred == target) - pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0.]]), (10, 10), (20, 10)) - target = np.array([[0.5, 0.5, 0.1, 0.05, 0.]]) + pred = geometry.remap_boxes(np.array([[0.5, 0.5, 0.1, 0.1, 0., 0.5]]), (10, 10), (20, 10)) + target = np.array([[0.5, 0.5, 0.1, 0.05, 0., 0.5]]) assert np.all(pred == target) pred = geometry.remap_boxes(np.array([[0.25, 0.5, 0.5, 0.33, 0.]]), (80, 30), (160, 30)) @@ -53,19 +53,19 @@ def test_remap_boxes(): orig_dimension = (100, 100) dest_dimensions = (100, 200) - orig_box = np.array([[0.5, 0.5, 0.2, 0., 45]]) + orig_box = np.array([[0.5, 0.5, 0.2, 0., 45, 0.5]]) # Unpack height_o, width_o = orig_dimension height_d, width_d = dest_dimensions pred = geometry.remap_boxes(orig_box, orig_dimension, dest_dimensions) - x, y, w, h, a = orig_box[0] + x, y, w, h, a, c = orig_box[0] # Switch to absolute coords x, w = x * width_o, w * width_o y, h = y * height_o, h * height_o orig = cv2.boxPoints(((x, y), (w, h), a)) - x, y, w, h, a = pred[0] + x, y, w, h, a, c = pred[0] # Switch to absolute coords x, w = x * width_d, w * width_d y, h = y * height_d, h * height_d @@ -81,12 +81,13 @@ def test_remap_boxes(): def test_rotate_boxes(): - boxes = np.array([[0.1, 0.1, 0.8, 0.3]]) + boxes = np.array([[0.1, 0.1, 0.8, 0.3, 0.5]]) rboxes = np.column_stack(((boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2, boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1], - np.zeros(boxes.shape[0]))) + np.zeros(boxes.shape[0]), + boxes[:, 4])) # Angle = 0 rotated = geometry.rotate_boxes(boxes, angle=0.) assert np.all(rotated == rboxes) @@ -95,16 +96,16 @@ def test_rotate_boxes(): assert np.all(rotated == rboxes) # Angle = 30 rotated = geometry.rotate_boxes(boxes, angle=30) - assert rotated.shape == (1, 5) + assert rotated.shape == (1, 6) assert rotated[0, 4] == 30. - boxes = np.array([[0., 0., 0.6, 0.2]]) + boxes = np.array([[0., 0., 0.6, 0.2, 0.5]]) # Angle = -90: rotated = geometry.rotate_boxes(boxes, angle=-90, min_angle=0) - assert np.all(rotated == np.array([[0.9, 0.3, 0.6, 0.2, -90.]])) + assert np.allclose(rotated, np.array([[0.9, 0.3, 0.6, 0.2, -90., 0.5]])) # Angle = 90 rotated = geometry.rotate_boxes(boxes, angle=+90, min_angle=0) - assert np.all(rotated == np.array([[0.9, 0.3, 0.6, 0.2, -90.]])) + assert np.allclose(rotated, np.array([[0.1, 0.7, 0.6, 0.2, +90., 0.5]])) def test_rotate_image(): From bb6bc79f13eea1a6dc2f316eb15290d3ae2899e3 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Tue, 30 Nov 2021 14:47:36 +0100 Subject: [PATCH 38/53] fix: wrong test in remap boxes --- tests/common/test_utils_geometry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index 052381d043..a2fc8dc5fc 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -47,8 +47,8 @@ def test_remap_boxes(): target = np.array([[0.5, 0.5, 0.1, 0.05, 0., 0.5]]) assert np.all(pred == target) - pred = geometry.remap_boxes(np.array([[0.25, 0.5, 0.5, 0.33, 0.]]), (80, 30), (160, 30)) - target = np.array([[0.375, 0.5, 0.25, 0.1, 0.]]) + pred = geometry.remap_boxes(np.array([[0.5, 0.0, 0.5, 0.25, 0., 0.5]]), (80, 40), (160, 40)) + target = np.array([[0.5, 0.25, 0.5, 0.125, 0., 0.5]]) assert np.all(pred == target) orig_dimension = (100, 100) From b22309d415fa3653ada4b0544303276714f03dcf Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 1 Dec 2021 14:54:51 +0100 Subject: [PATCH 39/53] add unit tests for pytorch --- doctr/models/predictor/pytorch.py | 11 +++-- tests/common/test_utils_geometry.py | 4 +- tests/pytorch/test_models_detection_pt.py | 47 ++++++++++++++++++- tests/pytorch/test_models_recognition_pt.py | 31 +++++++++++++ tests/pytorch/test_models_zoo_pt.py | 51 ++++++++++++++++++++- tests/tensorflow/test_models_zoo_tf.py | 4 +- 6 files changed, 137 insertions(+), 11 deletions(-) diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index 2ce43f4277..4ed2539eab 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -47,8 +47,8 @@ def __init__( ) -> None: super().__init__() - self.det_predictor = det_predictor.eval() # type: ignore[attr-defined] - self.reco_predictor = reco_predictor.eval() # type: ignore[attr-defined] + self.det_predictor = det_predictor # type: ignore[attr-defined] + self.reco_predictor = reco_predictor # type: ignore[attr-defined] self.doc_builder = DocumentBuilder(export_as_straight_boxes=export_as_straight_boxes) self.assume_straight_pages = assume_straight_pages self.straighten_pages = straighten_pages @@ -64,7 +64,7 @@ def forward( if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") - origin_page_shapes = [page.shape[:2] for page in pages] + origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages] # Detect document rotation and rotate pages if self.straighten_pages: @@ -86,7 +86,10 @@ def forward( # Rotate back pages and boxes while keeping original image size if self.straighten_pages: - boxes = [rotate_boxes(page_boxes, angle, orig_shape=page.shape[:2], target_shape=mask) for + boxes = [rotate_boxes(page_boxes, + angle, + orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:], + target_shape=mask) for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)] out = self.doc_builder( diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index a2fc8dc5fc..b32389c51d 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -59,13 +59,13 @@ def test_remap_boxes(): height_d, width_d = dest_dimensions pred = geometry.remap_boxes(orig_box, orig_dimension, dest_dimensions) - x, y, w, h, a, c = orig_box[0] + x, y, w, h, a, _ = orig_box[0] # Switch to absolute coords x, w = x * width_o, w * width_o y, h = y * height_o, h * height_o orig = cv2.boxPoints(((x, y), (w, h), a)) - x, y, w, h, a, c = pred[0] + x, y, w, h, a, _ = pred[0] # Switch to absolute coords x, w = x * width_d, w * width_d y, h = y * height_d, h * height_d diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py index 13fa79f980..038b979eff 100644 --- a/tests/pytorch/test_models_detection_pt.py +++ b/tests/pytorch/test_models_detection_pt.py @@ -2,9 +2,10 @@ import pytest import torch +from doctr.io import DocumentFile from doctr.models import detection from doctr.models.detection.predictor import DetectionPredictor - +from doctr.models.preprocessor import PreProcessor @pytest.mark.parametrize( "arch_name, input_shape, output_size, out_prob", @@ -44,6 +45,50 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob): assert isinstance(out['loss'], torch.Tensor) +@pytest.fixture(scope="session") +def test_detectionpredictor(mock_pdf): # noqa: F811 + + batch_size = 4 + predictor = DetectionPredictor( + PreProcessor(output_size=(512, 512), batch_size=batch_size), + detection.db_resnet50(input_shape=(512, 512, 3)) + ) + + pages = DocumentFile.from_pdf(mock_pdf).as_images() + out = predictor(pages) + # The input PDF has 8 pages + assert len(out) == 8 + + # Dimension check + with pytest.raises(ValueError): + input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) + _ = predictor([input_page]) + + return predictor + + +@pytest.fixture(scope="session") +def test_rotated_detectionpredictor(mock_pdf): # noqa: F811 + + batch_size = 4 + predictor = DetectionPredictor( + PreProcessor(output_size=(512, 512), batch_size=batch_size), + detection.db_resnet50(assume_straight_pages=False, input_shape=(512, 512, 3)) + ) + + pages = DocumentFile.from_pdf(mock_pdf).as_images() + out = predictor(pages) + + # The input PDF has 8 pages + assert len(out) == 8 + + # Dimension check + with pytest.raises(ValueError): + input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) + _ = predictor([input_page]) + + return predictor + @pytest.mark.parametrize( "arch_name", [ diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py index fb59c1efb4..d1b3190d33 100644 --- a/tests/pytorch/test_models_recognition_pt.py +++ b/tests/pytorch/test_models_recognition_pt.py @@ -1,7 +1,11 @@ +import numpy as np import pytest import torch +from doctr.io import DocumentFile from doctr.models import recognition +from doctr.models._utils import extract_crops +from doctr.models.preprocessor import PreProcessor from doctr.models.recognition.predictor import RecognitionPredictor @@ -54,6 +58,33 @@ def test_reco_postprocessors(post_processor, input_shape, mock_vocab): assert repr(processor) == f'{post_processor}(vocab_size={len(mock_vocab)})' +@pytest.fixture(scope="session") +def test_recognitionpredictor(mock_pdf, mock_vocab): # noqa: F811 + + batch_size = 4 + predictor = RecognitionPredictor( + PreProcessor(output_size=(32, 128), batch_size=batch_size, preserve_aspect_ratio=True), + recognition.crnn_vgg16_bn(vocab=mock_vocab, input_shape=(32, 128, 3)) + ) + + pages = DocumentFile.from_pdf(mock_pdf).as_images() + # Create bounding boxes + boxes = np.array([[.5, .5, 0.75, 0.75], [0.5, 0.5, 1., 1.]], dtype=np.float32) + crops = extract_crops(pages[0], boxes) + + out = predictor(crops) + + # One prediction per crop + assert len(out) == boxes.shape[0] + assert all(isinstance(val, str) and isinstance(conf, float) for val, conf in out) + + # Dimension check + with pytest.raises(ValueError): + input_crop = (255 * np.random.rand(1, 128, 64, 3)).astype(np.uint8) + _ = predictor([input_crop]) + + return predictor + @pytest.mark.parametrize( "arch_name", [ diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py index a45ad25ecf..070f22683b 100644 --- a/tests/pytorch/test_models_zoo_pt.py +++ b/tests/pytorch/test_models_zoo_pt.py @@ -1,9 +1,56 @@ import numpy as np import pytest +from test_models_detection_pt import test_detectionpredictor, test_rotated_detectionpredictor # noqa: F401 +from test_models_recognition_pt import test_recognitionpredictor from doctr import models -from doctr.io import Document -from doctr.models.predictor import OCRPredictor +from doctr.io import Document, DocumentFile +from doctr.models.predictor.pytorch import OCRPredictor + + +def test_ocrpredictor( + mock_pdf, test_detectionpredictor, test_recognitionpredictor, test_rotated_detectionpredictor # noqa: F811 +): + + predictor = OCRPredictor( + test_detectionpredictor, + test_recognitionpredictor, + assume_straight_pages=True, + straighten_pages=False, + ) + + r_predictor = OCRPredictor( + test_rotated_detectionpredictor, + test_recognitionpredictor, + assume_straight_pages=False, + straighten_pages=False, + ) + + s_predictor = OCRPredictor( + test_detectionpredictor, + test_recognitionpredictor, + assume_straight_pages=True, + straighten_pages=True, + ) + + doc = DocumentFile.from_pdf(mock_pdf).as_images() + out = predictor(doc) + r_out = r_predictor(doc) + s_out = s_predictor(doc) + + # Document + assert isinstance(out, Document) + assert isinstance(r_out, Document) + assert isinstance(s_out, Document) + + # The input PDF has 8 pages + assert len(out.pages) == 8 + assert len(r_out.pages) == 8 + assert len(s_out.pages) == 8 + # Dimension check + with pytest.raises(ValueError): + input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) + _ = predictor([input_page]) @pytest.mark.parametrize( diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py index af6130d5f0..c99fd29b9d 100644 --- a/tests/tensorflow/test_models_zoo_tf.py +++ b/tests/tensorflow/test_models_zoo_tf.py @@ -27,9 +27,9 @@ def test_ocrpredictor( ) s_predictor = OCRPredictor( - test_rotated_detectionpredictor, + test_detectionpredictor, test_recognitionpredictor, - assume_straight_pages=False, + assume_straight_pages=True, straighten_pages=True, ) From 495ad8c3cf9c61dab54c398a768f5860de44d346 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 1 Dec 2021 15:34:04 +0100 Subject: [PATCH 40/53] add unit tests for remap_boxes and estimate_orientation --- tests/common/test_models.py | 11 ++++++++++- tests/common/test_utils_geometry.py | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/common/test_models.py b/tests/common/test_models.py index 98ab9c553f..7c8911b32f 100644 --- a/tests/common/test_models.py +++ b/tests/common/test_models.py @@ -86,12 +86,21 @@ def mock_bitmap(mock_image): return bitmap +@pytest.fixture(scope="function") +def mock_unreadable_image(mock_image): + image = mock_image * 0 + return image + + def test_get_bitmap_angle(mock_bitmap): angle = get_bitmap_angle(mock_bitmap) assert abs(angle - 30.) < 1. -def test_estimate_orientation(mock_image): +def test_estimate_orientation(mock_image, mock_unreadable_image): + angle = estimate_orientation(mock_unreadable_image) + assert angle == 0 + angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index b32389c51d..f9e2e0e53b 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -1,4 +1,5 @@ from math import hypot +import pytest import cv2 import numpy as np @@ -51,6 +52,12 @@ def test_remap_boxes(): target = np.array([[0.5, 0.25, 0.5, 0.125, 0., 0.5]]) assert np.all(pred == target) + with pytest.raises(ValueError): + geometry.remap_boxes(np.array([[0.5, 0.0, 0.5, 0.25, 0., 0.5]]), (80, 40, 150), (160, 40)) + + with pytest.raises(ValueError): + geometry.remap_boxes(np.array([[0.5, 0.0, 0.5, 0.25, 0., 0.5]]), (80, 40), (160,)) + orig_dimension = (100, 100) dest_dimensions = (100, 200) orig_box = np.array([[0.5, 0.5, 0.2, 0., 45, 0.5]]) From 98b44f6dbcb74a83586dae3dc3f12ad3202a74df Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 1 Dec 2021 15:38:23 +0100 Subject: [PATCH 41/53] fix: styling --- tests/pytorch/test_models_detection_pt.py | 2 ++ tests/pytorch/test_models_recognition_pt.py | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py index 038b979eff..21bfe2d651 100644 --- a/tests/pytorch/test_models_detection_pt.py +++ b/tests/pytorch/test_models_detection_pt.py @@ -7,6 +7,7 @@ from doctr.models.detection.predictor import DetectionPredictor from doctr.models.preprocessor import PreProcessor + @pytest.mark.parametrize( "arch_name, input_shape, output_size, out_prob", [ @@ -89,6 +90,7 @@ def test_rotated_detectionpredictor(mock_pdf): # noqa: F811 return predictor + @pytest.mark.parametrize( "arch_name", [ diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py index d1b3190d33..57ad87c865 100644 --- a/tests/pytorch/test_models_recognition_pt.py +++ b/tests/pytorch/test_models_recognition_pt.py @@ -85,6 +85,7 @@ def test_recognitionpredictor(mock_pdf, mock_vocab): # noqa: F811 return predictor + @pytest.mark.parametrize( "arch_name", [ From 28f1fd8678dfafcba5cd7bc21eae68b5b66aff30 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 1 Dec 2021 15:43:28 +0100 Subject: [PATCH 42/53] fix: isort --- tests/common/test_utils_geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index f9e2e0e53b..c76d41442c 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -1,8 +1,8 @@ from math import hypot -import pytest import cv2 import numpy as np +import pytest from doctr.utils import geometry From 1d61de70230f147ff18ea94a5571add0affc8de7 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Wed, 1 Dec 2021 16:43:09 +0100 Subject: [PATCH 43/53] fix: remove unnecessary fixture --- tests/common/test_models.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/common/test_models.py b/tests/common/test_models.py index 7c8911b32f..7f87df4960 100644 --- a/tests/common/test_models.py +++ b/tests/common/test_models.py @@ -86,20 +86,13 @@ def mock_bitmap(mock_image): return bitmap -@pytest.fixture(scope="function") -def mock_unreadable_image(mock_image): - image = mock_image * 0 - return image - - def test_get_bitmap_angle(mock_bitmap): angle = get_bitmap_angle(mock_bitmap) assert abs(angle - 30.) < 1. -def test_estimate_orientation(mock_image, mock_unreadable_image): - angle = estimate_orientation(mock_unreadable_image) - assert angle == 0 +def test_estimate_orientation(mock_image): + assert estimate_orientation(mock_image * 0) == 0 angle = estimate_orientation(mock_image) assert abs(angle - 30.) < 1. From faac0bd872179aa17b4ff86ef32305c22b39d76d Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 2 Dec 2021 10:08:59 +0100 Subject: [PATCH 44/53] fix: add testing for pytorch predictor --- doctr/models/predictor/pytorch.py | 4 +-- tests/pytorch/test_models_detection_pt.py | 39 +++++---------------- tests/pytorch/test_models_recognition_pt.py | 6 ++-- tests/pytorch/test_models_zoo_pt.py | 24 ++++--------- 4 files changed, 20 insertions(+), 53 deletions(-) diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index 4ed2539eab..a015baa86b 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -47,8 +47,8 @@ def __init__( ) -> None: super().__init__() - self.det_predictor = det_predictor # type: ignore[attr-defined] - self.reco_predictor = reco_predictor # type: ignore[attr-defined] + self.det_predictor = det_predictor.eval() # type: ignore[attr-defined] + self.reco_predictor = reco_predictor.eval() # type: ignore[attr-defined] self.doc_builder = DocumentBuilder(export_as_straight_boxes=export_as_straight_boxes) self.assume_straight_pages = assume_straight_pages self.straighten_pages = straighten_pages diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py index 21bfe2d651..4f15f8211b 100644 --- a/tests/pytorch/test_models_detection_pt.py +++ b/tests/pytorch/test_models_detection_pt.py @@ -4,8 +4,8 @@ from doctr.io import DocumentFile from doctr.models import detection -from doctr.models.detection.predictor import DetectionPredictor -from doctr.models.preprocessor import PreProcessor +from doctr.models.detection.predictor.pytorch import DetectionPredictor +from doctr.models.preprocessor.pytorch import PreProcessor @pytest.mark.parametrize( @@ -47,12 +47,12 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob): @pytest.fixture(scope="session") -def test_detectionpredictor(mock_pdf): # noqa: F811 +def test_detectionpredictor_pt(mock_pdf): # noqa: F811 batch_size = 4 predictor = DetectionPredictor( PreProcessor(output_size=(512, 512), batch_size=batch_size), - detection.db_resnet50(input_shape=(512, 512, 3)) + detection.db_resnet50(pretrained=False).eval() ) pages = DocumentFile.from_pdf(mock_pdf).as_images() @@ -60,33 +60,10 @@ def test_detectionpredictor(mock_pdf): # noqa: F811 # The input PDF has 8 pages assert len(out) == 8 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - return predictor - - -@pytest.fixture(scope="session") -def test_rotated_detectionpredictor(mock_pdf): # noqa: F811 - - batch_size = 4 - predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=batch_size), - detection.db_resnet50(assume_straight_pages=False, input_shape=(512, 512, 3)) - ) - - pages = DocumentFile.from_pdf(mock_pdf).as_images() - out = predictor(pages) - - # The input PDF has 8 pages - assert len(out) == 8 - - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) + # # Dimension check + # with pytest.raises(ValueError): + # input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) + # _ = predictor([input_page]) return predictor diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py index 57ad87c865..2c10fc7abc 100644 --- a/tests/pytorch/test_models_recognition_pt.py +++ b/tests/pytorch/test_models_recognition_pt.py @@ -5,8 +5,8 @@ from doctr.io import DocumentFile from doctr.models import recognition from doctr.models._utils import extract_crops -from doctr.models.preprocessor import PreProcessor -from doctr.models.recognition.predictor import RecognitionPredictor +from doctr.models.preprocessor.pytorch import PreProcessor +from doctr.models.recognition.predictor.pytorch import RecognitionPredictor @pytest.mark.parametrize( @@ -59,7 +59,7 @@ def test_reco_postprocessors(post_processor, input_shape, mock_vocab): @pytest.fixture(scope="session") -def test_recognitionpredictor(mock_pdf, mock_vocab): # noqa: F811 +def test_recognitionpredictor_pt(mock_pdf, mock_vocab): # noqa: F811 batch_size = 4 predictor = RecognitionPredictor( diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py index 070f22683b..e928069dda 100644 --- a/tests/pytorch/test_models_zoo_pt.py +++ b/tests/pytorch/test_models_zoo_pt.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from test_models_detection_pt import test_detectionpredictor, test_rotated_detectionpredictor # noqa: F401 -from test_models_recognition_pt import test_recognitionpredictor +from test_models_detection_pt import test_detectionpredictor_pt # noqa: F401 +from test_models_recognition_pt import test_recognitionpredictor_pt # noqa: F401 from doctr import models from doctr.io import Document, DocumentFile @@ -9,43 +9,33 @@ def test_ocrpredictor( - mock_pdf, test_detectionpredictor, test_recognitionpredictor, test_rotated_detectionpredictor # noqa: F811 + mock_pdf, test_detectionpredictor_pt, test_recognitionpredictor_pt # noqa: F811 ): predictor = OCRPredictor( - test_detectionpredictor, - test_recognitionpredictor, + test_detectionpredictor_pt, + test_recognitionpredictor_pt, assume_straight_pages=True, straighten_pages=False, ) - r_predictor = OCRPredictor( - test_rotated_detectionpredictor, - test_recognitionpredictor, - assume_straight_pages=False, - straighten_pages=False, - ) - s_predictor = OCRPredictor( - test_detectionpredictor, - test_recognitionpredictor, + test_detectionpredictor_pt, + test_recognitionpredictor_pt, assume_straight_pages=True, straighten_pages=True, ) doc = DocumentFile.from_pdf(mock_pdf).as_images() out = predictor(doc) - r_out = r_predictor(doc) s_out = s_predictor(doc) # Document assert isinstance(out, Document) - assert isinstance(r_out, Document) assert isinstance(s_out, Document) # The input PDF has 8 pages assert len(out.pages) == 8 - assert len(r_out.pages) == 8 assert len(s_out.pages) == 8 # Dimension check with pytest.raises(ValueError): From 938c9f26c235b2696bd573bf9d937921f049bcf8 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 2 Dec 2021 10:13:27 +0100 Subject: [PATCH 45/53] fix: styling --- tests/pytorch/test_models_zoo_pt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py index e928069dda..60dfcc13dc 100644 --- a/tests/pytorch/test_models_zoo_pt.py +++ b/tests/pytorch/test_models_zoo_pt.py @@ -1,7 +1,7 @@ import numpy as np import pytest from test_models_detection_pt import test_detectionpredictor_pt # noqa: F401 -from test_models_recognition_pt import test_recognitionpredictor_pt # noqa: F401 +from test_models_recognition_pt import test_recognitionpredictor_pt # noqa: F401 from doctr import models from doctr.io import Document, DocumentFile From 30a70f2fd4f249ef5eac0d768f54c87502ac0fa9 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 2 Dec 2021 13:42:21 +0100 Subject: [PATCH 46/53] fix: correct testing for ocrpredictor with pytorch --- tests/pytorch/test_models_detection_pt.py | 24 ---------------- tests/pytorch/test_models_recognition_pt.py | 32 --------------------- tests/pytorch/test_models_zoo_pt.py | 30 ++++++++++++------- 3 files changed, 20 insertions(+), 66 deletions(-) diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py index 4f15f8211b..d073fc2bbd 100644 --- a/tests/pytorch/test_models_detection_pt.py +++ b/tests/pytorch/test_models_detection_pt.py @@ -2,10 +2,8 @@ import pytest import torch -from doctr.io import DocumentFile from doctr.models import detection from doctr.models.detection.predictor.pytorch import DetectionPredictor -from doctr.models.preprocessor.pytorch import PreProcessor @pytest.mark.parametrize( @@ -46,28 +44,6 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob): assert isinstance(out['loss'], torch.Tensor) -@pytest.fixture(scope="session") -def test_detectionpredictor_pt(mock_pdf): # noqa: F811 - - batch_size = 4 - predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=batch_size), - detection.db_resnet50(pretrained=False).eval() - ) - - pages = DocumentFile.from_pdf(mock_pdf).as_images() - out = predictor(pages) - # The input PDF has 8 pages - assert len(out) == 8 - - # # Dimension check - # with pytest.raises(ValueError): - # input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - # _ = predictor([input_page]) - - return predictor - - @pytest.mark.parametrize( "arch_name", [ diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py index 2c10fc7abc..d987b6d2d7 100644 --- a/tests/pytorch/test_models_recognition_pt.py +++ b/tests/pytorch/test_models_recognition_pt.py @@ -1,11 +1,7 @@ -import numpy as np import pytest import torch -from doctr.io import DocumentFile from doctr.models import recognition -from doctr.models._utils import extract_crops -from doctr.models.preprocessor.pytorch import PreProcessor from doctr.models.recognition.predictor.pytorch import RecognitionPredictor @@ -58,34 +54,6 @@ def test_reco_postprocessors(post_processor, input_shape, mock_vocab): assert repr(processor) == f'{post_processor}(vocab_size={len(mock_vocab)})' -@pytest.fixture(scope="session") -def test_recognitionpredictor_pt(mock_pdf, mock_vocab): # noqa: F811 - - batch_size = 4 - predictor = RecognitionPredictor( - PreProcessor(output_size=(32, 128), batch_size=batch_size, preserve_aspect_ratio=True), - recognition.crnn_vgg16_bn(vocab=mock_vocab, input_shape=(32, 128, 3)) - ) - - pages = DocumentFile.from_pdf(mock_pdf).as_images() - # Create bounding boxes - boxes = np.array([[.5, .5, 0.75, 0.75], [0.5, 0.5, 1., 1.]], dtype=np.float32) - crops = extract_crops(pages[0], boxes) - - out = predictor(crops) - - # One prediction per crop - assert len(out) == boxes.shape[0] - assert all(isinstance(val, str) and isinstance(conf, float) for val, conf in out) - - # Dimension check - with pytest.raises(ValueError): - input_crop = (255 * np.random.rand(1, 128, 64, 3)).astype(np.uint8) - _ = predictor([input_crop]) - - return predictor - - @pytest.mark.parametrize( "arch_name", [ diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py index 60dfcc13dc..06febf8aa3 100644 --- a/tests/pytorch/test_models_zoo_pt.py +++ b/tests/pytorch/test_models_zoo_pt.py @@ -1,27 +1,37 @@ import numpy as np import pytest -from test_models_detection_pt import test_detectionpredictor_pt # noqa: F401 -from test_models_recognition_pt import test_recognitionpredictor_pt # noqa: F401 from doctr import models from doctr.io import Document, DocumentFile -from doctr.models.predictor.pytorch import OCRPredictor +from doctr.models import detection, recognition +from doctr.models.predictor import OCRPredictor +from doctr.models.preprocessor import PreProcessor +from doctr.models.detection.predictor import DetectionPredictor +from doctr.models.recognition.predictor import RecognitionPredictor -def test_ocrpredictor( - mock_pdf, test_detectionpredictor_pt, test_recognitionpredictor_pt # noqa: F811 -): +def test_ocrpredictor(mock_pdf, mock_vocab): + batch_size = 4 + detectionpredictor = DetectionPredictor( + PreProcessor(output_size=(512, 512), batch_size=batch_size), + detection.db_resnet50(pretrained=False).eval() + ) + + recognitionpredictor = RecognitionPredictor( + PreProcessor(output_size=(32, 128), batch_size=batch_size, preserve_aspect_ratio=True), + recognition.crnn_vgg16_bn(vocab=mock_vocab, input_shape=(32, 128, 3)) + ) predictor = OCRPredictor( - test_detectionpredictor_pt, - test_recognitionpredictor_pt, + detectionpredictor, + recognitionpredictor, assume_straight_pages=True, straighten_pages=False, ) s_predictor = OCRPredictor( - test_detectionpredictor_pt, - test_recognitionpredictor_pt, + detectionpredictor, + recognitionpredictor, assume_straight_pages=True, straighten_pages=True, ) From a7c0d55d4882ebf503ab39389158d5623b125af3 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 2 Dec 2021 13:44:22 +0100 Subject: [PATCH 47/53] fix: correct imports for testing --- tests/pytorch/test_models_detection_pt.py | 2 +- tests/pytorch/test_models_recognition_pt.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py index d073fc2bbd..13fa79f980 100644 --- a/tests/pytorch/test_models_detection_pt.py +++ b/tests/pytorch/test_models_detection_pt.py @@ -3,7 +3,7 @@ import torch from doctr.models import detection -from doctr.models.detection.predictor.pytorch import DetectionPredictor +from doctr.models.detection.predictor import DetectionPredictor @pytest.mark.parametrize( diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py index d987b6d2d7..fb59c1efb4 100644 --- a/tests/pytorch/test_models_recognition_pt.py +++ b/tests/pytorch/test_models_recognition_pt.py @@ -2,7 +2,7 @@ import torch from doctr.models import recognition -from doctr.models.recognition.predictor.pytorch import RecognitionPredictor +from doctr.models.recognition.predictor import RecognitionPredictor @pytest.mark.parametrize( From ce23100db17b79a289a570c42f98e858ac9103a5 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Thu, 2 Dec 2021 17:23:24 +0100 Subject: [PATCH 48/53] fix: isort --- tests/pytorch/test_models_zoo_pt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py index 06febf8aa3..9daf379191 100644 --- a/tests/pytorch/test_models_zoo_pt.py +++ b/tests/pytorch/test_models_zoo_pt.py @@ -4,9 +4,9 @@ from doctr import models from doctr.io import Document, DocumentFile from doctr.models import detection, recognition +from doctr.models.detection.predictor import DetectionPredictor from doctr.models.predictor import OCRPredictor from doctr.models.preprocessor import PreProcessor -from doctr.models.detection.predictor import DetectionPredictor from doctr.models.recognition.predictor import RecognitionPredictor From 8525b14471a8f9e426cdc04eb598e304cf50940f Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 3 Dec 2021 15:05:47 +0100 Subject: [PATCH 49/53] fix: make sure that expand in rotate_image is keeping the same image ratio --- doctr/utils/geometry.py | 23 +++++++++++------------ tests/common/test_utils_geometry.py | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 984da4a7da..1a429af645 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -240,9 +240,8 @@ def rotate_image( Args: image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 - expand: whether the image should be padded before the rotation - preserve_origin_shape: whether the image should be resized to the original image size after the rotation. - Only useful if expand is True. + expand: whether the image should be padded before the rotation, + preserve_origin_shape: if expand is set to True, resizes the final output to the original image size Returns: Rotated array, padded by 0 by default. @@ -261,16 +260,16 @@ def rotate_image( rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) if expand: + # Pad to get the same aspect ratio + if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): + # Pad width + if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): + h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) + # Pad height + else: + h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 + rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) if preserve_origin_shape: - # Pad to get the same aspect ratio - if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): - # Pad width - if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): - h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) - # Pad height - else: - h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 - rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) # rescale rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index c76d41442c..050dac3265 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -124,7 +124,7 @@ def test_rotate_image(): # Expand rotated = geometry.rotate_image(img, 30., expand=True) - assert rotated.shape[:-1] == (60, 72) + assert rotated.shape[:-1] == (60, 120) assert rotated[0, :, 0].sum() <= 1 # Expand @@ -134,5 +134,5 @@ def test_rotate_image(): # Expand with 90° rotation rotated = geometry.rotate_image(img, 90., expand=True) - assert rotated.shape[:-1] == (64, 64) + assert rotated.shape[:-1] == (64, 128) assert rotated[0, :, 0].sum() <= 1 From 7bcf639e6c1acb9670d6ccf42bd4305d2c199135 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Fri, 3 Dec 2021 17:14:22 +0100 Subject: [PATCH 50/53] fix: styling --- doctr/utils/geometry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 1a429af645..38871d1403 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -240,7 +240,7 @@ def rotate_image( Args: image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 - expand: whether the image should be padded before the rotation, + expand: whether the image should be padded before the rotation preserve_origin_shape: if expand is set to True, resizes the final output to the original image size Returns: From 2205737d981b107ade03207b0f1b3ee2d9b1f1be Mon Sep 17 00:00:00 2001 From: Rob192 Date: Sun, 5 Dec 2021 13:24:46 +0100 Subject: [PATCH 51/53] fix: use absolute centers for rotate_boxes --- doctr/utils/geometry.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 38871d1403..8e56282159 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -216,10 +216,11 @@ def rotate_boxes( [np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)] ], dtype=_boxes.dtype) - # Rotate centers - centers = np.stack((_boxes[:, 0], _boxes[:, 1]), axis=-1) - rotated_centers = .5 + np.matmul(centers - .5, rotation_mat) - x_center, y_center = rotated_centers[:, 0], rotated_centers[:, 1] + # Rotate absolute centers + centers = np.stack((_boxes[:, 0] * orig_shape[1], _boxes[:, 1] * orig_shape[0]), axis=-1) + image_center = (orig_shape[1] // 2, orig_shape[0] // 2) + rotated_centers = image_center + np.matmul(centers - image_center, rotation_mat) + x_center, y_center = rotated_centers[:, 0] / orig_shape[1], rotated_centers[:, 1] / orig_shape[0] # Compute rotated boxes rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0]), _boxes[:, 5]), axis=1) From 484451b94ed79aa9bc2614f88c98b78e9f625c50 Mon Sep 17 00:00:00 2001 From: Rob192 Date: Sun, 5 Dec 2021 14:00:47 +0100 Subject: [PATCH 52/53] fix: calculation of image_center and documentation --- doctr/utils/geometry.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 8e56282159..338907f859 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -180,12 +180,12 @@ def rotate_boxes( loc_preds: np.ndarray, angle: float = 0., min_angle: float = 1., - orig_shape: Optional[Tuple[int, int]] = None, + orig_shape: Tuple[int, int] = (1, 1), target_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes (x, y, w, h, alpha, c) of an angle, if angle > min_angle, around the center of the page. - If orig_shape and target_shape are specified, the boxes are remapped to the target shape after the rotation. This + If target_shape is specified, the boxes are remapped to the target shape after the rotation. This is done to remove the padding that is created by rotate_page(expand=True) Args: @@ -218,14 +218,14 @@ def rotate_boxes( ], dtype=_boxes.dtype) # Rotate absolute centers centers = np.stack((_boxes[:, 0] * orig_shape[1], _boxes[:, 1] * orig_shape[0]), axis=-1) - image_center = (orig_shape[1] // 2, orig_shape[0] // 2) + image_center = (orig_shape[1] / 2, orig_shape[0] / 2) rotated_centers = image_center + np.matmul(centers - image_center, rotation_mat) x_center, y_center = rotated_centers[:, 0] / orig_shape[1], rotated_centers[:, 1] / orig_shape[0] # Compute rotated boxes rotated_boxes = np.stack((x_center, y_center, _boxes[:, 2], _boxes[:, 3], angle * np.ones_like(_boxes[:, 0]), _boxes[:, 5]), axis=1) # Apply a mask if requested - if target_shape is not None and orig_shape is not None: + if target_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape) return rotated_boxes From 66fad677a84548050fba420a9b29c1bb0d3613db Mon Sep 17 00:00:00 2001 From: Rob192 Date: Sun, 5 Dec 2021 20:59:04 +0100 Subject: [PATCH 53/53] fix: remove default value for orig_shape in rotate_boxes --- doctr/utils/geometry.py | 7 ++++--- tests/common/test_utils_geometry.py | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py index 338907f859..003867eba7 100644 --- a/doctr/utils/geometry.py +++ b/doctr/utils/geometry.py @@ -178,9 +178,9 @@ def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: def rotate_boxes( loc_preds: np.ndarray, - angle: float = 0., + angle: float, + orig_shape: Tuple[int, int], min_angle: float = 1., - orig_shape: Tuple[int, int] = (1, 1), target_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes @@ -191,13 +191,14 @@ def rotate_boxes( Args: loc_preds: (N, 5) or (N, 6) array of RELATIVE boxes angle: angle between -90 and +90 degrees - min_angle: minimum angle to rotate boxes orig_shape: shape of the origin image + min_angle: minimum angle to rotate boxes target_shape: shape of the target image Returns: A batch of rotated boxes (N, 5): (x, y, w, h, alpha) or a batch of straight bounding boxes """ + # Change format of the boxes to rotated boxes _boxes = loc_preds.copy() if _boxes.shape[1] == 5: diff --git a/tests/common/test_utils_geometry.py b/tests/common/test_utils_geometry.py index 050dac3265..e94589cd97 100644 --- a/tests/common/test_utils_geometry.py +++ b/tests/common/test_utils_geometry.py @@ -96,22 +96,22 @@ def test_rotate_boxes(): np.zeros(boxes.shape[0]), boxes[:, 4])) # Angle = 0 - rotated = geometry.rotate_boxes(boxes, angle=0.) + rotated = geometry.rotate_boxes(boxes, angle=0., orig_shape=(1, 1)) assert np.all(rotated == rboxes) # Angle < 1: - rotated = geometry.rotate_boxes(boxes, angle=0.5) + rotated = geometry.rotate_boxes(boxes, angle=0.5, orig_shape=(1, 1)) assert np.all(rotated == rboxes) # Angle = 30 - rotated = geometry.rotate_boxes(boxes, angle=30) + rotated = geometry.rotate_boxes(boxes, angle=30, orig_shape=(1, 1)) assert rotated.shape == (1, 6) assert rotated[0, 4] == 30. boxes = np.array([[0., 0., 0.6, 0.2, 0.5]]) # Angle = -90: - rotated = geometry.rotate_boxes(boxes, angle=-90, min_angle=0) + rotated = geometry.rotate_boxes(boxes, angle=-90, orig_shape=(1, 1), min_angle=0) assert np.allclose(rotated, np.array([[0.9, 0.3, 0.6, 0.2, -90., 0.5]])) # Angle = 90 - rotated = geometry.rotate_boxes(boxes, angle=+90, min_angle=0) + rotated = geometry.rotate_boxes(boxes, angle=+90, orig_shape=(1, 1), min_angle=0) assert np.allclose(rotated, np.array([[0.1, 0.7, 0.6, 0.2, +90., 0.5]]))