From 7b11315db6088d1abc2e2d1452b220eba6325f43 Mon Sep 17 00:00:00 2001 From: HamzaGbada Date: Sun, 30 Apr 2023 20:43:52 +0100 Subject: [PATCH 1/2] [FIX] Error in unpacking archive of CORD dataset --- doctr/datasets/cord.py | 6 ++++-- tests/pytorch/test_datasets_pt.py | 2 +- tests/tensorflow/test_datasets_tf.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py index febcfa65d3..73ff11fabc 100644 --- a/doctr/datasets/cord.py +++ b/doctr/datasets/cord.py @@ -38,11 +38,13 @@ class CORD(VisionDataset): TRAIN = ( "https://doctr-static.mindee.com/models?id=v0.1.1/cord_train.zip&src=0", "45f9dc77f126490f3e52d7cb4f70ef3c57e649ea86d19d862a2757c9c455d7f8", + "cord_train.zip" ) TEST = ( "https://doctr-static.mindee.com/models?id=v0.1.1/cord_test.zip&src=0", "8c895e3d6f7e1161c5b7245e3723ce15c04d84be89eaa6093949b75a66fb3c58", + "cord_test.zip" ) def __init__( @@ -52,10 +54,10 @@ def __init__( recognition_task: bool = False, **kwargs: Any, ) -> None: - url, sha256 = self.TRAIN if train else self.TEST + url, sha256, name = self.TRAIN if train else self.TEST super().__init__( url, - None, + name, sha256, True, pre_transforms=convert_target_to_relative if not recognition_task else None, diff --git a/tests/pytorch/test_datasets_pt.py b/tests/pytorch/test_datasets_pt.py index 0bdb00d277..2e34d8963c 100644 --- a/tests/pytorch/test_datasets_pt.py +++ b/tests/pytorch/test_datasets_pt.py @@ -414,7 +414,7 @@ def test_funsd(input_size, num_samples, rotate, recognition, mock_funsd_dataset) ) def test_cord(input_size, num_samples, rotate, recognition, mock_cord_dataset): # monkeypatch the path to temporary dataset - datasets.CORD.TRAIN = (mock_cord_dataset, None) + datasets.CORD.TRAIN = (mock_cord_dataset, None, "cord_train.zip") ds = datasets.CORD( train=True, diff --git a/tests/tensorflow/test_datasets_tf.py b/tests/tensorflow/test_datasets_tf.py index 567177a743..ecfe36d3a4 100644 --- a/tests/tensorflow/test_datasets_tf.py +++ b/tests/tensorflow/test_datasets_tf.py @@ -388,7 +388,7 @@ def test_funsd(input_size, num_samples, rotate, recognition, mock_funsd_dataset) ) def test_cord(input_size, num_samples, rotate, recognition, mock_cord_dataset): # monkeypatch the path to temporary dataset - datasets.CORD.TRAIN = (mock_cord_dataset, None) + datasets.CORD.TRAIN = (mock_cord_dataset, None, "cord_train.zip") ds = datasets.CORD( train=True, From 4bea0da25b405467c86fc79ffafab82d6910b791 Mon Sep 17 00:00:00 2001 From: HamzaGbada Date: Sun, 30 Apr 2023 22:25:54 +0100 Subject: [PATCH 2/2] [FIX] Error in unpacking archive of CORD dataset --- doctr/datasets/cord.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py index 73ff11fabc..623df8ac8e 100644 --- a/doctr/datasets/cord.py +++ b/doctr/datasets/cord.py @@ -38,13 +38,13 @@ class CORD(VisionDataset): TRAIN = ( "https://doctr-static.mindee.com/models?id=v0.1.1/cord_train.zip&src=0", "45f9dc77f126490f3e52d7cb4f70ef3c57e649ea86d19d862a2757c9c455d7f8", - "cord_train.zip" + "cord_train.zip", ) TEST = ( "https://doctr-static.mindee.com/models?id=v0.1.1/cord_test.zip&src=0", "8c895e3d6f7e1161c5b7245e3723ce15c04d84be89eaa6093949b75a66fb3c58", - "cord_test.zip" + "cord_test.zip", ) def __init__(