mindee · felixdittrich92 · Jun 27, 2023 · Jun 24, 2023 · Jun 24, 2023 · Jun 24, 2023
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -113,9 +113,9 @@ jobs:
     needs: [ pytest-common, pytest-tf, pytest-torch ]
     steps:
       - uses: actions/checkout@v3
-      - uses: actions/download-artifact@v2
+      - uses: actions/download-artifact@v3
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v1
+        uses: codecov/codecov-action@v3
         with:
           flags: unittests
           fail_ci_if_error: true
diff --git a/doctr/file_utils.py b/doctr/file_utils.py
@@ -13,7 +13,7 @@
 CLASS_NAME: str = "words"
 
 
-if sys.version_info < (3, 8):
+if sys.version_info < (3, 8):  # pragma: no cover
     import importlib_metadata
 else:
     import importlib.metadata as importlib_metadata
@@ -34,9 +34,9 @@
         try:
             _torch_version = importlib_metadata.version("torch")
             logging.info(f"PyTorch version {_torch_version} available.")
-        except importlib_metadata.PackageNotFoundError:
+        except importlib_metadata.PackageNotFoundError:  # pragma: no cover
             _torch_available = False
-else:
+else:  # pragma: no cover
     logging.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
 
@@ -65,17 +65,17 @@
                 pass
         _tf_available = _tf_version is not None
     if _tf_available:
-        if int(_tf_version.split(".")[0]) < 2:  # type: ignore[union-attr]
+        if int(_tf_version.split(".")[0]) < 2:  # type: ignore[union-attr]  # pragma: no cover
             logging.info(f"TensorFlow found but with version {_tf_version}. DocTR requires version 2 minimum.")
             _tf_available = False
         else:
             logging.info(f"TensorFlow version {_tf_version} available.")
-else:
+else:  # pragma: no cover
     logging.info("Disabling Tensorflow because USE_TORCH is set")
     _tf_available = False
 
 
-if not _torch_available and not _tf_available:
+if not _torch_available and not _tf_available:  # pragma: no cover
     raise ModuleNotFoundError(
         "DocTR requires either TensorFlow or PyTorch to be installed. Please ensure one of them"
         " is installed and that either USE_TF or USE_TORCH is enabled."

diff --git a/doctr/io/image/tensorflow.py b/doctr/io/image/tensorflow.py
@@ -8,11 +8,7 @@
 import numpy as np
 import tensorflow as tf
 from PIL import Image
-
-if tf.__version__ >= "2.6.0":
-    from tensorflow.keras.utils import img_to_array
-else:
-    from tensorflow.keras.preprocessing.image import img_to_array
+from tensorflow.keras.utils import img_to_array
 
 from doctr.utils.common_types import AbstractPath
 

diff --git a/doctr/models/factory/hub.py b/doctr/models/factory/hub.py
@@ -32,7 +32,7 @@
 }
 
 
-def login_to_hub() -> None:
+def login_to_hub() -> None:  # pragma: no cover
     """Login to huggingface hub"""
     access_token = HfFolder.get_token()
     if access_token is not None and HfApi()._is_valid_token(access_token):
@@ -81,7 +81,7 @@ def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task
         json.dump(model_config, f, indent=2, ensure_ascii=False)
 
 
-def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:
+def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:  # pragma: no cover
     """Save model and its configuration on HF hub
 
     >>> from doctr.models import login_to_hub, push_to_hf_hub

diff --git a/doctr/models/recognition/utils.py b/doctr/models/recognition/utils.py
@@ -31,7 +31,7 @@ def merge_strings(a: str, b: str, dil_factor: float) -> str:
     """
     seq_len = min(len(a), len(b))
     if seq_len == 0:  # One sequence is empty, return the other
-        return b if len(a) == 0 else b
+        return b if len(a) == 0 else a
 
     # Initialize merging index and corresponding score (mean Levenstein)
     min_score, index = 1.0, 0  # No overlap, just concatenate

diff --git a/tests/common/test_core.py b/tests/common/test_core.py
@@ -1,13 +1,17 @@
+import pytest
+
 import doctr
 
 
 def test_version():
     assert len(doctr.__version__.split(".")) == 3
 
 
+@pytest.mark.xfail(reason="Fails when TensorFlow is unavailable or disabled (e.g. USE_TORCH is set)")
 def test_is_tf_available():
     assert doctr.is_tf_available()
 
 
+@pytest.mark.xfail(reason="Fails when PyTorch is the active backend (e.g. USE_TORCH is set)")
 def test_is_torch_available():
     assert not doctr.is_torch_available()
diff --git a/tests/common/test_datasets_utils.py b/tests/common/test_datasets_utils.py
@@ -21,6 +21,11 @@ def test_translate(input_str, vocab, output_str):
     assert out == output_str
 
 
+def test_translate_unknown_vocab():
+    with pytest.raises(KeyError):
+        _ = utils.translate("test", "unknown_vocab")
+
+
 @pytest.mark.parametrize(
     "input_str",
     [
@@ -38,6 +43,11 @@ def test_encode_decode(input_str):
     assert decoded == input_str
 
 
+def test_encode_string_unknown_char():
+    with pytest.raises(ValueError):
+        _ = utils.encode_string("abc", "xyz")
+
+
 def test_decode_sequence():
     mapping = "abcdef"
     with pytest.raises(TypeError):
@@ -54,6 +64,8 @@ def test_decode_sequence():
     "sequences, vocab, target_size, sos, eos, pad, dynamic_len, error, out_shape, gts",
     [
         [["cba"], "abcdef", None, None, 1, None, False, True, (1, 3), [[2, 1, 0]]],  # eos in vocab
+        [["cba"], "abcdef", None, 1, -1, None, False, True, (1, 3), [[2, 1, 0]]],  # sos in vocab
+        [["cba"], "abcdef", None, None, -1, 1, False, True, (1, 3), [[2, 1, 0]]],  # pad in vocab
         [["cba", "a"], "abcdef", None, None, -1, None, False, False, (2, 4), [[2, 1, 0, -1], [0, -1, -1, -1]]],
         [["cba", "a"], "abcdef", None, None, 6, None, False, False, (2, 4), [[2, 1, 0, 6], [0, 6, 6, 6]]],
         [["cba", "a"], "abcdef", 2, None, -1, None, False, False, (2, 2), [[2, 1], [0, -1]]],

diff --git a/tests/common/test_io.py b/tests/common/test_io.py
@@ -1,4 +1,5 @@
 from io import BytesIO
+from pathlib import Path
 
 import numpy as np
 import pytest
@@ -18,6 +19,10 @@ def test_read_pdf(mock_pdf):
     doc = io.read_pdf(mock_pdf)
     _check_doc_content(doc, 2)
 
+    # Test with Path
+    doc = io.read_pdf(Path(mock_pdf))
+    _check_doc_content(doc, 2)
+
     with open(mock_pdf, "rb") as f:
         doc = io.read_pdf(f.read())
     _check_doc_content(doc, 2)

diff --git a/tests/common/test_models_recognition_utils.py b/tests/common/test_models_recognition_utils.py
@@ -11,6 +11,8 @@
         ["abcde", "def", "abcdef"],
         ["abcdef", "def", "abcdef"],
         ["abcccc", "cccccc", "abcccccccc"],
+        ["abc", "", "abc"],
+        ["", "abc", "abc"],
     ],
 )
 def test_merge_strings(a, b, merged):

diff --git a/tests/pytorch/test_io_image_pt.py b/tests/pytorch/test_io_image_pt.py
@@ -17,6 +17,9 @@ def test_read_img_as_tensor(mock_image_path):
     img = read_img_as_tensor(mock_image_path, dtype=torch.uint8)
     assert img.dtype == torch.uint8
 
+    with pytest.raises(ValueError):
+        _ = read_img_as_tensor(mock_image_path, dtype=torch.float64)
+
 
 def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream)
@@ -30,6 +33,9 @@ def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream, dtype=torch.uint8)
     assert img.dtype == torch.uint8
 
+    with pytest.raises(ValueError):
+        _ = decode_img_as_tensor(mock_image_stream, dtype=torch.float64)
+
 
 def test_tensor_from_numpy(mock_image_stream):
     with pytest.raises(ValueError):

diff --git a/tests/pytorch/test_models_classification_pt.py b/tests/pytorch/test_models_classification_pt.py
@@ -42,6 +42,8 @@ def _test_classification(model, input_shape, output_size, batch_size=2):
         ["mobilenet_v3_large", (3, 32, 32), (126,)],
         ["vit_s", (3, 32, 32), (126,)],
         ["vit_b", (3, 32, 32), (126,)],
+        # Check that the interpolation of positional embeddings for vit models works correctly
+        ["vit_s", (3, 64, 64), (126,)],
     ],
 )
 def test_classification_architectures(arch_name, input_shape, output_size):

diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py
@@ -14,19 +14,26 @@
 
 
 @pytest.mark.parametrize(
-    "arch_name, input_shape, output_size, out_prob",
+    "arch_name, input_shape, output_size, out_prob, train_mode",
     [
-        ["db_resnet34", (3, 512, 512), (1, 512, 512), True],
-        ["db_resnet50", (3, 512, 512), (1, 512, 512), True],
-        ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True],
-        ["linknet_resnet18", (3, 512, 512), (1, 512, 512), False],
-        ["linknet_resnet34", (3, 512, 512), (1, 512, 512), False],
-        ["linknet_resnet50", (3, 512, 512), (1, 512, 512), False],
+        ["db_resnet34", (3, 512, 512), (1, 512, 512), True, True],
+        ["db_resnet34", (3, 512, 512), (1, 512, 512), True, False],
+        ["db_resnet50", (3, 512, 512), (1, 512, 512), True, True],
+        ["db_resnet50", (3, 512, 512), (1, 512, 512), True, False],
+        ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True, True],
+        ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True, False],
+        ["linknet_resnet18", (3, 512, 512), (1, 512, 512), True, True],
+        ["linknet_resnet18", (3, 512, 512), (1, 512, 512), True, False],
+        ["linknet_resnet34", (3, 512, 512), (1, 512, 512), True, True],
+        ["linknet_resnet34", (3, 512, 512), (1, 512, 512), True, False],
+        ["linknet_resnet50", (3, 512, 512), (1, 512, 512), True, True],
+        ["linknet_resnet50", (3, 512, 512), (1, 512, 512), True, False],
     ],
 )
-def test_detection_models(arch_name, input_shape, output_size, out_prob):
+def test_detection_models(arch_name, input_shape, output_size, out_prob, train_mode):
     batch_size = 2
-    model = detection.__dict__[arch_name](pretrained=False).eval()
+    model = detection.__dict__[arch_name](pretrained=True)
+    model = model.train() if train_mode else model.eval()
     assert isinstance(model, torch.nn.Module)
     input_tensor = torch.rand((batch_size, *input_shape))
     target = [
@@ -36,20 +43,21 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob):
     if torch.cuda.is_available():
         model.cuda()
         input_tensor = input_tensor.cuda()
-    out = model(input_tensor, target, return_model_output=True, return_preds=True)
+    out = model(input_tensor, target, return_model_output=True, return_preds=not train_mode)
     assert isinstance(out, dict)
-    assert len(out) == 3
+    assert len(out) == (2 if train_mode else 3)  # "preds" is not requested in train mode
     # Check proba map
     assert out["out_map"].shape == (batch_size, *output_size)
     assert out["out_map"].dtype == torch.float32
     if out_prob:
         assert torch.all((out["out_map"] >= 0) & (out["out_map"] <= 1))
     # Check boxes
-    for boxes_dict in out["preds"]:
-        for boxes in boxes_dict.values():
-            assert boxes.shape[1] == 5
-            assert np.all(boxes[:, :2] < boxes[:, 2:4])
-            assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
+    if not train_mode:
+        for boxes_dict in out["preds"]:
+            for boxes in boxes_dict.values():
+                assert boxes.shape[1] == 5
+                assert np.all(boxes[:, :2] < boxes[:, 2:4])
+                assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
     # Check loss
     assert isinstance(out["loss"], torch.Tensor)
     # Check the rotated case (same targets)

diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py
@@ -19,34 +19,44 @@
 
 
 @pytest.mark.parametrize(
-    "arch_name, input_shape, pretrained",
+    "arch_name, input_shape, train_mode",
     [
         ["crnn_vgg16_bn", (3, 32, 128), True],
+        ["crnn_vgg16_bn", (3, 32, 128), False],
         ["crnn_mobilenet_v3_small", (3, 32, 128), True],
+        ["crnn_mobilenet_v3_small", (3, 32, 128), False],
         ["crnn_mobilenet_v3_large", (3, 32, 128), True],
+        ["crnn_mobilenet_v3_large", (3, 32, 128), False],
+        ["sar_resnet31", (3, 32, 128), True],
         ["sar_resnet31", (3, 32, 128), False],
+        ["master", (3, 32, 128), True],
         ["master", (3, 32, 128), False],
+        ["vitstr_small", (3, 32, 128), True],
         ["vitstr_small", (3, 32, 128), False],
+        ["vitstr_base", (3, 32, 128), True],
         ["vitstr_base", (3, 32, 128), False],
+        ["parseq", (3, 32, 128), True],
         ["parseq", (3, 32, 128), False],
     ],
 )
-def test_recognition_models(arch_name, input_shape, pretrained, mock_vocab):
+def test_recognition_models(arch_name, input_shape, train_mode, mock_vocab):
     batch_size = 4
-    model = recognition.__dict__[arch_name](vocab=mock_vocab, pretrained=pretrained, input_shape=input_shape).eval()
+    model = recognition.__dict__[arch_name](vocab=mock_vocab, pretrained=True, input_shape=input_shape)
+    model = model.train() if train_mode else model.eval()
     assert isinstance(model, torch.nn.Module)
     input_tensor = torch.rand((batch_size, *input_shape))
     target = ["i", "am", "a", "jedi"]
 
     if torch.cuda.is_available():
         model.cuda()
         input_tensor = input_tensor.cuda()
-    out = model(input_tensor, target, return_model_output=True, return_preds=True)
+    out = model(input_tensor, target, return_model_output=True, return_preds=not train_mode)
     assert isinstance(out, dict)
-    assert len(out) == 3
-    assert isinstance(out["preds"], list)
-    assert len(out["preds"]) == batch_size
-    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
+    assert len(out) == (2 if train_mode else 3)  # "preds" is not requested in train mode
+    if not train_mode:
+        assert isinstance(out["preds"], list)
+        assert len(out["preds"]) == batch_size
+        assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
     assert isinstance(out["out_map"], torch.Tensor)
     assert out["out_map"].dtype == torch.float32
     assert isinstance(out["loss"], torch.Tensor)

diff --git a/tests/tensorflow/test_io_image_tf.py b/tests/tensorflow/test_io_image_tf.py
@@ -17,6 +17,9 @@ def test_read_img_as_tensor(mock_image_path):
     img = read_img_as_tensor(mock_image_path, dtype=tf.uint8)
     assert img.dtype == tf.uint8
 
+    with pytest.raises(ValueError):
+        _ = read_img_as_tensor(mock_image_path, dtype=tf.float64)
+
 
 def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream)
@@ -30,6 +33,9 @@ def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream, dtype=tf.uint8)
     assert img.dtype == tf.uint8
 
+    with pytest.raises(ValueError):
+        _ = decode_img_as_tensor(mock_image_stream, dtype=tf.float64)
+
 
 def test_tensor_from_numpy(mock_image_stream):
     with pytest.raises(ValueError):