diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 262c425cd2..00a10786b9 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -113,9 +113,9 @@ jobs:
     needs: [ pytest-common, pytest-tf, pytest-torch ]
     steps:
       - uses: actions/checkout@v3
-      - uses: actions/download-artifact@v2
+      - uses: actions/download-artifact@v3
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v1
+        uses: codecov/codecov-action@v3
         with:
           flags: unittests
           fail_ci_if_error: true
diff --git a/doctr/file_utils.py b/doctr/file_utils.py
index 5e00d5a5a5..d60ae40d89 100644
--- a/doctr/file_utils.py
+++ b/doctr/file_utils.py
@@ -13,7 +13,7 @@
 
 CLASS_NAME: str = "words"
 
-if sys.version_info < (3, 8):
+if sys.version_info < (3, 8):  # pragma: no cover
     import importlib_metadata
 else:
     import importlib.metadata as importlib_metadata
@@ -34,9 +34,9 @@
         try:
             _torch_version = importlib_metadata.version("torch")
             logging.info(f"PyTorch version {_torch_version} available.")
-        except importlib_metadata.PackageNotFoundError:
+        except importlib_metadata.PackageNotFoundError:  # pragma: no cover
             _torch_available = False
-else:
+else:  # pragma: no cover
     logging.info("Disabling PyTorch because USE_TF is set")
     _torch_available = False
 
@@ -65,17 +65,17 @@
                 pass
         _tf_available = _tf_version is not None
     if _tf_available:
-        if int(_tf_version.split(".")[0]) < 2:  # type: ignore[union-attr]
+        if int(_tf_version.split(".")[0]) < 2:  # type: ignore[union-attr]  # pragma: no cover
             logging.info(f"TensorFlow found but with version {_tf_version}. DocTR requires version 2 minimum.")
             _tf_available = False
         else:
             logging.info(f"TensorFlow version {_tf_version} available.")
-else:
+else:  # pragma: no cover
     logging.info("Disabling Tensorflow because USE_TORCH is set")
     _tf_available = False
 
 
-if not _torch_available and not _tf_available:
+if not _torch_available and not _tf_available:  # pragma: no cover
     raise ModuleNotFoundError(
         "DocTR requires either TensorFlow or PyTorch to be installed. Please ensure one of them"
         " is installed and that either USE_TF or USE_TORCH is enabled."
diff --git a/doctr/io/image/tensorflow.py b/doctr/io/image/tensorflow.py
index 66da7fe1a6..e7cbce058d 100644
--- a/doctr/io/image/tensorflow.py
+++ b/doctr/io/image/tensorflow.py
@@ -8,11 +8,7 @@
 import numpy as np
 import tensorflow as tf
 from PIL import Image
-
-if tf.__version__ >= "2.6.0":
-    from tensorflow.keras.utils import img_to_array
-else:
-    from tensorflow.keras.preprocessing.image import img_to_array
+from tensorflow.keras.utils import img_to_array
 
 from doctr.utils.common_types import AbstractPath
 
diff --git a/doctr/models/factory/hub.py b/doctr/models/factory/hub.py
index 4547407fa2..c2be0e8980 100644
--- a/doctr/models/factory/hub.py
+++ b/doctr/models/factory/hub.py
@@ -32,7 +32,7 @@
 }
 
 
-def login_to_hub() -> None:
+def login_to_hub() -> None:  # pragma: no cover
     """Login to huggingface hub"""
     access_token = HfFolder.get_token()
     if access_token is not None and HfApi()._is_valid_token(access_token):
@@ -81,7 +81,7 @@ def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task
         json.dump(model_config, f, indent=2, ensure_ascii=False)
 
 
-def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:
+def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None:  # pragma: no cover
     """Save model and its configuration on HF hub
 
     >>> from doctr.models import login_to_hub, push_to_hf_hub
diff --git a/doctr/models/recognition/utils.py b/doctr/models/recognition/utils.py
index 584db6fcf2..85784535a3 100644
--- a/doctr/models/recognition/utils.py
+++ b/doctr/models/recognition/utils.py
@@ -31,7 +31,7 @@ def merge_strings(a: str, b: str, dil_factor: float) -> str:
     """
     seq_len = min(len(a), len(b))
     if seq_len == 0:  # One sequence is empty, return the other
-        return b if len(a) == 0 else b
+        return b if len(a) == 0 else a
 
     # Initialize merging index and corresponding score (mean Levenstein)
     min_score, index = 1.0, 0  # No overlap, just concatenate
diff --git a/tests/common/test_core.py b/tests/common/test_core.py
index 8f55223bed..cd2758b0df 100644
--- a/tests/common/test_core.py
+++ b/tests/common/test_core.py
@@ -1,3 +1,5 @@
+import pytest
+
 import doctr
 
 
@@ -5,9 +7,11 @@ def test_version():
     assert len(doctr.__version__.split(".")) == 3
 
 
+@pytest.mark.skipif(doctr.is_torch_available() and doctr.is_tf_available(), reason="torch and tf are available")
 def test_is_tf_available():
     assert doctr.is_tf_available()
 
 
+@pytest.mark.skipif(doctr.is_torch_available() and doctr.is_tf_available(), reason="torch and tf are available")
 def test_is_torch_available():
     assert not doctr.is_torch_available()
diff --git a/tests/common/test_datasets_utils.py b/tests/common/test_datasets_utils.py
index 971dbcf46b..5ad997d793 100644
--- a/tests/common/test_datasets_utils.py
+++ b/tests/common/test_datasets_utils.py
@@ -21,6 +21,11 @@ def test_translate(input_str, vocab, output_str):
     assert out == output_str
 
 
+def test_translate_unknown_vocab():
+    with pytest.raises(KeyError):
+        utils.translate("test", "unknown_vocab")
+
+
 @pytest.mark.parametrize(
     "input_str",
     [
@@ -38,6 +43,11 @@ def test_encode_decode(input_str):
     assert decoded == input_str
 
 
+def test_encode_string_unknown_char():
+    with pytest.raises(ValueError):
+        utils.encode_string("abc", "xyz")
+
+
 def test_decode_sequence():
     mapping = "abcdef"
     with pytest.raises(TypeError):
@@ -54,6 +64,8 @@ def test_decode_sequence():
     "sequences, vocab, target_size, sos, eos, pad, dynamic_len, error, out_shape, gts",
     [
         [["cba"], "abcdef", None, None, 1, None, False, True, (1, 3), [[2, 1, 0]]],  # eos in vocab
+        [["cba"], "abcdef", None, 1, -1, None, False, True, (1, 3), [[2, 1, 0]]],  # sos in vocab
+        [["cba"], "abcdef", None, None, -1, 1, False, True, (1, 3), [[2, 1, 0]]],  # pad in vocab
         [["cba", "a"], "abcdef", None, None, -1, None, False, False, (2, 4), [[2, 1, 0, -1], [0, -1, -1, -1]]],
         [["cba", "a"], "abcdef", None, None, 6, None, False, False, (2, 4), [[2, 1, 0, 6], [0, 6, 6, 6]]],
         [["cba", "a"], "abcdef", 2, None, -1, None, False, False, (2, 2), [[2, 1], [0, -1]]],
diff --git a/tests/common/test_io.py b/tests/common/test_io.py
index 625e762f1c..8e5fd1118d 100644
--- a/tests/common/test_io.py
+++ b/tests/common/test_io.py
@@ -1,4 +1,5 @@
 from io import BytesIO
+from pathlib import Path
 
 import numpy as np
 import pytest
@@ -18,6 +19,10 @@ def test_read_pdf(mock_pdf):
     doc = io.read_pdf(mock_pdf)
     _check_doc_content(doc, 2)
 
+    # Test with Path
+    doc = io.read_pdf(Path(mock_pdf))
+    _check_doc_content(doc, 2)
+
     with open(mock_pdf, "rb") as f:
         doc = io.read_pdf(f.read())
         _check_doc_content(doc, 2)
diff --git a/tests/common/test_models_recognition_utils.py b/tests/common/test_models_recognition_utils.py
index 6f655fef7a..b376a42df5 100644
--- a/tests/common/test_models_recognition_utils.py
+++ b/tests/common/test_models_recognition_utils.py
@@ -11,6 +11,8 @@
         ["abcde", "def", "abcdef"],
         ["abcdef", "def", "abcdef"],
         ["abcccc", "cccccc", "abcccccccc"],
+        ["abc", "", "abc"],
+        ["", "abc", "abc"],
     ],
 )
 def test_merge_strings(a, b, merged):
diff --git a/tests/pytorch/test_io_image_pt.py b/tests/pytorch/test_io_image_pt.py
index 07a7a12918..2c1ab69c0b 100644
--- a/tests/pytorch/test_io_image_pt.py
+++ b/tests/pytorch/test_io_image_pt.py
@@ -17,6 +17,9 @@ def test_read_img_as_tensor(mock_image_path):
     img = read_img_as_tensor(mock_image_path, dtype=torch.uint8)
     assert img.dtype == torch.uint8
 
+    with pytest.raises(ValueError):
+        read_img_as_tensor(mock_image_path, dtype=torch.float64)
+
 
 def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream)
@@ -30,6 +33,9 @@ def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream, dtype=torch.uint8)
     assert img.dtype == torch.uint8
 
+    with pytest.raises(ValueError):
+        decode_img_as_tensor(mock_image_stream, dtype=torch.float64)
+
 
 def test_tensor_from_numpy(mock_image_stream):
     with pytest.raises(ValueError):
diff --git a/tests/pytorch/test_models_classification_pt.py b/tests/pytorch/test_models_classification_pt.py
index d9bc436552..0ea879097a 100644
--- a/tests/pytorch/test_models_classification_pt.py
+++ b/tests/pytorch/test_models_classification_pt.py
@@ -42,6 +42,8 @@ def _test_classification(model, input_shape, output_size, batch_size=2):
         ["mobilenet_v3_large", (3, 32, 32), (126,)],
         ["vit_s", (3, 32, 32), (126,)],
         ["vit_b", (3, 32, 32), (126,)],
+        # Check that the interpolation of positional embeddings for vit models works correctly
+        ["vit_s", (3, 64, 64), (126,)],
     ],
 )
 def test_classification_architectures(arch_name, input_shape, output_size):
diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py
index de929a13e8..1a4cbe57ff 100644
--- a/tests/pytorch/test_models_detection_pt.py
+++ b/tests/pytorch/test_models_detection_pt.py
@@ -14,19 +14,26 @@
 
 
 @pytest.mark.parametrize(
-    "arch_name, input_shape, output_size, out_prob",
+    "arch_name, input_shape, output_size, out_prob, train_mode",
     [
-        ["db_resnet34", (3, 512, 512), (1, 512, 512), True],
-        ["db_resnet50", (3, 512, 512), (1, 512, 512), True],
-        ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True],
-        ["linknet_resnet18", (3, 512, 512), (1, 512, 512), False],
-        ["linknet_resnet34", (3, 512, 512), (1, 512, 512), False],
-        ["linknet_resnet50", (3, 512, 512), (1, 512, 512), False],
+        ["db_resnet34", (3, 512, 512), (1, 512, 512), True, True],
+        ["db_resnet34", (3, 512, 512), (1, 512, 512), True, False],
+        ["db_resnet50", (3, 512, 512), (1, 512, 512), True, True],
+        ["db_resnet50", (3, 512, 512), (1, 512, 512), True, False],
+        ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True, True],
+        ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True, False],
+        ["linknet_resnet18", (3, 512, 512), (1, 512, 512), True, True],
+        ["linknet_resnet18", (3, 512, 512), (1, 512, 512), True, False],
+        ["linknet_resnet34", (3, 512, 512), (1, 512, 512), True, True],
+        ["linknet_resnet34", (3, 512, 512), (1, 512, 512), True, False],
+        ["linknet_resnet50", (3, 512, 512), (1, 512, 512), True, True],
+        ["linknet_resnet50", (3, 512, 512), (1, 512, 512), True, False],
     ],
 )
-def test_detection_models(arch_name, input_shape, output_size, out_prob):
+def test_detection_models(arch_name, input_shape, output_size, out_prob, train_mode):
     batch_size = 2
-    model = detection.__dict__[arch_name](pretrained=False).eval()
+    model = detection.__dict__[arch_name](pretrained=True)
+    model = model.train() if train_mode else model.eval()
     assert isinstance(model, torch.nn.Module)
     input_tensor = torch.rand((batch_size, *input_shape))
     target = [
@@ -36,20 +43,21 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob):
     if torch.cuda.is_available():
         model.cuda()
         input_tensor = input_tensor.cuda()
-    out = model(input_tensor, target, return_model_output=True, return_preds=True)
+    out = model(input_tensor, target, return_model_output=True, return_preds=not train_mode)
     assert isinstance(out, dict)
-    assert len(out) == 3
+    assert len(out) == 3 if not train_mode else len(out) == 2
     # Check proba map
     assert out["out_map"].shape == (batch_size, *output_size)
     assert out["out_map"].dtype == torch.float32
     if out_prob:
         assert torch.all((out["out_map"] >= 0) & (out["out_map"] <= 1))
     # Check boxes
-    for boxes_dict in out["preds"]:
-        for boxes in boxes_dict.values():
-            assert boxes.shape[1] == 5
-            assert np.all(boxes[:, :2] < boxes[:, 2:4])
-            assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
+    if not train_mode:
+        for boxes_dict in out["preds"]:
+            for boxes in boxes_dict.values():
+                assert boxes.shape[1] == 5
+                assert np.all(boxes[:, :2] < boxes[:, 2:4])
+                assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
     # Check loss
     assert isinstance(out["loss"], torch.Tensor)
     # Check the rotated case (same targets)
diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py
index 96e6365145..9ddb5c1cc1 100644
--- a/tests/pytorch/test_models_recognition_pt.py
+++ b/tests/pytorch/test_models_recognition_pt.py
@@ -19,21 +19,30 @@
 
 
 @pytest.mark.parametrize(
-    "arch_name, input_shape, pretrained",
+    "arch_name, input_shape, train_mode",
     [
         ["crnn_vgg16_bn", (3, 32, 128), True],
+        ["crnn_vgg16_bn", (3, 32, 128), False],
         ["crnn_mobilenet_v3_small", (3, 32, 128), True],
+        ["crnn_mobilenet_v3_small", (3, 32, 128), False],
         ["crnn_mobilenet_v3_large", (3, 32, 128), True],
+        ["crnn_mobilenet_v3_large", (3, 32, 128), False],
+        ["sar_resnet31", (3, 32, 128), True],
         ["sar_resnet31", (3, 32, 128), False],
+        ["master", (3, 32, 128), True],
         ["master", (3, 32, 128), False],
+        ["vitstr_small", (3, 32, 128), True],
         ["vitstr_small", (3, 32, 128), False],
+        ["vitstr_base", (3, 32, 128), True],
         ["vitstr_base", (3, 32, 128), False],
+        ["parseq", (3, 32, 128), True],
         ["parseq", (3, 32, 128), False],
     ],
 )
-def test_recognition_models(arch_name, input_shape, pretrained, mock_vocab):
+def test_recognition_models(arch_name, input_shape, train_mode, mock_vocab):
     batch_size = 4
-    model = recognition.__dict__[arch_name](vocab=mock_vocab, pretrained=pretrained, input_shape=input_shape).eval()
+    model = recognition.__dict__[arch_name](vocab=mock_vocab, pretrained=True, input_shape=input_shape)
+    model = model.train() if train_mode else model.eval()
     assert isinstance(model, torch.nn.Module)
     input_tensor = torch.rand((batch_size, *input_shape))
     target = ["i", "am", "a", "jedi"]
@@ -41,12 +50,13 @@ def test_recognition_models(arch_name, input_shape, pretrained, mock_vocab):
     if torch.cuda.is_available():
         model.cuda()
         input_tensor = input_tensor.cuda()
-    out = model(input_tensor, target, return_model_output=True, return_preds=True)
+    out = model(input_tensor, target, return_model_output=True, return_preds=not train_mode)
     assert isinstance(out, dict)
-    assert len(out) == 3
-    assert isinstance(out["preds"], list)
-    assert len(out["preds"]) == batch_size
-    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
+    assert len(out) == 3 if not train_mode else len(out) == 2
+    if not train_mode:
+        assert isinstance(out["preds"], list)
+        assert len(out["preds"]) == batch_size
+        assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
     assert isinstance(out["out_map"], torch.Tensor)
     assert out["out_map"].dtype == torch.float32
     assert isinstance(out["loss"], torch.Tensor)
diff --git a/tests/tensorflow/test_io_image_tf.py b/tests/tensorflow/test_io_image_tf.py
index 4d2f81cbe0..6542f459c2 100644
--- a/tests/tensorflow/test_io_image_tf.py
+++ b/tests/tensorflow/test_io_image_tf.py
@@ -17,6 +17,9 @@ def test_read_img_as_tensor(mock_image_path):
     img = read_img_as_tensor(mock_image_path, dtype=tf.uint8)
     assert img.dtype == tf.uint8
 
+    with pytest.raises(ValueError):
+        read_img_as_tensor(mock_image_path, dtype=tf.float64)
+
 
 def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream)
@@ -30,6 +33,9 @@ def test_decode_img_as_tensor(mock_image_stream):
     img = decode_img_as_tensor(mock_image_stream, dtype=tf.uint8)
     assert img.dtype == tf.uint8
 
+    with pytest.raises(ValueError):
+        decode_img_as_tensor(mock_image_stream, dtype=tf.float64)
+
 
 def test_tensor_from_numpy(mock_image_stream):
     with pytest.raises(ValueError):
diff --git a/tests/tensorflow/test_models_detection_tf.py b/tests/tensorflow/test_models_detection_tf.py
index 52fc36d8f0..60b2642e59 100644
--- a/tests/tensorflow/test_models_detection_tf.py
+++ b/tests/tensorflow/test_models_detection_tf.py
@@ -19,16 +19,21 @@
 
 
 @pytest.mark.parametrize(
-    "arch_name, input_shape, output_size, out_prob",
+    "arch_name, input_shape, output_size, out_prob, train_mode",
     [
-        ["db_resnet50", (512, 512, 3), (512, 512, 1), True],
-        ["db_mobilenet_v3_large", (512, 512, 3), (512, 512, 1), True],
-        ["linknet_resnet18", (512, 512, 3), (512, 512, 1), False],
-        ["linknet_resnet34", (512, 512, 3), (512, 512, 1), False],
-        ["linknet_resnet50", (512, 512, 3), (512, 512, 1), False],
+        ["db_resnet50", (512, 512, 3), (512, 512, 1), True, True],
+        ["db_resnet50", (512, 512, 3), (512, 512, 1), True, False],
+        ["db_mobilenet_v3_large", (512, 512, 3), (512, 512, 1), True, True],
+        ["db_mobilenet_v3_large", (512, 512, 3), (512, 512, 1), True, False],
+        ["linknet_resnet18", (512, 512, 3), (512, 512, 1), True, True],
+        ["linknet_resnet18", (512, 512, 3), (512, 512, 1), True, False],
+        ["linknet_resnet34", (512, 512, 3), (512, 512, 1), True, True],
+        ["linknet_resnet34", (512, 512, 3), (512, 512, 1), True, False],
+        ["linknet_resnet50", (512, 512, 3), (512, 512, 1), True, True],
+        ["linknet_resnet50", (512, 512, 3), (512, 512, 1), True, False],
     ],
 )
-def test_detection_models(arch_name, input_shape, output_size, out_prob):
+def test_detection_models(arch_name, input_shape, output_size, out_prob, train_mode):
     batch_size = 2
     tf.keras.backend.clear_session()
     model = detection.__dict__[arch_name](pretrained=True, input_shape=input_shape)
@@ -39,9 +44,15 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob):
         {CLASS_NAME: np.array([[0.5, 0.5, 1, 1], [0.5, 0.5, 0.8, 0.9]], dtype=np.float32)},
     ]
     # test training model
-    out = model(input_tensor, target, return_model_output=True, return_preds=True, training=True)
+    out = model(
+        input_tensor,
+        target,
+        return_model_output=True,
+        return_preds=not train_mode,
+        training=train_mode,
+    )
     assert isinstance(out, dict)
-    assert len(out) == 3
+    assert len(out) == 3 if not train_mode else len(out) == 2
     # Check proba map
     assert isinstance(out["out_map"], tf.Tensor)
     assert out["out_map"].dtype == tf.float32
@@ -50,11 +61,12 @@ def test_detection_models(arch_name, input_shape, output_size, out_prob):
     if out_prob:
         assert np.all(np.logical_and(seg_map >= 0, seg_map <= 1))
     # Check boxes
-    for boxes_dict in out["preds"]:
-        for boxes in boxes_dict.values():
-            assert boxes.shape[1] == 5
-            assert np.all(boxes[:, :2] < boxes[:, 2:4])
-            assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
+    if not train_mode:
+        for boxes_dict in out["preds"]:
+            for boxes in boxes_dict.values():
+                assert boxes.shape[1] == 5
+                assert np.all(boxes[:, :2] < boxes[:, 2:4])
+                assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
     # Check loss
     assert isinstance(out["loss"], tf.Tensor)
     # Target checks
diff --git a/tests/tensorflow/test_models_recognition_tf.py b/tests/tensorflow/test_models_recognition_tf.py
index d42b06dcdf..b7d67aaaf4 100644
--- a/tests/tensorflow/test_models_recognition_tf.py
+++ b/tests/tensorflow/test_models_recognition_tf.py
@@ -24,34 +24,49 @@
 
 
 @pytest.mark.parametrize(
-    "arch_name, input_shape",
+    "arch_name, input_shape, train_mode",
     [
-        ["crnn_vgg16_bn", (32, 128, 3)],
-        ["crnn_mobilenet_v3_small", (32, 128, 3)],
-        ["crnn_mobilenet_v3_large", (32, 128, 3)],
-        ["sar_resnet31", (32, 128, 3)],
-        ["master", (32, 128, 3)],
-        ["vitstr_small", (32, 128, 3)],
-        ["vitstr_base", (32, 128, 3)],
-        ["parseq", (32, 128, 3)],
+        ["crnn_vgg16_bn", (32, 128, 3), True],
+        ["crnn_vgg16_bn", (32, 128, 3), False],
+        ["crnn_mobilenet_v3_small", (32, 128, 3), True],
+        ["crnn_mobilenet_v3_small", (32, 128, 3), False],
+        ["crnn_mobilenet_v3_large", (32, 128, 3), True],
+        ["crnn_mobilenet_v3_large", (32, 128, 3), False],
+        ["sar_resnet31", (32, 128, 3), True],
+        ["sar_resnet31", (32, 128, 3), False],
+        ["master", (32, 128, 3), True],
+        ["master", (32, 128, 3), False],
+        ["vitstr_small", (32, 128, 3), True],
+        ["vitstr_small", (32, 128, 3), False],
+        ["vitstr_base", (32, 128, 3), True],
+        ["vitstr_base", (32, 128, 3), False],
+        ["parseq", (32, 128, 3), True],
+        ["parseq", (32, 128, 3), False],
    ],
 )
-def test_recognition_models(arch_name, input_shape):
+def test_recognition_models(arch_name, input_shape, train_mode):
     batch_size = 4
     reco_model = recognition.__dict__[arch_name](pretrained=True, input_shape=input_shape)
     assert isinstance(reco_model, tf.keras.Model)
     input_tensor = tf.random.uniform(shape=[batch_size, *input_shape], minval=0, maxval=1)
     target = ["i", "am", "a", "jedi"]
-    out = reco_model(input_tensor, target, return_model_output=True, return_preds=True)
+    out = reco_model(
+        input_tensor,
+        target,
+        return_model_output=True,
+        return_preds=not train_mode,
+        training=train_mode,
+    )
     assert isinstance(out, dict)
-    assert len(out) == 3
+    assert len(out) == 3 if not train_mode else len(out) == 2
     assert isinstance(out["out_map"], tf.Tensor)
     assert out["out_map"].dtype == tf.float32
-    assert isinstance(out["preds"], list)
-    assert len(out["preds"]) == batch_size
-    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
-    assert isinstance(out["loss"], tf.Tensor)
+    if not train_mode:
+        assert isinstance(out["preds"], list)
+        assert len(out["preds"]) == batch_size
+        assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
+        assert isinstance(out["loss"], tf.Tensor)
 
     # test model in train mode needs targets
     with pytest.raises(ValueError):
         reco_model(input_tensor, None, training=True)