diff --git a/.github/workflows/clear_caches.yml b/.github/workflows/clear_caches.yml new file mode 100644 index 0000000000..1f5510f529 --- /dev/null +++ b/.github/workflows/clear_caches.yml @@ -0,0 +1,15 @@ +name: Clear GitHub runner caches + +on: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' # Runs once a day + +jobs: + clear: + name: Clear caches + runs-on: ubuntu-latest + steps: + - uses: MyAlbum/purge-cache@v2 + with: + max-age: 172800 # Caches older than 2 days are deleted diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 47cc0d6e56..744194a02b 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -43,7 +43,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} + key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-style - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/doctr/datasets/datasets/pytorch.py b/doctr/datasets/datasets/pytorch.py index c24b1c2a8e..a6cf541c07 100644 --- a/doctr/datasets/datasets/pytorch.py +++ b/doctr/datasets/datasets/pytorch.py @@ -50,9 +50,9 @@ def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]: @staticmethod def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]: images, targets = zip(*samples) - images = torch.stack(images, dim=0) # type: ignore[assignment] + images = torch.stack(images, dim=0) - return images, list(targets) # type: ignore[return-value] + return images, list(targets) class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101 diff --git a/doctr/io/image/pytorch.py b/doctr/io/image/pytorch.py index 26167f81f5..48c719db51 100644 --- a/doctr/io/image/pytorch.py +++ b/doctr/io/image/pytorch.py @@ -96,4 +96,4 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) - def get_img_shape(img: torch.Tensor) -> Tuple[int, int]: """Get the shape of an image""" - return img.shape[-2:] # type: ignore[return-value] + return img.shape[-2:] diff --git a/doctr/models/classification/predictor/pytorch.py b/doctr/models/classification/predictor/pytorch.py index 7a3e73af7a..e7c44d1df4 100644 --- a/doctr/models/classification/predictor/pytorch.py +++ b/doctr/models/classification/predictor/pytorch.py @@ -60,7 +60,7 @@ def forward( predicted_batches = [out_batch.argmax(dim=1).cpu().detach().numpy() for out_batch in predicted_batches] class_idxs = [int(pred) for batch in predicted_batches for pred in batch] - classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs] # type: ignore[union-attr] + classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs] # type: ignore confs = [round(float(p), 2) for prob in probs for p in prob] return [class_idxs, classes, confs] diff --git a/doctr/models/detection/differentiable_binarization/pytorch.py b/doctr/models/detection/differentiable_binarization/pytorch.py index cb9daab2dc..5625dbec62 100644 --- a/doctr/models/detection/differentiable_binarization/pytorch.py +++ b/doctr/models/detection/differentiable_binarization/pytorch.py @@ -270,7 +270,7 @@ def compute_loss( dice_map = torch.softmax(out_map, dim=1) else: # compute binary map instead - dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map))) + dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map))) # type: ignore[assignment] # Class reduced inter = (seg_mask * dice_map * seg_target).sum((0, 2, 3)) cardinality = (seg_mask * (dice_map + seg_target)).sum((0, 2, 3)) diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py index 257164d4b6..80467d3d97 100644 --- a/doctr/models/detection/predictor/pytorch.py +++ b/doctr/models/detection/predictor/pytorch.py @@ -59,11 +59,11 @@ def forward( ] # Remove padding from loc predictions preds = _remove_padding( - pages, # type: ignore[arg-type] + pages, [pred for batch in predicted_batches for pred in batch["preds"]], preserve_aspect_ratio=preserve_aspect_ratio, symmetric_pad=symmetric_pad, - assume_straight_pages=assume_straight_pages, + assume_straight_pages=assume_straight_pages, # type: ignore[arg-type] ) if return_maps: diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py index 61ab910241..de459c0ff2 100644 --- a/doctr/models/kie_predictor/pytorch.py +++ b/doctr/models/kie_predictor/pytorch.py @@ -88,7 +88,7 @@ def forward( for out_map in out_maps ] if self.detect_orientation: - general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) # type: ignore[arg-type] + general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) orientations = [ {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations ] @@ -97,7 +97,7 @@ def forward( general_pages_orientations = None origin_pages_orientations = None if self.straighten_pages: - pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # type: ignore + pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # update page shapes after straightening origin_page_shapes = [page.shape[:2] for page in pages] @@ -124,7 +124,7 @@ def forward( crops = {} for class_name in dict_loc_preds.keys(): crops[class_name], dict_loc_preds[class_name] = self._prepare_crops( - pages, # type: ignore[arg-type] + pages, dict_loc_preds[class_name], channels_last=channels_last, assume_straight_pages=self.assume_straight_pages, @@ -169,7 +169,7 @@ def forward( languages_dict = None out = self.doc_builder( - pages, # type: ignore[arg-type] + pages, boxes_per_page, objectness_scores_per_page, text_preds_per_page, diff --git a/doctr/models/modules/layers/pytorch.py b/doctr/models/modules/layers/pytorch.py index b7ad119ec9..e20f15f104 100644 --- a/doctr/models/modules/layers/pytorch.py +++ b/doctr/models/modules/layers/pytorch.py @@ -106,7 +106,7 @@ def _identity_to_conv( id_tensor = torch.from_numpy(kernel_value).to(identity.weight.device) self.id_tensor = self._pad_to_mxn_tensor(id_tensor) kernel = self.id_tensor - std = (identity.running_var + identity.eps).sqrt() + std = (identity.running_var + identity.eps).sqrt() # type: ignore t = (identity.weight / std).reshape(-1, 1, 1, 1) return kernel * t, identity.bias - identity.running_mean * identity.weight / std diff --git a/doctr/models/modules/transformer/pytorch.py b/doctr/models/modules/transformer/pytorch.py index c7c1f113a3..312eba9a26 100644 --- a/doctr/models/modules/transformer/pytorch.py +++ b/doctr/models/modules/transformer/pytorch.py @@ -38,7 +38,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: Returns: positional embeddings (batch, max_len, d_model) """ - x = x + self.pe[:, : x.size(1)] + x = x + self.pe[:, : x.size(1)] # type: ignore[index] return self.dropout(x) @@ -49,8 +49,8 @@ def scaled_dot_product_attention( scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1)) if mask is not None: # NOTE: to ensure the ONNX compatibility, masked_fill works only with int equal condition - scores = scores.masked_fill(mask == 0, float("-inf")) - p_attn = torch.softmax(scores, dim=-1) + scores = scores.masked_fill(mask == 0, float("-inf")) # type: ignore[attr-defined] + p_attn = torch.softmax(scores, dim=-1) # type: ignore[call-overload] return torch.matmul(p_attn, value), p_attn diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index f9a3d47097..a0d26957e6 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -86,7 +86,7 @@ def forward( for out_map in out_maps ] if self.detect_orientation: - general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) # type: ignore[arg-type] + general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) orientations = [ {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations ] @@ -95,7 +95,7 @@ def forward( general_pages_orientations = None origin_pages_orientations = None if self.straighten_pages: - pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # type: ignore + pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # update page shapes after straightening origin_page_shapes = [page.shape[:2] for page in pages] @@ -118,7 +118,7 @@ def forward( # Crop images crops, loc_preds = self._prepare_crops( - pages, # type: ignore[arg-type] + pages, loc_preds, channels_last=channels_last, assume_straight_pages=self.assume_straight_pages, @@ -146,7 +146,7 @@ def forward( languages_dict = None out = self.doc_builder( - pages, # type: ignore[arg-type] + pages, boxes, objectness_scores, text_preds, diff --git a/doctr/models/preprocessor/pytorch.py b/doctr/models/preprocessor/pytorch.py index b8e6ee55b5..1015e14bbd 100644 --- a/doctr/models/preprocessor/pytorch.py +++ b/doctr/models/preprocessor/pytorch.py @@ -77,7 +77,7 @@ def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: else: x = x.to(dtype=torch.float32) # type: ignore[union-attr] - return x + return x # type: ignore[return-value] def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, np.ndarray]]]) -> List[torch.Tensor]: """Prepare document data for model forwarding @@ -99,7 +99,7 @@ def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, n elif x.dtype not in (torch.uint8, torch.float16, torch.float32): raise TypeError("unsupported data type for torch.Tensor") # Resizing - if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]: + if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]: # type: ignore[union-attr] x = F.resize( x, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias ) @@ -114,11 +114,11 @@ def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, n # Sample transform (to tensor, resize) samples = list(multithread_exec(self.sample_transforms, x)) # Batching - batches = self.batch_inputs(samples) + batches = self.batch_inputs(samples) # type: ignore[assignment] else: raise TypeError(f"invalid input type: {type(x)}") # Batch transforms (normalize) batches = list(multithread_exec(self.normalize, batches)) - return batches + return batches # type: ignore[return-value] diff --git a/doctr/models/recognition/master/pytorch.py b/doctr/models/recognition/master/pytorch.py index d44139ab32..7585b1d8a6 100644 --- a/doctr/models/recognition/master/pytorch.py +++ b/doctr/models/recognition/master/pytorch.py @@ -106,7 +106,7 @@ def make_source_and_target_mask( # NOTE: nn.TransformerDecoder takes the inverse from this implementation # [True, True, True, ..., False, False, False] -> False is masked # (N, 1, 1, max_length) - target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1) + target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1) # type: ignore[attr-defined] target_length = target.size(1) # sub mask filled diagonal with True = see and False = masked (max_length, max_length) # NOTE: onnxruntime tril/triu works only with float currently (onnxruntime 1.11.1 - opset 14) @@ -139,7 +139,7 @@ def compute_loss( # Input length : number of timesteps input_len = model_output.shape[1] # Add one for additional token (sos disappear in shift!) - seq_len = seq_len + 1 + seq_len = seq_len + 1 # type: ignore[assignment] # Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]! # The "masked" first gt char is . Delete last logit of the model output. cce = F.cross_entropy(model_output[:, :-1, :].permute(0, 2, 1), gt[:, 1:], reduction="none") diff --git a/doctr/models/recognition/parseq/pytorch.py b/doctr/models/recognition/parseq/pytorch.py index 8ff24f67f2..a6ecea88e0 100644 --- a/doctr/models/recognition/parseq/pytorch.py +++ b/doctr/models/recognition/parseq/pytorch.py @@ -209,7 +209,7 @@ def generate_permutations(self, seqlen: torch.Tensor) -> torch.Tensor: sos_idx = torch.zeros(len(final_perms), 1, device=seqlen.device) eos_idx = torch.full((len(final_perms), 1), max_num_chars + 1, device=seqlen.device) - combined = torch.cat([sos_idx, final_perms + 1, eos_idx], dim=1).int() + combined = torch.cat([sos_idx, final_perms + 1, eos_idx], dim=1).int() # type: ignore[list-item] if len(combined) > 1: combined[1, 1:] = max_num_chars + 1 - torch.arange(max_num_chars + 1, device=seqlen.device) return combined @@ -280,7 +280,7 @@ def decode_autoregressive(self, features: torch.Tensor, max_len: Optional[int] = # Stop decoding if all sequences have reached the EOS token # NOTE: `break` isn't correctly translated to Onnx so we don't break here if we want to export - if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all(): + if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all(): # type: ignore[attr-defined] break logits = torch.cat(pos_logits, dim=1) # (N, max_length, vocab_size + 1) @@ -295,7 +295,7 @@ def decode_autoregressive(self, features: torch.Tensor, max_len: Optional[int] = # Create padding mask for refined target input maskes all behind EOS token as False # (N, 1, 1, max_length) - target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1) + target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1) # type: ignore[attr-defined] mask = (target_pad_mask.bool() & query_mask[:, : ys.shape[1]].bool()).int() logits = self.head(self.decode(ys, features, mask, target_query=pos_queries)) diff --git a/doctr/models/recognition/sar/pytorch.py b/doctr/models/recognition/sar/pytorch.py index 69f58a1a5d..c6a6f31501 100644 --- a/doctr/models/recognition/sar/pytorch.py +++ b/doctr/models/recognition/sar/pytorch.py @@ -289,7 +289,7 @@ def compute_loss( # Input length : number of timesteps input_len = model_output.shape[1] # Add one for additional token - seq_len = seq_len + 1 + seq_len = seq_len + 1 # type: ignore[assignment] # Compute loss # (N, L, vocab_size + 1) cce = F.cross_entropy(model_output.permute(0, 2, 1), gt, reduction="none") diff --git a/doctr/models/recognition/vitstr/pytorch.py b/doctr/models/recognition/vitstr/pytorch.py index 1cc8a619b2..fe47461d73 100644 --- a/doctr/models/recognition/vitstr/pytorch.py +++ b/doctr/models/recognition/vitstr/pytorch.py @@ -134,7 +134,7 @@ def compute_loss( # Input length : number of steps input_len = model_output.shape[1] # Add one for additional token (sos disappear in shift!) - seq_len = seq_len + 1 + seq_len = seq_len + 1 # type: ignore[assignment] # Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]! # The "masked" first gt char is . cce = F.cross_entropy(model_output.permute(0, 2, 1), gt[:, 1:], reduction="none") diff --git a/doctr/models/utils/pytorch.py b/doctr/models/utils/pytorch.py index 69160c5801..8c975b94d4 100644 --- a/doctr/models/utils/pytorch.py +++ b/doctr/models/utils/pytorch.py @@ -150,7 +150,7 @@ def export_model_to_onnx(model: nn.Module, model_name: str, dummy_input: torch.T """ torch.onnx.export( model, - dummy_input, # type: ignore[arg-type] + dummy_input, f"{model_name}.onnx", input_names=["input"], output_names=["logits"], diff --git a/doctr/transforms/functional/pytorch.py b/doctr/transforms/functional/pytorch.py index 19699a8b4a..3c65d76b7d 100644 --- a/doctr/transforms/functional/pytorch.py +++ b/doctr/transforms/functional/pytorch.py @@ -33,9 +33,9 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor: rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape) # Inverse the color if out.dtype == torch.uint8: - out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8) + out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8) # type: ignore[attr-defined] else: - out = out * rgb_shift.to(dtype=out.dtype) + out = out * rgb_shift.to(dtype=out.dtype) # type: ignore[attr-defined] # Inverse the color out = 255 - out if out.dtype == torch.uint8 else 1 - out return out @@ -77,7 +77,7 @@ def rotate_sample( rotated_geoms: np.ndarray = rotate_abs_geoms( _geoms, angle, - img.shape[1:], # type: ignore[arg-type] + img.shape[1:], expand, ).astype(np.float32) @@ -124,7 +124,7 @@ def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwarg Returns: shaded image """ - shadow_mask = create_shadow_mask(img.shape[1:], **kwargs) # type: ignore[arg-type] + shadow_mask = create_shadow_mask(img.shape[1:], **kwargs) opacity = np.random.uniform(*opacity_range) shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...]) diff --git a/doctr/transforms/modules/pytorch.py b/doctr/transforms/modules/pytorch.py index c7181719ea..66d99c8ec5 100644 --- a/doctr/transforms/modules/pytorch.py +++ b/doctr/transforms/modules/pytorch.py @@ -135,9 +135,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Reshape the distribution noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std if x.dtype == torch.uint8: - return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) + return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) # type: ignore[attr-defined] else: - return (x + noise.to(dtype=x.dtype)).clamp(0, 1) + return (x + noise.to(dtype=x.dtype)).clamp(0, 1) # type: ignore[attr-defined] def extra_repr(self) -> str: return f"mean={self.mean}, std={self.std}" @@ -194,7 +194,7 @@ def __call__(self, x: torch.Tensor) -> torch.Tensor: try: if x.dtype == torch.uint8: return ( - ( + ( # type: ignore[attr-defined] 255 * random_shadow( x.to(dtype=torch.float32) / 255,