diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 52216e4..7feabe2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -15,9 +15,9 @@ assignees: '' Steps to reproduce the behavior: 1. Go to '...' -1. Run '....' -1. Scroll down to '....' -1. See error +2. Run '....' +3. Scroll down to '....' +4. See error diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5096e76..964cccd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ default_language_version: - python: python3.8 + python: python3 ci: autofix_prs: true @@ -14,52 +14,30 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - id: check-case-conflict + - id: check-json - id: check-yaml - id: check-toml - #- id: pretty-format-json - id: check-added-large-files exclude: .*\.ipynb args: ['--maxkb=250', '--enforce-all'] - id: check-docstring-first - id: detect-private-key - - repo: https://github.com/asottile/pyupgrade - rev: v3.16.0 - hooks: - - id: pyupgrade - args: [--py37-plus] - name: Upgrade code - - - repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 - hooks: - - id: docformatter - args: [--in-place, --wrap-summaries=115, --wrap-descriptions=120] - - - repo: https://github.com/omnilib/ufmt - rev: v2.7.0 - hooks: - - id: ufmt - additional_dependencies: - - black == 22.3.0 - - usort == 1.0.2 - - repo: https://github.com/executablebooks/mdformat rev: 0.7.17 hooks: - id: mdformat + args: ['--number'] additional_dependencies: - mdformat-gfm - mdformat-black - mdformat_frontmatter - - repo: https://github.com/asottile/yesqa - rev: v1.5.0 - hooks: - - id: yesqa - - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.5.0 hooks: + # try to fix what is possible - id: ruff args: ["--fix"] + # perform formatting updates + - id: ruff-format diff --git a/README.md b/README.md index cbba8aa..dd388c6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # Kaggle: Image 
classification challenges ![CI complete testing](https://github.com/Borda/kaggle_image-classify/workflows/CI%20complete%20testing/badge.svg?branch=main&event=push) -[![Code formatting](https://github.com/Borda/kaggle_image-classify/actions/workflows/code-format.yml/badge.svg?branch=main&event=push)](https://github.com/Borda/kaggle_image-classify/actions/workflows/code-format.yml) [![codecov](https://codecov.io/gh/Borda/kaggle_image-classify/branch/main/graph/badge.svg?token=5t1Aj5BIyS)](https://codecov.io/gh/Borda/kaggle_image-classify) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/Borda/kaggle_image-classify/main.svg)](https://results.pre-commit.ci/latest/github/Borda/kaggle_image-classify/main) diff --git a/kaggle_imgclassif/cassava/data.py b/kaggle_imgclassif/cassava/data.py index 5c135f2..b50e12c 100644 --- a/kaggle_imgclassif/cassava/data.py +++ b/kaggle_imgclassif/cassava/data.py @@ -10,28 +10,24 @@ from torch.utils.data import DataLoader, Dataset from torchvision import transforms as T -TRAIN_TRANSFORM = T.Compose( - [ - T.Resize(512), - T.RandomPerspective(), - T.RandomResizedCrop(224), - T.RandomHorizontalFlip(), - T.RandomVerticalFlip(), - T.ToTensor(), - # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - T.Normalize([0.431, 0.498, 0.313], [0.237, 0.239, 0.227]), - ] -) - -VALID_TRANSFORM = T.Compose( - [ - T.Resize(256), - T.CenterCrop(224), - T.ToTensor(), - # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - T.Normalize([0.431, 0.498, 0.313], [0.237, 0.239, 0.227]), - ] -) +TRAIN_TRANSFORM = T.Compose([ + T.Resize(512), + T.RandomPerspective(), + T.RandomResizedCrop(224), + T.RandomHorizontalFlip(), + T.RandomVerticalFlip(), + T.ToTensor(), + # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + T.Normalize([0.431, 0.498, 0.313], [0.237, 0.239, 0.227]), +]) + +VALID_TRANSFORM = T.Compose([ + T.Resize(256), + T.CenterCrop(224), + T.ToTensor(), + # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 
0.225]), + T.Normalize([0.431, 0.498, 0.313], [0.237, 0.239, 0.227]), +]) class CassavaDataset(Dataset): diff --git a/kaggle_imgclassif/imet_collect/data.py b/kaggle_imgclassif/imet_collect/data.py index 6e58c61..535bce0 100644 --- a/kaggle_imgclassif/imet_collect/data.py +++ b/kaggle_imgclassif/imet_collect/data.py @@ -9,7 +9,7 @@ import pandas as pd import torch import tqdm -from joblib import delayed, Parallel +from joblib import Parallel, delayed from PIL import Image from pytorch_lightning import LightningDataModule from torch import Tensor @@ -24,28 +24,24 @@ # ImageFile.LOAD_TRUNCATED_IMAGES = True #: default training augmentation -TORCHVISION_TRAIN_TRANSFORM = T.Compose( - [ - T.Resize(size=256, interpolation=Image.BILINEAR), - T.RandomRotation(degrees=25), - T.RandomPerspective(distortion_scale=0.2), - T.RandomResizedCrop(size=224), - # T.RandomHorizontalFlip(p=0.5), - T.RandomVerticalFlip(p=0.5), - # T.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05), - T.ToTensor(), - T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - ] -) +TORCHVISION_TRAIN_TRANSFORM = T.Compose([ + T.Resize(size=256, interpolation=Image.BILINEAR), + T.RandomRotation(degrees=25), + T.RandomPerspective(distortion_scale=0.2), + T.RandomResizedCrop(size=224), + # T.RandomHorizontalFlip(p=0.5), + T.RandomVerticalFlip(p=0.5), + # T.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05), + T.ToTensor(), + T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), +]) #: default validation augmentation -TORCHVISION_VALID_TRANSFORM = T.Compose( - [ - T.Resize(size=256, interpolation=Image.BILINEAR), - T.CenterCrop(size=224), - T.ToTensor(), - T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - ] -) +TORCHVISION_VALID_TRANSFORM = T.Compose([ + T.Resize(size=256, interpolation=Image.BILINEAR), + T.CenterCrop(size=224), + T.ToTensor(), + T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), +]) def load_image(path_img: str) -> 
Image.Image: @@ -334,3 +330,4 @@ def test_dataloader(self) -> Optional[DataLoader]: shuffle=False, ) logging.warning("no testing images found") + return None diff --git a/kaggle_imgclassif/imet_collect/models.py b/kaggle_imgclassif/imet_collect/models.py index 38acc9f..fca0645 100644 --- a/kaggle_imgclassif/imet_collect/models.py +++ b/kaggle_imgclassif/imet_collect/models.py @@ -3,7 +3,7 @@ import timm import torch from pytorch_lightning import LightningModule -from torch import nn, Tensor +from torch import Tensor, nn from torch.nn import functional as F from torchmetrics import Accuracy, F1Score, Precision diff --git a/kaggle_imgclassif/plant_pathology/augment.py b/kaggle_imgclassif/plant_pathology/augment.py index 223416b..f900287 100644 --- a/kaggle_imgclassif/plant_pathology/augment.py +++ b/kaggle_imgclassif/plant_pathology/augment.py @@ -1,4 +1,5 @@ """Module to perform efficient preprocess and data augmentation.""" + from typing import Tuple import numpy as np @@ -18,30 +19,26 @@ from kaggle_imgclassif.plant_pathology import DATASET_IMAGE_MEAN, DATASET_IMAGE_STD #: default training augmentation -TORCHVISION_TRAIN_TRANSFORM = T.Compose( - [ - T.Resize(size=512, interpolation=Image.BILINEAR), - T.RandomRotation(degrees=30), - T.RandomPerspective(distortion_scale=0.4), - T.RandomResizedCrop(size=224), - T.RandomHorizontalFlip(p=0.5), - T.RandomVerticalFlip(p=0.5), - # T.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05), - T.ToTensor(), - # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - T.Normalize(DATASET_IMAGE_MEAN, DATASET_IMAGE_STD), # custom - ] -) +TORCHVISION_TRAIN_TRANSFORM = T.Compose([ + T.Resize(size=512, interpolation=Image.BILINEAR), + T.RandomRotation(degrees=30), + T.RandomPerspective(distortion_scale=0.4), + T.RandomResizedCrop(size=224), + T.RandomHorizontalFlip(p=0.5), + T.RandomVerticalFlip(p=0.5), + # T.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.05, hue=0.05), + T.ToTensor(), + # 
T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + T.Normalize(DATASET_IMAGE_MEAN, DATASET_IMAGE_STD), # custom +]) #: default validation augmentation -TORCHVISION_VALID_TRANSFORM = T.Compose( - [ - T.Resize(size=256, interpolation=Image.BILINEAR), - T.CenterCrop(size=224), - T.ToTensor(), - # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - T.Normalize(DATASET_IMAGE_MEAN, DATASET_IMAGE_STD), # custom - ] -) +TORCHVISION_VALID_TRANSFORM = T.Compose([ + T.Resize(size=256, interpolation=Image.BILINEAR), + T.CenterCrop(size=224), + T.ToTensor(), + # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + T.Normalize(DATASET_IMAGE_MEAN, DATASET_IMAGE_STD), # custom +]) class Resize(nn.Module): diff --git a/kaggle_imgclassif/plant_pathology/data.py b/kaggle_imgclassif/plant_pathology/data.py index 3e77c73..bac0a9f 100644 --- a/kaggle_imgclassif/plant_pathology/data.py +++ b/kaggle_imgclassif/plant_pathology/data.py @@ -197,7 +197,8 @@ def prepare_data(self): @property def num_classes(self) -> int: - assert self.train_dataset and self.valid_dataset + assert self.train_dataset + assert self.valid_dataset return max(self.train_dataset.num_classes, self.valid_dataset.num_classes) @staticmethod @@ -316,3 +317,4 @@ def test_dataloader(self) -> Optional[DataLoader]: shuffle=False, ) logging.warning("no testing images found") + return None diff --git a/kaggle_imgclassif/plant_pathology/models.py b/kaggle_imgclassif/plant_pathology/models.py index c054978..1f2d1c2 100644 --- a/kaggle_imgclassif/plant_pathology/models.py +++ b/kaggle_imgclassif/plant_pathology/models.py @@ -3,7 +3,7 @@ import timm import torch from pytorch_lightning import LightningModule -from torch import nn, Tensor +from torch import Tensor, nn from torch.nn import functional as F from torchmetrics import Accuracy, F1Score, Precision diff --git a/pyproject.toml b/pyproject.toml index 32f14d0..dd53a38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,57 +4,69 @@ requires = [ 
"wheel", ] -[tool.black] -# https://github.com/psf/black -line-length = 120 -exclude = "(.eggs|.git|.hg|.mypy_cache|.venv|_build|buck-out|build|dist)" +[tool.pytest.ini_options] +norecursedirs = [ + ".git", + ".github", + "dist", + "build", + "docs", +] +addopts = [ + "--strict-markers", + "--doctest-modules", + "--color=yes", + "--disable-pytest-warnings", +] +filterwarnings = [ + "error::FutureWarning", +] +xfail_strict = true +junit_duration_report = "call" -[tool.usort] -known_first_party = [ - "kaggle_imgclassif", +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "pass", ] -skip_glob = [] -profile = "black" -line_length = 120 [tool.ruff] +target-version = "py38" line-length = 120 -# Enable Pyflakes `E` and `F` codes by default. -select = [ - "E", "W", # see: https://pypi.org/project/pycodestyle - "F", # see: https://pypi.org/project/pyflakes -# "D", # see: https://pypi.org/project/pydocstyle -# "N", # see: https://pypi.org/project/pep8-naming -] -#extend-select = [ -# "C4", # see: https://pypi.org/project/flake8-comprehensions -# "PT", # see: https://pypi.org/project/flake8-pytest-style -# "RET", # see: https://pypi.org/project/flake8-return -# "SIM", # see: https://pypi.org/project/flake8-simplify -#] -ignore = [ - "E731", # Do not assign a lambda expression, use a def -] -# Exclude a variety of commonly ignored directories. -exclude = [ - ".eggs", - ".git", - ".ruff_cache", - "__pypackages__", - "_build", - "build", - "dist", - "docs" -] -ignore-init-module-imports = true - -[tool.ruff.pydocstyle] -# Use Google-style docstrings. -convention = "google" - -#[tool.ruff.pycodestyle] -#ignore-overlong-task-comments = true -[tool.ruff.mccabe] # Unlike Flake8, default to a complexity level of 10. -max-complexity = 10 +lint.mccabe.max-complexity = 10 +# Use Google-style docstrings. 
+lint.pydocstyle.convention = "google" +format.preview = true +lint.select = [ + "E", + "F", # see: https://pypi.org/project/pyflakes + "I", # see: https://pypi.org/project/isort + "S", # see: https://pypi.org/project/flake8-bandit + "UP", # see: https://docs.astral.sh/ruff/rules/#pyupgrade-up +# "D", # see: https://pypi.org/project/pydocstyle + "W", # see: https://pypi.org/project/pycodestyle +] +lint.extend-select = [ +# "C4", # see: https://pypi.org/project/flake8-comprehensions + "PLE", # see: https://pypi.org/project/pylint/ + "PT", # see: https://pypi.org/project/flake8-pytest-style + "RET", # see: https://pypi.org/project/flake8-return + "RUF100", # alternative to yesqa + "SIM", # see: https://pypi.org/project/flake8-simplify +] +lint.ignore = [ + "S101", # todo: Use of `assert` detected +] +lint.unfixable = [ + "F401", +] +[tool.ruff.lint.per-file-ignores] +"setup.py" = ["D100", "SIM115"] +"scripts_*/**" = [ + "S", "D" +] +"tests/**" = [ + "S", "D" +] diff --git a/scripts/birdclef_convert-spectrograms.py b/scripts/birdclef_convert-spectrograms.py index 05415af..c435058 100644 --- a/scripts/birdclef_convert-spectrograms.py +++ b/scripts/birdclef_convert-spectrograms.py @@ -6,7 +6,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from joblib import delayed, Parallel +from joblib import Parallel, delayed from tqdm.auto import tqdm from kaggle_imgclassif.birdclef.data import convert_and_export diff --git a/scripts/herbarium_train-model.py b/scripts/herbarium_train-model.py index cb47780..905287e 100644 --- a/scripts/herbarium_train-model.py +++ b/scripts/herbarium_train-model.py @@ -35,32 +35,28 @@ class ImageClassificationInputTransform(InputTransform): color_std: Tuple[float, float, float] = (0.241, 0.245, 0.249) def input_per_sample_transform(self): - return T.Compose( - [ - T.ToTensor(), - T.Resize(self.image_size), - T.Normalize(self.color_mean, self.color_std), - ] - ) + return T.Compose([ + T.ToTensor(), +
T.Resize(self.image_size), + T.Normalize(self.color_mean, self.color_std), + ]) def train_input_per_sample_transform(self): - return T.Compose( - [ - T.TrivialAugmentWide(), - T.RandomPosterize(bits=6), - T.RandomEqualize(), - T.ToTensor(), - T.Resize(self.image_size), - T.RandomHorizontalFlip(), - # T.ColorJitter(brightness=0.2, hue=0.1), - T.RandomAutocontrast(), - T.RandomAdjustSharpness(sharpness_factor=2), - T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), - T.RandomAffine(degrees=10, scale=(0.9, 1.1), translate=(0.1, 0.1)), - # T.RandomPerspective(distortion_scale=0.1), - T.Normalize(self.color_mean, self.color_std), - ] - ) + return T.Compose([ + T.TrivialAugmentWide(), + T.RandomPosterize(bits=6), + T.RandomEqualize(), + T.ToTensor(), + T.Resize(self.image_size), + T.RandomHorizontalFlip(), + # T.ColorJitter(brightness=0.2, hue=0.1), + T.RandomAutocontrast(), + T.RandomAdjustSharpness(sharpness_factor=2), + T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)), + T.RandomAffine(degrees=10, scale=(0.9, 1.1), translate=(0.1, 0.1)), + # T.RandomPerspective(distortion_scale=0.1), + T.Normalize(self.color_mean, self.color_std), + ]) def target_per_sample_transform(self) -> Callable: return torch.as_tensor diff --git a/setup.cfg b/setup.cfg index efe6b12..7571f0a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,32 +80,3 @@ exclude = docs* notebooks* tests* - -[tool:pytest] -norecursedirs = - .git - dist - build -addopts = - --strict - --doctest-modules - --durations=25 - --color=yes - -[coverage:report] -exclude_lines = - pragma: no-cover - pass - -[flake8] -max-line-length = 120 -exclude = .tox,*.egg,build,temp -select = E,W,F -doctests = True -verbose = 2 -# https://pep8.readthedocs.io/en/latest/intro.html#error-codes -format = pylint -# see: https://www.flake8rules.com/ -ignore = - # Do not assign a lambda expression, use a def - E731 diff --git a/streamlit-app.py b/streamlit-app.py index abd66c0..d9e0e25 100644 --- a/streamlit-app.py +++ b/streamlit-app.py @@ 
-2,6 +2,7 @@ >> streamlit run streamlit-app.py """ + import os import gdown diff --git a/tests/cassava/test_data.py b/tests/cassava/test_data.py index c4d017c..79b42c4 100644 --- a/tests/cassava/test_data.py +++ b/tests/cassava/test_data.py @@ -3,7 +3,6 @@ import numpy from kaggle_imgclassif.cassava.data import CassavaDataModule, CassavaDataset - from tests import _ROOT_DATA PATH_DATA = os.path.join(_ROOT_DATA, "cassava") diff --git a/tests/cassava/test_models.py b/tests/cassava/test_models.py index a8ac65e..451f9f8 100644 --- a/tests/cassava/test_models.py +++ b/tests/cassava/test_models.py @@ -4,10 +4,8 @@ from kaggle_imgclassif.cassava.data import CassavaDataModule from kaggle_imgclassif.cassava.models import LitCassava - from tests import _ROOT_DATA - PATH_DATA = os.path.join(_ROOT_DATA, "cassava") diff --git a/tests/imet_collect/test_data.py b/tests/imet_collect/test_data.py index 2cdaf51..9915acb 100644 --- a/tests/imet_collect/test_data.py +++ b/tests/imet_collect/test_data.py @@ -6,7 +6,6 @@ from torch import tensor from kaggle_imgclassif.imet_collect.data import IMetDataset, IMetDM - from tests import _ROOT_DATA PATH_DATA = os.path.join(_ROOT_DATA, "imet-collect") diff --git a/tests/imet_collect/test_models.py b/tests/imet_collect/test_models.py index 44d0f94..559abe3 100644 --- a/tests/imet_collect/test_models.py +++ b/tests/imet_collect/test_models.py @@ -5,7 +5,6 @@ from kaggle_imgclassif.imet_collect.data import IMetDM from kaggle_imgclassif.imet_collect.models import LitMet - from tests import _ROOT_DATA PATH_DATA = os.path.join(_ROOT_DATA, "imet-collect") diff --git a/tests/plant_pathology/test_data.py b/tests/plant_pathology/test_data.py index a4fdadc..a330888 100644 --- a/tests/plant_pathology/test_data.py +++ b/tests/plant_pathology/test_data.py @@ -5,7 +5,6 @@ from torch import Tensor from kaggle_imgclassif.plant_pathology.data import PlantPathologyDataset, PlantPathologyDM, PlantPathologySimpleDataset - from tests import _ROOT_DATA PATH_DATA = 
os.path.join(_ROOT_DATA, "plant-pathology") @@ -40,7 +39,7 @@ @pytest.mark.parametrize( - "data_cls,labels", + ("data_cls", "labels"), [ (PlantPathologyDataset, _TEST_LABELS_BINARY), (PlantPathologySimpleDataset, [3, 1, 4, 0, 2, 3, 1]), @@ -58,7 +57,7 @@ def test_dataset(data_cls, labels, phase, path_data=PATH_DATA): img, _ = dataset[0] assert isinstance(img, numpy.ndarray) assert _TEST_IMAGE_NAMES == tuple(dataset.img_names) == tuple(dataset.data["image"]) - assert _TEST_UNIQUE_LABELS == dataset.labels_unique + assert dataset.labels_unique == _TEST_UNIQUE_LABELS lbs = [dataset[i][1] for i in range(len(dataset))] if isinstance(lbs[0], Tensor): lbs = [list(lb.numpy()) for lb in lbs] diff --git a/tests/plant_pathology/test_models.py b/tests/plant_pathology/test_models.py index b2fad09..4fbc47c 100644 --- a/tests/plant_pathology/test_models.py +++ b/tests/plant_pathology/test_models.py @@ -5,7 +5,6 @@ from kaggle_imgclassif.plant_pathology.data import PlantPathologyDM from kaggle_imgclassif.plant_pathology.models import LitPlantPathology, MultiPlantPathology - from tests import _ROOT_DATA PATH_DATA = os.path.join(_ROOT_DATA, "plant-pathology") @@ -17,7 +16,7 @@ def test_create_model(model_cls, net: str = "resnet18"): @pytest.mark.parametrize( - "ds_simple,model_cls", + ("ds_simple", "model_cls"), [ (True, LitPlantPathology), (False, MultiPlantPathology),