Skip to content

Commit

Permalink
[fbsync] Disable download for StanfordCars dataset (#8309)
Browse files (browse the repository at this point in the history)
Reviewed By: vmoens

Differential Revision: D55062805

fbshipit-source-id: 7fc8ee2b8aa238c4df057965de46b63ba8a531ae
  • Loading branch information
NicolasHug authored and facebook-github-bot committed Mar 20, 2024
1 parent 38b8333 commit 0ce9194
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 43 deletions.
12 changes: 3 additions & 9 deletions test/test_datasets_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,12 +327,6 @@ def kitti():
)


def stanford_cars():
return itertools.chain.from_iterable(
[collect_urls(datasets.StanfordCars, ROOT, split=split, download=True) for split in ["train", "test"]]
)


def url_parametrization(*dataset_urls_and_ids_fns):
return pytest.mark.parametrize(
"url",
Expand Down Expand Up @@ -378,9 +372,9 @@ def test_url_is_accessible(url):
retry(lambda: assert_url_is_accessible(url))


@url_parametrization(
stanford_cars, # https://github.com/pytorch/vision/issues/7545
)
# TODO: if e.g. caltech101 starts failing, remove the pytest.mark.parametrize below and use
# @url_parametrization(caltech101)
@pytest.mark.parametrize("url", ("http://url_that_doesnt_exist.com",)) # here until we actually have a failing dataset
@pytest.mark.xfail
def test_url_is_not_accessible(url):
"""
Expand Down
53 changes: 19 additions & 34 deletions torchvision/datasets/stanford_cars.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@

from PIL import Image

from .utils import download_and_extract_archive, download_url, verify_str_arg
from .utils import verify_str_arg
from .vision import VisionDataset


class StanfordCars(VisionDataset):
"""`Stanford Cars <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ Dataset
"""Stanford Cars Dataset
The Cars dataset contains 16,185 images of 196 classes of cars. The data is
split into 8,144 training images and 8,041 testing images, where each class
has been split roughly in a 50-50 split
The original URL is https://ai.stanford.edu/~jkrause/cars/car_dataset.html, but it is broken.
.. note::
This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.
Expand All @@ -25,9 +27,11 @@ class StanfordCars(VisionDataset):
and returns a transformed version. E.g, ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again."""
download (bool, optional): This parameter exists for backward compatibility but it does not
download the dataset, since the original URL is not available anymore. The dataset
seems to be available on Kaggle so you can try to manually download it using
`these instructions <https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616>`_.
"""

def __init__(
self,
Expand Down Expand Up @@ -57,10 +61,18 @@ def __init__(
self._images_base_path = self._base_folder / "cars_test"

if download:
self.download()
raise ValueError(
"The original URL is broken so the StanfordCars dataset is not available for automatic "
"download anymore. You can try to download it manually following "
"https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616, "
"and set download=False to avoid this error."
)

if not self._check_exists():
raise RuntimeError("Dataset not found. You can use download=True to download it")
raise RuntimeError(
"Dataset not found. Try to manually download following the instructions in "
"https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616."
)

self._samples = [
(
Expand All @@ -87,33 +99,6 @@ def __getitem__(self, idx: int) -> Tuple[Any, Any]:
target = self.target_transform(target)
return pil_image, target

def download(self) -> None:
if self._check_exists():
return

download_and_extract_archive(
url="https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz",
download_root=str(self._base_folder),
md5="c3b158d763b6e2245038c8ad08e45376",
)
if self._split == "train":
download_and_extract_archive(
url="https://ai.stanford.edu/~jkrause/car196/cars_train.tgz",
download_root=str(self._base_folder),
md5="065e5b463ae28d29e77c1b4b166cfe61",
)
else:
download_and_extract_archive(
url="https://ai.stanford.edu/~jkrause/car196/cars_test.tgz",
download_root=str(self._base_folder),
md5="4ce7ebf6a94d07f1952d94dd34c4d501",
)
download_url(
url="https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat",
root=str(self._base_folder),
md5="b0a2b23655a3edd16d84508592a98d10",
)

def _check_exists(self) -> bool:
if not (self._base_folder / "devkit").is_dir():
return False
Expand Down

0 comments on commit 0ce9194

Please sign in to comment.