[fbsync] Disable download for StanfordCars dataset (#8309)

Reviewed By: vmoens
Differential Revision: D55062805
fbshipit-source-id: 7fc8ee2b8aa238c4df057965de46b63ba8a531ae
pytorch · Mar 20, 2024 · 0ce9194 · 0ce9194
1 parent 38b8333
commit 0ce9194
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 43 deletions.
diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py
@@ -327,12 +327,6 @@ def kitti():
     )
 
 
-def stanford_cars():
-    return itertools.chain.from_iterable(
-        [collect_urls(datasets.StanfordCars, ROOT, split=split, download=True) for split in ["train", "test"]]
-    )
-
-
 def url_parametrization(*dataset_urls_and_ids_fns):
     return pytest.mark.parametrize(
         "url",
@@ -378,9 +372,9 @@ def test_url_is_accessible(url):
     retry(lambda: assert_url_is_accessible(url))
 
 
-@url_parametrization(
-    stanford_cars,  # https://github.com/pytorch/vision/issues/7545
-)
+# TODO: if e.g. caltech101 starts failing, remove the pytest.mark.parametrize below and use
+# @url_parametrization(caltech101)
+@pytest.mark.parametrize("url", ("http://url_that_doesnt_exist.com",))  # here until we actually have a failing dataset
 @pytest.mark.xfail
 def test_url_is_not_accessible(url):
     """

diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py
@@ -3,17 +3,19 @@
 
 from PIL import Image
 
-from .utils import download_and_extract_archive, download_url, verify_str_arg
+from .utils import verify_str_arg
 from .vision import VisionDataset
 
 
 class StanfordCars(VisionDataset):
-    """`Stanford Cars <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ Dataset
+    """Stanford Cars Dataset
 
     The Cars dataset contains 16,185 images of 196 classes of cars. The data is
     split into 8,144 training images and 8,041 testing images, where each class
     has been split roughly in a 50-50 split
 
+    The original URL is https://ai.stanford.edu/~jkrause/cars/car_dataset.html, but it is broken.
+
     .. note::
 
         This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.
@@ -25,9 +27,11 @@ class StanfordCars(VisionDataset):
             and returns a transformed version. E.g, ``transforms.RandomCrop``
         target_transform (callable, optional): A function/transform that takes in the
             target and transforms it.
-        download (bool, optional): If True, downloads the dataset from the internet and
-            puts it in root directory. If dataset is already downloaded, it is not
-            downloaded again."""
+        download (bool, optional): This parameter exists for backward compatibility only. Passing ``True``
+            raises a ``ValueError`` instead of downloading, since the original URL is not available anymore.
+            The dataset seems to be available on Kaggle, so you can try to manually download it using
+            `these instructions <https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616>`_.
+    """
 
     def __init__(
         self,
@@ -57,10 +61,18 @@ def __init__(
             self._images_base_path = self._base_folder / "cars_test"
 
         if download:
-            self.download()
+            raise ValueError(
+                "The original URL is broken so the StanfordCars dataset is not available for automatic "
+                "download anymore. You can try to download it manually following "
+                "https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616, "
+                "and set download=False to avoid this error."
+            )
 
         if not self._check_exists():
-            raise RuntimeError("Dataset not found. You can use download=True to download it")
+            raise RuntimeError(
+                "Dataset not found. Try to manually download following the instructions in "
+                "https://github.com/pytorch/vision/issues/7545#issuecomment-1631441616."
+            )
 
         self._samples = [
             (
@@ -87,33 +99,6 @@ def __getitem__(self, idx: int) -> Tuple[Any, Any]:
             target = self.target_transform(target)
         return pil_image, target
 
-    def download(self) -> None:
-        if self._check_exists():
-            return
-
-        download_and_extract_archive(
-            url="https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz",
-            download_root=str(self._base_folder),
-            md5="c3b158d763b6e2245038c8ad08e45376",
-        )
-        if self._split == "train":
-            download_and_extract_archive(
-                url="https://ai.stanford.edu/~jkrause/car196/cars_train.tgz",
-                download_root=str(self._base_folder),
-                md5="065e5b463ae28d29e77c1b4b166cfe61",
-            )
-        else:
-            download_and_extract_archive(
-                url="https://ai.stanford.edu/~jkrause/car196/cars_test.tgz",
-                download_root=str(self._base_folder),
-                md5="4ce7ebf6a94d07f1952d94dd34c4d501",
-            )
-            download_url(
-                url="https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat",
-                root=str(self._base_folder),
-                md5="b0a2b23655a3edd16d84508592a98d10",
-            )
-
     def _check_exists(self) -> bool:
         if not (self._base_folder / "devkit").is_dir():
             return False