[fbsync] Better logic for ignoring CPU tests on GPU CI machines (#4025)
Reviewed By: fmassa

Differential Revision: D29105975

fbshipit-source-id: 0f3446a61934e6b5ee3151c390e604e5b858d355
NicolasHug authored and facebook-github-bot committed Jun 15, 2021
1 parent 7b21f69 commit 5865648
Showing 6 changed files with 67 additions and 99 deletions.
50 changes: 2 additions & 48 deletions test/common_utils.py
@@ -259,58 +259,12 @@ def call_args_to_kwargs_only(call_args, *callable_or_arg_names):

def cpu_and_gpu():
import pytest # noqa

# ignore CPU tests in RE as they're already covered by another contbuild
# also ignore CPU tests in CircleCI machines that have a GPU: these tests
# are run on CPU-only machines already.
if IN_RE_WORKER:
devices = []
else:
if IN_CIRCLE_CI and torch.cuda.is_available():
mark = pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG)
else:
mark = ()
devices = [pytest.param('cpu', marks=mark)]

if torch.cuda.is_available():
cuda_marks = ()
elif IN_FBCODE:
# Dont collect cuda tests on fbcode if the machine doesnt have a GPU
# This avoids skipping the tests. More robust would be to detect if
# we're in sancastle instead of fbcode?
cuda_marks = pytest.mark.dont_collect()
else:
cuda_marks = pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG)

devices.append(pytest.param('cuda', marks=cuda_marks))

return devices
return ('cpu', pytest.param('cuda', marks=pytest.mark.needs_cuda))


def needs_cuda(test_func):
import pytest # noqa

if IN_FBCODE and not IN_RE_WORKER:
# We don't want to skip in fbcode, so we just don't collect
# TODO: slightly more robust way would be to detect if we're in a sandcastle instance
# so that the test will still be collected (and skipped) in the devvms.
return pytest.mark.dont_collect(test_func)
elif torch.cuda.is_available():
return test_func
else:
return pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG)(test_func)


def cpu_only(test_func):
import pytest # noqa

if IN_RE_WORKER:
# The assumption is that all RE workers have GPUs.
return pytest.mark.dont_collect(test_func)
elif IN_CIRCLE_CI and torch.cuda.is_available():
return pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG)(test_func)
else:
return test_func
return pytest.mark.needs_cuda(test_func)


def _create_data(height=3, width=3, channels=3, device="cpu"):
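For orientation, here is a minimal usage sketch (not part of this commit) of how a test module consumes the slimmed-down helpers above; the test names and tensor shapes are made up for illustration:

import pytest
import torch
from common_utils import cpu_and_gpu, needs_cuda


@pytest.mark.parametrize('device', cpu_and_gpu())
def test_some_op(device):
    # Hypothetical test: the 'cuda' instance carries the needs_cuda mark via
    # pytest.param, while the 'cpu' instance carries no mark at all.
    x = torch.rand(3, 3, device=device)
    assert x.device.type == device


@needs_cuda
def test_some_cuda_only_op():
    # Hypothetical test: the decorator now only attaches the needs_cuda mark;
    # whether the test is skipped or deselected is decided later, in conftest.py.
    assert torch.cuda.is_available()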
57 changes: 51 additions & 6 deletions test/conftest.py
@@ -1,14 +1,59 @@
from common_utils import IN_CIRCLE_CI, CIRCLECI_GPU_NO_CUDA_MSG, IN_FBCODE, IN_RE_WORKER, CUDA_NOT_AVAILABLE_MSG
import torch
import pytest


def pytest_configure(config):
# register an additional marker (see pytest_collection_modifyitems)
config.addinivalue_line(
"markers", "dont_collect: marks a test that should not be collected (avoids skipping it)"
"markers", "needs_cuda: mark for tests that rely on a CUDA device"
)
config.addinivalue_line(
"markers", "dont_collect: mark for tests that should not be collected"
)


def pytest_collection_modifyitems(items):
# This hook is called by pytest after it has collected the tests (google its name!)
# We can ignore some tests as we see fit here. In particular we ignore the tests that
# we have marked with the custom 'dont_collect' mark. This avoids skipping the tests,
# since the internal fb infra doesn't like skipping tests.
to_keep = [item for item in items if item.get_closest_marker('dont_collect') is None]
items[:] = to_keep
# We can ignore some tests as we see fit here, or add marks, such as a skip mark.

out_items = []
for item in items:
# The needs_cuda mark will exist if the test was explicitly decorated with
# the @needs_cuda decorator. It will also exist if it was parametrized with a
# parameter that has the mark: for example if a test is parametrized with
# @pytest.mark.parametrize('device', cpu_and_gpu())
# the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark,
# and the ones with device == 'cpu' won't have the mark.
needs_cuda = item.get_closest_marker('needs_cuda') is not None

if needs_cuda and not torch.cuda.is_available():
# In general, we skip cuda tests on machines without a GPU
# There are special cases though, see below
item.add_marker(pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG))

if IN_FBCODE:
# fbcode doesn't like skipping tests, so instead we just don't collect them
# so that they don't even "exist", hence the continue statements.
if not needs_cuda and IN_RE_WORKER:
# The RE workers are the machines with GPUs; we don't want them to run CPU-only tests.
continue
if needs_cuda and not torch.cuda.is_available():
# On the test machines without a GPU, we want to ignore the tests that need cuda.
# TODO: something more robust would be to do that only in a sandcastle instance,
# so that we can still see the test being skipped when testing locally from a devvm
continue
elif IN_CIRCLE_CI:
# Here we're not in fbcode, so we can safely collect and skip tests.
if not needs_cuda and torch.cuda.is_available():
# Similar to what happens in RE workers: we don't need the CircleCI GPU machines
# to run the CPU-only tests.
item.add_marker(pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG))

if item.get_closest_marker('dont_collect') is not None:
# currently, this is only used for some tests we're sure we don't want to run on fbcode
continue

out_items.append(item)

items[:] = out_items
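The hook above relies on flags imported from common_utils (IN_CIRCLE_CI, IN_RE_WORKER, IN_FBCODE, and the two skip messages), whose definitions are not part of this diff. A plausible sketch, assuming they are derived from environment variables:

# Assumed sketch of the flags conftest.py imports from common_utils; the
# exact environment variable names are a guess and are not shown in this commit.
import os

IN_CIRCLE_CI = os.getenv('CIRCLECI', 'false') == 'true'
IN_RE_WORKER = os.getenv('INSIDE_RE_WORKER') is not None
IN_FBCODE = os.getenv('IN_FBCODE_TORCHVISION') == '1'
CUDA_NOT_AVAILABLE_MSG = 'CUDA device not available'
CIRCLECI_GPU_NO_CUDA_MSG = (
    'CircleCI GPU machines do not need to run CPU-only tests; '
    'those are already covered by the CPU-only jobs.'
)

A practical side effect of registering needs_cuda as a mark is that subsets can be selected from the command line, e.g. pytest -m needs_cuda or pytest -m "not needs_cuda".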
25 changes: 12 additions & 13 deletions test/test_image.py
@@ -9,7 +9,7 @@
import torch
from PIL import Image
import torchvision.transforms.functional as F
from common_utils import get_tmp_dir, needs_cuda, cpu_only
from common_utils import get_tmp_dir, needs_cuda
from _assert_utils import assert_equal

from torchvision.io.image import (
@@ -335,7 +335,6 @@ def test_decode_jpeg_cuda_errors():
torch.ops.image.decode_jpeg_cuda(data, ImageReadMode.UNCHANGED.value, 'cpu')


@cpu_only
def test_encode_jpeg_errors():

with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"):
@@ -360,7 +359,7 @@ def test_encode_jpeg_errors():


def _collect_if(cond):
# TODO: remove this once test_encode_jpeg_windows and test_write_jpeg_windows
# TODO: remove this once test_encode_jpeg_reference and test_write_jpeg_reference
# are removed
def _inner(test_func):
if cond:
@@ -370,15 +369,14 @@ def _inner(test_func):
return _inner


@cpu_only
@_collect_if(cond=IS_WINDOWS)
@pytest.mark.parametrize('img_path', [
pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
])
def test_encode_jpeg_windows(img_path):
def test_encode_jpeg_reference(img_path):
# This test is *wrong*.
# It compares a torchvision-encoded jpeg with a PIL-encoded jpeg, but it
# It compares a torchvision-encoded jpeg with a PIL-encoded jpeg (the reference), but it
# starts encoding the torchvision version from an image that comes from
# decode_jpeg, which can yield different results from pil.decode (see
# test_decode... which uses a high tolerance).
@@ -403,14 +401,13 @@ def test_encode_jpeg_windows(img_path):
assert_equal(jpeg_bytes, pil_bytes)


@cpu_only
@_collect_if(cond=IS_WINDOWS)
@pytest.mark.parametrize('img_path', [
pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
])
def test_write_jpeg_windows(img_path):
# FIXME: Remove this eventually, see test_encode_jpeg_windows
def test_write_jpeg_reference(img_path):
# FIXME: Remove this eventually, see test_encode_jpeg_reference
with get_tmp_dir() as d:
data = read_file(img_path)
img = decode_jpeg(data)
@@ -433,8 +430,9 @@ def test_write_jpeg_windows(img_path):
assert_equal(torch_bytes, pil_bytes)


@cpu_only
@_collect_if(cond=not IS_WINDOWS)
@pytest.mark.skipif(IS_WINDOWS, reason=(
'this test fails on windows because PIL uses libjpeg-turbo on windows'
))
@pytest.mark.parametrize('img_path', [
pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
@@ -455,8 +453,9 @@ def test_encode_jpeg(img_path):
assert_equal(encoded_jpeg_torch, encoded_jpeg_pil)


@cpu_only
@_collect_if(cond=not IS_WINDOWS)
@pytest.mark.skipif(IS_WINDOWS, reason=(
'this test fails on windows because PIL uses libjpeg-turbo on windows'
))
@pytest.mark.parametrize('img_path', [
pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
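The body of _collect_if is collapsed in this view; based on its TODO and the dont_collect mark registered in conftest.py, it plausibly has roughly the following shape (a sketch, not the verbatim source):

import pytest

def _collect_if(cond):
    # Attach the dont_collect mark when the condition does not hold, so the
    # test is deselected by the conftest.py hook instead of being skipped.
    def _inner(test_func):
        if cond:
            return test_func
        return pytest.mark.dont_collect(test_func)
    return _inner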
11 changes: 1 addition & 10 deletions test/test_models.py
@@ -1,7 +1,7 @@
import os
import io
import sys
from common_utils import map_nested_tensor_object, freeze_rng_state, set_rng_seed, cpu_and_gpu, needs_cuda, cpu_only
from common_utils import map_nested_tensor_object, freeze_rng_state, set_rng_seed, cpu_and_gpu, needs_cuda
from _utils_internal import get_relative_path
from collections import OrderedDict
import functools
@@ -234,7 +234,6 @@ def _make_sliced_model(model, stop_layer):
return new_model


@cpu_only
@pytest.mark.parametrize('model_name', ['densenet121', 'densenet169', 'densenet201', 'densenet161'])
def test_memory_efficient_densenet(model_name):
input_shape = (1, 3, 300, 300)
@@ -257,7 +256,6 @@ def test_memory_efficient_densenet(model_name):
torch.testing.assert_close(out1, out2, rtol=0.0, atol=1e-5)


@cpu_only
@pytest.mark.parametrize('dilate_layer_2', (True, False))
@pytest.mark.parametrize('dilate_layer_3', (True, False))
@pytest.mark.parametrize('dilate_layer_4', (True, False))
@@ -272,7 +270,6 @@ def test_resnet_dilation(dilate_layer_2, dilate_layer_3, dilate_layer_4):
assert out.shape == (1, 2048, 7 * f, 7 * f)


@cpu_only
def test_mobilenet_v2_residual_setting():
model = models.__dict__["mobilenet_v2"](inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]])
model.eval()
@@ -281,7 +278,6 @@ def test_mobilenet_v2_residual_setting():
assert out.shape[-1] == 1000


@cpu_only
@pytest.mark.parametrize('model_name', ["mobilenet_v2", "mobilenet_v3_large", "mobilenet_v3_small"])
def test_mobilenet_norm_layer(model_name):
model = models.__dict__[model_name]()
@@ -295,7 +291,6 @@ def get_gn(num_channels):
assert any(isinstance(x, nn.GroupNorm) for x in model.modules())


@cpu_only
def test_inception_v3_eval():
# replacement for models.inception_v3(pretrained=True) that does not download weights
kwargs = {}
@@ -311,7 +306,6 @@ def test_inception_v3_eval():
_check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))


@cpu_only
def test_fasterrcnn_double():
model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
model.double()
@@ -327,7 +321,6 @@ def test_fasterrcnn_double():
assert "labels" in out[0]


@cpu_only
def test_googlenet_eval():
# replacement for models.googlenet(pretrained=True) that does not download weights
kwargs = {}
@@ -376,7 +369,6 @@ def checkOut(out):
checkOut(out_cpu)


@cpu_only
def test_generalizedrcnn_transform_repr():

min_size, max_size = 224, 299
@@ -573,7 +565,6 @@ def compute_mean_std(tensor):
pytest.skip(msg)


@cpu_only
@pytest.mark.parametrize('model_name', get_available_detection_models())
def test_detection_model_validation(model_name):
set_rng_seed(0)
(Diffs for the remaining two changed files are not shown here.)
