Enabled TEGroupedMLP test to run on ROCm.
wenchenvincent committed Nov 20, 2024
1 parent d6524e2 commit 9d03142
Showing 3 changed files with 9 additions and 5 deletions.
pytest.ini: 9 changes (5 additions, 4 deletions)
@@ -1,9 +1,10 @@
 # content of pytest.ini
 [pytest]
 markers =
-    internal: mark a test as a test to private/internal functions.
-    failing_on_rocm: Currently Failing Tests on Rocm
-    failing_on_rocm_mi250: Tests failing on MI250
+    internal: Mark a test as a test to private/internal functions.
+    failing_on_rocm: Currently Failing Tests on ROCm.
+    failing_on_rocm_mi250: Tests failing on MI250.
+    test_on_rocm: Mark a test that we run on ROCm specifically.
 
 addopts =
-    --ignore tests/unit_tests/test_utilities.py
+    --ignore tests/unit_tests/test_utilities.py
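The new test_on_rocm marker is registered above so pytest recognizes it; unregistered marks trigger a PytestUnknownMarkWarning and become errors under --strict-markers. As a minimal sketch of how a test opts in (mirroring the decorators added to test_grouped_mlp.py below; the test name here is hypothetical):

import pytest

@pytest.mark.test_on_rocm
def test_example_runs_on_rocm():  # hypothetical test, for illustration only
    assert True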
run_unit_tests.sh: 2 changes (1 addition, 1 deletion)
@@ -2,4 +2,4 @@
 
 set -x
 export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-torchrun --nproc_per_node=8 -m pytest --color=yes -m "not flaky and not internal and not failing_on_rocm_mi250 and not failing_on_rocm" --csv output/test_report.csv tests/unit_tests/
+torchrun --nproc_per_node=8 -m pytest --color=yes -m "not flaky and not internal and not failing_on_rocm_mi250 and not failing_on_rocm or test_on_rocm" --csv output/test_report.csv tests/unit_tests/
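A note on the updated marker expression (an observation about pytest -m semantics, not part of the commit): "and" binds more tightly than "or", so the expression is evaluated as

(not flaky and not internal and not failing_on_rocm_mi250 and not failing_on_rocm) or test_on_rocm

meaning any test carrying the test_on_rocm mark is collected even if it also carries internal or failing_on_rocm, while all other tests must still pass the existing exclusions.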
tests/unit_tests/transformer/moe/test_grouped_mlp.py: 3 changes (3 additions, 0 deletions)
@@ -278,6 +278,7 @@ def setup_method(self, method, use_cpu_initialization=False, swiglu=True):
     def teardown_method(self, method):
         Utils.destroy_model_parallel()
 
+    @pytest.mark.test_on_rocm
     @pytest.mark.internal
     def test_constructor(self):
         assert isinstance(self.sequential_mlp, MoELayer)
@@ -313,6 +314,7 @@ def test_constructor(self):
         )
 
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+    @pytest.mark.test_on_rocm
     @pytest.mark.internal
     def test_gpu_forward_backward(self):
         self.sequential_mlp.cuda()
@@ -356,6 +358,7 @@ def test_gpu_forward_backward(self):
         torch.testing.assert_close(smm_result, gmm_result)
 
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+    @pytest.mark.test_on_rocm
     @pytest.mark.internal
     def test_gpu_forward_backward_with_no_tokens_allocated(self):
         """Test the case when no token is allocated for groupedGEMM kernels."""
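Combined with the run_unit_tests.sh change above, tests that carry both internal and test_on_rocm are now collected on ROCm. An illustrative sketch, not part of the commit, with hypothetical test names:

import pytest

@pytest.mark.test_on_rocm
@pytest.mark.internal
def test_picked_up_on_rocm():
    # "or test_on_rocm" re-includes this test despite "not internal"
    assert True

@pytest.mark.internal
def test_still_deselected_on_rocm():
    # without test_on_rocm, "not internal" deselects this test
    assert True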
