default to avx2

apache · Jul 28, 2022 · f8df56b · f8df56b
1 parent fd4ee67
commit f8df56b
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/tests/python/unittest/test_roofline.py b/tests/python/unittest/test_roofline.py
@@ -88,7 +88,7 @@ def test_estimate_peak_bandwidth(target, dev):
 
 
 @tvm.testing.skip_if_32bit(reason="Cannot allocate enough memory on i386")
-@tvm.testing.parametrize_targets("llvm", "cuda")
+@tvm.testing.parametrize_targets("llvm -mattr=+fma+avx2", "cuda")
 def test_roofline_analysis(target, dev):
     a = relay.var("a", relay.TensorType((512, 512), "float32"))
     b = relay.var("b", relay.TensorType((512, 512), "float32"))
@@ -107,11 +107,11 @@ def test_roofline_analysis(target, dev):
     assert "Percent of Theoretical Optimal" in report.table()
     for call in report.calls:
         if "Percent of Theoretical Optimal" in call:
-            if target == "llvm":
+            if target.startswith("llvm"):
                 # Ideally we'd like a little tighter bound here, but it is hard to
                 # know how well this dense will perform without tuning. And we
                 # don't have an operator that uses a specific number of flops.
-                assert 90 >= call["Percent of Theoretical Optimal"].ratio >= 5.0
+                assert call["Percent of Theoretical Optimal"].ratio >= 5.0
             elif target == "cuda":
                 # The cuda gpu kernel is really poorly optimized
                 assert 90 >= call["Percent of Theoretical Optimal"].ratio >= 0.01