Skip to content

Commit

Permalink
default to avx2
Browse files (browse the repository at this point in the history)
  • Loading branch information
Tristan Konolige committed Jul 28, 2022
1 parent fd4ee67 commit f8df56b
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions tests/python/unittest/test_roofline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_estimate_peak_bandwidth(target, dev):


@tvm.testing.skip_if_32bit(reason="Cannot allocate enough memory on i386")
-@tvm.testing.parametrize_targets("llvm", "cuda")
+@tvm.testing.parametrize_targets("llvm -mattr=+fma+avx2", "cuda")
def test_roofline_analysis(target, dev):
a = relay.var("a", relay.TensorType((512, 512), "float32"))
b = relay.var("b", relay.TensorType((512, 512), "float32"))
Expand All @@ -107,11 +107,11 @@ def test_roofline_analysis(target, dev):
assert "Percent of Theoretical Optimal" in report.table()
for call in report.calls:
if "Percent of Theoretical Optimal" in call:
-if target == "llvm":
+if target.startswith("llvm"):
# Ideally we'd like a little tighter bound here, but it is hard to
# know how well this dense will perform without tuning. And we
# don't have an operator that uses a specific number of flops.
-assert 90 >= call["Percent of Theoretical Optimal"].ratio >= 5.0
+assert call["Percent of Theoretical Optimal"].ratio >= 5.0
elif target == "cuda":
# The cuda gpu kernel is really poorly optimized
assert 90 >= call["Percent of Theoretical Optimal"].ratio >= 0.01
Expand Down

0 comments on commit f8df56b

Please sign in to comment.