Bump maximum num_warps (#132458)
Summary:
Fix for pytorch/pytorch#129104

Our heuristic for num_warps was computing the optimal number, but we were capping num_warps at a maximum of 8. Raising the cap gives a 1% speedup on HF and TIMM in inference and a 2% speedup in TIMM training; it is neutral otherwise.

Ultimately, I think we want live variable analysis for register usage; still, this is worth landing now.
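To illustrate the kind of change described above, here is a minimal sketch of a warp-count heuristic with a configurable cap. This is not the actual Inductor implementation; the function and parameter names (`num_warps_heuristic`, `elements_per_warp`, `max_num_warps`) are illustrative assumptions.

```python
def next_power_of_2(n: int) -> int:
    # Smallest power of two >= n (for n >= 1).
    return 1 << (n - 1).bit_length()


def num_warps_heuristic(
    num_elements: int,
    elements_per_warp: int = 256,
    max_num_warps: int = 16,  # previously the cap was 8
) -> int:
    # Estimate an ideal warp count from the problem size, round it up
    # to a power of two, then clamp it into [1, max_num_warps].
    ideal = max(1, num_elements // elements_per_warp)
    return max(1, min(max_num_warps, next_power_of_2(ideal)))
```

With the old cap of 8, a kernel over 4096 elements would be limited to 8 warps even though the heuristic's ideal answer is 16; raising the cap lets the heuristic's answer through.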

X-link: pytorch/pytorch#132458
Approved by: https://github.com/Chillee, https://github.com/shunting314

Reviewed By: jovianjaison

Differential Revision: D61308271

Pulled By: eellison

fbshipit-source-id: 3ceafd3701ab712693abfdd1ebe40aed845d3e6f
eellison authored and facebook-github-bot committed Aug 15, 2024
1 parent 23fe825 commit c6db4bc
Showing 1 changed file with 2 additions and 1 deletion.
userbenchmark/dynamo/dynamobench/timm_models.py (2 additions, 1 deletion)

@@ -79,13 +79,13 @@ def pip_install(package):
     "mobilenetv3_large_100",
     "sebotnet33ts_256",
     "selecsls42b",
-    "cspdarknet53",
 }
 
 REQUIRE_EVEN_HIGHER_TOLERANCE = {
     "levit_128",
     "sebotnet33ts_256",
     "beit_base_patch16_224",
+    "cspdarknet53",
 }
 
 # These models need higher tolerance in MaxAutotune mode
@@ -121,6 +121,7 @@ def pip_install(package):
 REQUIRE_LARGER_MULTIPLIER_FOR_SMALLER_TENSOR = {
     "inception_v3",
     "mobilenetv3_large_100",
+    "cspdarknet53",
 }
