From 680d64ea670bc898f974b5d57f8db440ae11abd6 Mon Sep 17 00:00:00 2001
From: Valentine233 <xuan.liao@intel.com>
Date: Sat, 12 Oct 2024 15:22:34 -0700
Subject: [PATCH] change GPT2ForSequenceClassification inference accuracy
 tolerance (#136749)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/123503.

https://github.com/pytorch/pytorch/pull/121866 makes GPT2ForSequenceClassification match SDPA pattern 18, after which it hits an accuracy issue. The issue only occurs with single-threaded BF16 inference. This PR increases the model's inference tolerance on CPU from 4e-3 to 5e-3 so that the check passes. Note that the issue is caused by small implementation differences. For example, the SDPA math backend scales q and k before the matmul for stability; the flash attention backend differs even more because it is a different algorithm.

X-link: https://github.com/pytorch/pytorch/pull/136749
Approved by: https://github.com/jgong5, https://github.com/jansel

Reviewed By: jovianjaison

Differential Revision: D64290722

fbshipit-source-id: a3e7248f57a97cd767257354d410b3508b5e0325
---
 userbenchmark/dynamo/dynamobench/huggingface.py   | 4 ++--
 userbenchmark/dynamo/dynamobench/huggingface.yaml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/userbenchmark/dynamo/dynamobench/huggingface.py b/userbenchmark/dynamo/dynamobench/huggingface.py
index 06bf4f0ee..a96bad12b 100755
--- a/userbenchmark/dynamo/dynamobench/huggingface.py
+++ b/userbenchmark/dynamo/dynamobench/huggingface.py
@@ -501,12 +501,12 @@ def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
             else:
                 return 1e-2, cosine
         else:
-            if name in self._config["tolerance"]["higher_inference"]:
-                return 4e-3, cosine
             if (
                 current_device == "cpu"
                 and name in self._config["tolerance"]["higher_inference_cpu"]
             ):
+                return 5e-3, cosine
+            if name in self._config["tolerance"]["higher_inference"]:
                 return 4e-3, cosine
         return 1e-3, cosine
 
diff --git a/userbenchmark/dynamo/dynamobench/huggingface.yaml b/userbenchmark/dynamo/dynamobench/huggingface.yaml
index 2ddc24253..f0ee57a58 100644
--- a/userbenchmark/dynamo/dynamobench/huggingface.yaml
+++ b/userbenchmark/dynamo/dynamobench/huggingface.yaml
@@ -89,6 +89,7 @@ tolerance:
 
   higher_inference_cpu:
     - LayoutLMForSequenceClassification
+    - GPT2ForSequenceClassification
 
   cosine: []