From 680d64ea670bc898f974b5d57f8db440ae11abd6 Mon Sep 17 00:00:00 2001 From: Valentine233 Date: Sat, 12 Oct 2024 15:22:34 -0700 Subject: [PATCH] change GPT2ForSequenceClassification inference accuracy tolerance (#136749) Summary: Fixes https://github.com/pytorch/pytorch/issues/123503. https://github.com/pytorch/pytorch/pull/121866 makes GPT2ForSequenceClassification hit SDPA pattern 18 and then encounter an accuracy issue. The issue only happens with single-threaded BF16 inference. This PR increases the model's CPU inference tolerance from 4e-3 to 5e-3 so that the check passes. Note that the issue is due to small implementation differences. For example, the SDPA math backend scales q and k before the matmul for stability; the flash attention backend differs further because it is a different algorithm. X-link: https://github.com/pytorch/pytorch/pull/136749 Approved by: https://github.com/jgong5, https://github.com/jansel Reviewed By: jovianjaison Differential Revision: D64290722 fbshipit-source-id: a3e7248f57a97cd767257354d410b3508b5e0325 --- userbenchmark/dynamo/dynamobench/huggingface.py | 4 ++-- userbenchmark/dynamo/dynamobench/huggingface.yaml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/userbenchmark/dynamo/dynamobench/huggingface.py b/userbenchmark/dynamo/dynamobench/huggingface.py index 06bf4f0ee..a96bad12b 100755 --- a/userbenchmark/dynamo/dynamobench/huggingface.py +++ b/userbenchmark/dynamo/dynamobench/huggingface.py @@ -501,12 +501,12 @@ def get_tolerance_and_cosine_flag(self, is_training, current_device, name): else: return 1e-2, cosine else: - if name in self._config["tolerance"]["higher_inference"]: - return 4e-3, cosine if ( current_device == "cpu" and name in self._config["tolerance"]["higher_inference_cpu"] ): + return 5e-3, cosine + if name in self._config["tolerance"]["higher_inference"]: return 4e-3, cosine return 1e-3, cosine diff --git a/userbenchmark/dynamo/dynamobench/huggingface.yaml b/userbenchmark/dynamo/dynamobench/huggingface.yaml index 
2ddc24253..f0ee57a58 100644 --- a/userbenchmark/dynamo/dynamobench/huggingface.yaml +++ b/userbenchmark/dynamo/dynamobench/huggingface.yaml @@ -89,6 +89,7 @@ tolerance: higher_inference_cpu: - LayoutLMForSequenceClassification + - GPT2ForSequenceClassification cosine: []