From a05dd0bdb73d5d488f185c17dee6bd516b711e07 Mon Sep 17 00:00:00 2001 From: noooop Date: Sat, 31 Aug 2024 12:36:04 +0800 Subject: [PATCH] flakey test, see: #7874 #8051 --- tests/basic_correctness/test_chunked_prefill.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/basic_correctness/test_chunked_prefill.py b/tests/basic_correctness/test_chunked_prefill.py index fc6f829c37b06..a39030827a9aa 100644 --- a/tests/basic_correctness/test_chunked_prefill.py +++ b/tests/basic_correctness/test_chunked_prefill.py @@ -116,6 +116,9 @@ def test_models_with_fp8_kv_cache( pytest.skip( "#7378: CUDA illegal memory access (undiagnosed) facebook/opt-125m" ) + if ((model, kv_cache_dtype, chunked_prefill_token_size) == + ("nm-testing/Qwen2-1.5B-Instruct-FP8-K-V", "fp8_e4m3", 4)): + pytest.skip("flakey test, see: #7874 #8051") max_num_seqs = chunked_prefill_token_size max_num_batched_tokens = chunked_prefill_token_size