From 67990405dc63d01f11cc5b464e1ce0a5106e0232 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin
Date: Tue, 14 Dec 2021 16:27:00 -0800
Subject: [PATCH] cpu: rnn: disable packed optimization for threadpool

---
 src/cpu/rnn/rnn_utils.hpp                | 17 +++++++++++++++++
 tests/gtests/test_iface_runtime_attr.cpp |  6 +++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/cpu/rnn/rnn_utils.hpp b/src/cpu/rnn/rnn_utils.hpp
index f53aa224393..54e1cb38531 100644
--- a/src/cpu/rnn/rnn_utils.hpp
+++ b/src/cpu/rnn/rnn_utils.hpp
@@ -765,6 +765,23 @@ bool init_conf(rnn_conf_t &rnn, const rnn_desc_t &rd,
                     || rnn.is_int8() || is_bf16)
             : false;
 
+#if DNNL_CPU_RUNTIME == DNNL_RUNTIME_THREADPOOL
+    // XXX: Threadpool runtime may use a different number of threads at the
+    // execute and create stages, and the GEMM packed API is not aware of the
+    // number of threads as of now. To synchronize all layers, the GEMM pack
+    // API should be modified to accept the number of threads instead of
+    // taking it from `dnnl_get_max_threads()`, and rnn_packed_desc_t should
+    // get an `nthr` member to pass this information between the parts of the
+    // packed API, since the `get_size` call happens on the RNN side, while
+    // packing happens on the reorder side. Consider enabling later.
+    // `test_iface_runtime_attr` is disabled for RNN with threadpool since
+    // packed GEMM is the only working approach for int8 computations in RNN
+    // for now. Consider enabling it once resolved.
+    rnn.use_layer_packed_gemm = false;
+    rnn.use_iter_packed_gemm = false;
+    rnn.use_projection_packed_gemm = false;
+#endif
+
     /* Set packed gemm sizes */
     /* TODO: investigate the benefit of mixing packed and non-packed weights parts */
     const auto set_pack_sizes
diff --git a/tests/gtests/test_iface_runtime_attr.cpp b/tests/gtests/test_iface_runtime_attr.cpp
index 75260cdbf7c..c0cb3d4e353 100644
--- a/tests/gtests/test_iface_runtime_attr.cpp
+++ b/tests/gtests/test_iface_runtime_attr.cpp
@@ -360,10 +360,14 @@ CPU_TEST_F(runtime_attr_test_t, TestReorder) {
 
 TEST_F(runtime_attr_test_t, TestRNN) {
     SKIP_IF_CUDA(true, "RNN primitive not supported for CUDA");
-
+    // Int8 RNN relies solely on packed API, which is available only for X64.
 #if !DNNL_X64
     return;
 #endif
+    // XXX: Threadpool doesn't work correctly with the packed API, which is the
+    // only working mechanism for int8 computations. Disable the test for now.
+    SKIP_IF(DNNL_CPU_RUNTIME == DNNL_RUNTIME_THREADPOOL,
+            "Threadpool does not have working packed API");
 
     memory::dim n = 1, t = 1, l = 10, c = 8, g = 4, d = 1;
     memory::desc src_layer_md {{t, n, c}, data_type::u8, tag::tnc};
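
For illustration, a minimal sketch of the direction the XXX comment points at:
carry the thread count chosen at primitive creation inside rnn_packed_desc_t so
that the reorder-side packing does not re-query dnnl_get_max_threads(). Apart
from the names rnn_packed_desc_t and dnnl_get_max_threads(), everything below
(set_pack_sizes_with_nthr, pack_weights_for_reorder, the other struct fields)
is a hypothetical stand-in, not the actual oneDNN internals.

// Hypothetical sketch only -- names are illustrative; only the idea of an
// `nthr` member in rnn_packed_desc_t comes from the comment in this patch.
#include <cstddef>

struct rnn_packed_desc_t {
    size_t n_parts = 0;
    size_t part_pack_size[4] = {};
    // Proposed addition: remember the thread count used when packed sizes
    // were queried at primitive creation time.
    int nthr = 1;
};

// Creation side (RNN): compute packed sizes for an explicit thread count
// instead of relying on dnnl_get_max_threads().
inline void set_pack_sizes_with_nthr(rnn_packed_desc_t &desc, int nthr) {
    desc.nthr = nthr;
    // ... compute per-part packed buffer sizes assuming `nthr` threads ...
}

// Reorder side: packing reuses the stored value, so the create and execute
// stages stay consistent even if the threadpool changes its size in between.
inline void pack_weights_for_reorder(const rnn_packed_desc_t &desc) {
    const int nthr = desc.nthr; // not dnnl_get_max_threads()
    (void)nthr;
    // ... invoke the packing kernels with `nthr` ...
}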