From 67990405dc63d01f11cc5b464e1ce0a5106e0232 Mon Sep 17 00:00:00 2001
From: Dmitrii Zarukin
Date: Tue, 14 Dec 2021 16:27:00 -0800
Subject: [PATCH] cpu: rnn: disable packed optimization for threadpool

---
 src/cpu/rnn/rnn_utils.hpp                | 17 +++++++++++++++++
 tests/gtests/test_iface_runtime_attr.cpp |  6 +++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/cpu/rnn/rnn_utils.hpp b/src/cpu/rnn/rnn_utils.hpp
index f53aa224393..54e1cb38531 100644
--- a/src/cpu/rnn/rnn_utils.hpp
+++ b/src/cpu/rnn/rnn_utils.hpp
@@ -765,6 +765,23 @@ bool init_conf(rnn_conf_t &rnn, const rnn_desc_t &rd,
                     || rnn.is_int8() || is_bf16)
             : false;
 
+#if DNNL_CPU_RUNTIME == DNNL_RUNTIME_THREADPOOL
+    // XXX: Threadpool runtime may use a different number of threads at the
+    // execute and create stages, and the GEMM packed API is not aware of the
+    // number of threads as of now. To synchronize all layers, the GEMM pack
+    // API should be modified to accept the number of threads instead of
+    // taking it from `dnnl_get_max_threads()`, and rnn_packed_desc_t should
+    // get an `nthr` member to pass this information between the parts of the
+    // packed API, since the `get_size` call happens on the RNN side, while
+    // packing happens on the reorder side. Consider enabling later.
+    // `test_iface_runtime_attr` is disabled for RNN with threadpool since
+    // packed GEMM is the only working approach for int8 computations in RNN
+    // for now. Consider enabling it once resolved.
+    rnn.use_layer_packed_gemm = false;
+    rnn.use_iter_packed_gemm = false;
+    rnn.use_projection_packed_gemm = false;
+#endif
+
     /* Set packed gemm sizes */
     /* TODO: investigate the benefit of mixing packed and non-packed weights parts */
     const auto set_pack_sizes
diff --git a/tests/gtests/test_iface_runtime_attr.cpp b/tests/gtests/test_iface_runtime_attr.cpp
index 75260cdbf7c..c0cb3d4e353 100644
--- a/tests/gtests/test_iface_runtime_attr.cpp
+++ b/tests/gtests/test_iface_runtime_attr.cpp
@@ -360,10 +360,14 @@ CPU_TEST_F(runtime_attr_test_t, TestReorder) {
 
 TEST_F(runtime_attr_test_t, TestRNN) {
     SKIP_IF_CUDA(true, "RNN primitive not supported for CUDA");
-
+    // Int8 RNN relies solely on packed API, which is available only for X64.
 #if !DNNL_X64
     return;
 #endif
+    // XXX: Threadpool doesn't work correctly with the packed API, which is the
+    // only working mechanism for int8 computations. Disable the test for now.
+    SKIP_IF(DNNL_CPU_RUNTIME == DNNL_RUNTIME_THREADPOOL,
+            "Threadpool does not have working packed API");
 
     memory::dim n = 1, t = 1, l = 10, c = 8, g = 4, d = 1;
     memory::desc src_layer_md {{t, n, c}, data_type::u8, tag::tnc};
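
For illustration, a minimal sketch of the direction the XXX comment points at:
carry the thread count chosen at primitive creation inside rnn_packed_desc_t so
that the reorder-side packing does not re-query dnnl_get_max_threads(). Apart
from the names rnn_packed_desc_t and dnnl_get_max_threads(), everything below
(set_pack_sizes_with_nthr, pack_weights_for_reorder, the other struct fields)
is a hypothetical stand-in, not the actual oneDNN internals.

// Hypothetical sketch only -- names are illustrative; only the idea of an
// `nthr` member in rnn_packed_desc_t comes from the comment in this patch.
#include <cstddef>

struct rnn_packed_desc_t {
    size_t n_parts = 0;
    size_t part_pack_size[4] = {};
    // Proposed addition: remember the thread count used when packed sizes
    // were queried at primitive creation time.
    int nthr = 1;
};

// Creation side (RNN): compute packed sizes for an explicit thread count
// instead of relying on dnnl_get_max_threads().
inline void set_pack_sizes_with_nthr(rnn_packed_desc_t &desc, int nthr) {
    desc.nthr = nthr;
    // ... compute per-part packed buffer sizes assuming `nthr` threads ...
}

// Reorder side: packing reuses the stored value, so the create and execute
// stages stay consistent even if the threadpool changes its size in between.
inline void pack_weights_for_reorder(const rnn_packed_desc_t &desc) {
    const int nthr = desc.nthr; // not dnnl_get_max_threads()
    (void)nthr;
    // ... invoke the packing kernels with `nthr` ...
}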