From 4bbd2040c8e5069e41f9115f49cbb1f3a1dce9a8 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Mon, 14 Aug 2023 01:30:09 -0700
Subject: [PATCH 01/17] fix avx2 fp16 load_store_emitters issue

---
 .../plugin/x64/jit_load_store_emitters.cpp    | 118 ++++++++++--------
 1 file changed, 64 insertions(+), 54 deletions(-)

diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
index e7e668335e49ee..226c7f1dcd4779 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
@@ -497,53 +497,72 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
     auto ymm = Xbyak::Ymm(vmm.getIdx());
     auto zmm = Xbyak::Zmm(vmm.getIdx());

+    auto load_words_to_dword_base = [&]() {
+        load_bytes(xmm, reg, offset, load_size);
+        if (is_bf16) {
+            h->uni_vpmovzxwd(vmm, xmm);
+            h->uni_vpslld(vmm, vmm, 16);
+        } else if (is_f16) {
+            h->vcvtph2ps(ymm, xmm);
+        } else {
+            if (is_signed)
+                h->uni_vpmovsxwd(vmm, xmm);
+            else
+                h->uni_vpmovzxwd(vmm, xmm);
+        }
+    };
+
     // For load_size == 32/16/8, do load/extension in one go
     // including xmm/ymm tail block for ymm/zmm, so explicit xmm/ymm/zmm
     switch (load_size) {
-        case 32: {
-            if (is_bf16) {
-                h->uni_vpmovzxwd(zmm, ptr[reg + offset]);
-                h->uni_vpslld(zmm, zmm, 16);
-            } else if (is_f16) {
-                h->vcvtph2ps(zmm, ptr[reg + offset]);
-            } else {
-                if (is_signed)
-                    h->uni_vpmovsxwd(zmm, ptr[reg + offset]);
-                else
-                    h->uni_vpmovzxwd(zmm, ptr[reg + offset]);
-            }
-            break;
+    case 32: {
+        if (mayiuse(cpu::x64::avx512_core)) {
+            if (is_bf16) {
+                h->uni_vpmovzxwd(zmm, ptr[reg + offset]);
+                h->uni_vpslld(zmm, zmm, 16);
+            } else if (is_f16) {
+                h->vcvtph2ps(zmm, ptr[reg + offset]);
+            } else {
+                if (is_signed)
+                    h->uni_vpmovsxwd(zmm, ptr[reg + offset]);
+                else
+                    h->uni_vpmovzxwd(zmm, ptr[reg + offset]);
+            }
+        } else {
+            load_words_to_dword_base();
         }
-        case 16: {
-            if (is_bf16) {
+        break;
+    }
+    case 16: {
+        if (is_bf16) {
             h->uni_vpmovzxwd(ymm, ptr[reg + offset]);
             h->uni_vpslld(ymm, ymm, 16);
-            } else if (is_f16) {
+        } else if (is_f16) {
             h->vcvtph2ps(ymm, ptr[reg + offset]);
-            } else {
+        } else {
             if (is_signed)
                 h->uni_vpmovsxwd(ymm, ptr[reg + offset]);
             else
                 h->uni_vpmovzxwd(ymm, ptr[reg + offset]);
-            }
-            break;
         }
-        case 8: {
-            if (is_bf16) {
+        break;
+    }
+    case 8: {
+        if (is_bf16) {
             h->uni_vpmovzxwd(xmm, ptr[reg + offset]);
             h->uni_vpslld(xmm, xmm, 16);
-            } else if (is_f16) {
+        } else if (is_f16) {
             h->vcvtph2ps(xmm, ptr[reg + offset]);
-            } else {
+        } else {
             if (is_signed)
                 h->uni_vpmovsxwd(xmm, ptr[reg + offset]);
             else
                 h->uni_vpmovzxwd(xmm, ptr[reg + offset]);
-            }
-            break;
         }
-        default: {
-            if (is_zmm && load_size > threshold_for_mask_emu_load) {
+        break;
+    }
+    default: {
+        if (is_zmm && load_size > threshold_for_mask_emu_load) {
             unsigned int mask = 1;
             mask = (mask << (load_size / 2)) - mask;
             h->mov(Reg32(aux_gpr_idxs[0]), mask);
             h->kmovw(k_mask, Reg32(aux_gpr_idxs[0]));
             if (is_bf16) {
                 h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]);
                 h->uni_vpslld(vmm, vmm, 16);
             } else if (is_f16) {
                 h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]);
             } else {
                 if (is_signed)
                     h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]);
                 else
                     h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]);
             }
-            } else {
+        } else {
             // xmm or ymm version
-            load_bytes(xmm, reg, offset, load_size);
-            if (is_bf16) {
-                h->uni_vpmovzxwd(vmm, xmm);
-                h->uni_vpslld(vmm, vmm, 16);
-            } else if (is_f16) {
-                h->vcvtph2ps(ymm, xmm);
-            } else {
-                if (is_signed)
-                    h->uni_vpmovsxwd(vmm, xmm);
-                else
-                    h->uni_vpmovzxwd(vmm, xmm);
-            }
-            }
-            break;
+            load_words_to_dword_base();
         }
+        break;
+    }
     }
 }

@@ -1188,20 +1196,22 @@ void
jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 ®, store_bytes(reg, offset, store_num * 2); } } else if (is_f16) { - if (!mayiuse(cpu::x64::avx512_core_fp16)) - OPENVINO_THROW("Store emitter in ", name_, " only support fp16 on platform with avx512_core_fp16."); - // to avoid src vmm pollution - if (src_prc_ == ov::element::f32) { - // since avx512, zmm(fp32) => ymm(fp16) - ymm = Ymm(aux_vec_idxs[0]); - } // in I32 case, zmm&ymm is already in aux reg - - h->vcvtps2ph(ymm, zmm, 0x4); - if (store_num == 16) { - h->vmovdqu16(ptr[reg + offset], ymm); + if (mayiuse(cpu::x64::avx512_core_fp16)) { + // to avoid src vmm pollution + if (src_prc_ == ov::element::f32) { + // since avx512, zmm(fp32) => ymm(fp16) + ymm = Ymm(aux_vec_idxs[0]); + } // in I32 case, zmm&ymm is already in aux reg + + h->vcvtps2ph(ymm, zmm, 0x4); + if (store_num == 16) { + h->vmovdqu16(ptr[reg + offset], ymm); + } else { + data_idx = static_cast(ymm.getIdx()); + store_bytes(reg, offset, store_num * 2); + } } else { - data_idx = static_cast(ymm.getIdx()); - store_bytes(reg, offset, store_num * 2); + store_dword_to_word_base(); } } else { switch (store_num) { From d375bd553b8fdab021ae5138e87395e6cf0ac4ad Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Wed, 16 Aug 2023 01:37:02 -0700 Subject: [PATCH 02/17] add eltwise_node bf16 support, and brgconv_avx2 for common cpu node DefaultImplPriority --- src/plugins/intel_cpu/src/config.cpp | 10 ++++----- .../plugin/x64/jit_load_store_emitters.cpp | 21 ++++++++++++++++--- src/plugins/intel_cpu/src/node.cpp | 3 +++ src/plugins/intel_cpu/src/nodes/eltwise.cpp | 13 +++++++----- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 53b2779936b7b3..a3a061891d6634 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -219,7 +219,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { ". 
Expected only true/false"); } if (enable) { - if (mayiuse(avx512_core)) { + if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) { inferencePrecision = ov::element::bf16; } else { OPENVINO_THROW("Platform doesn't support BF16 format"); @@ -234,12 +234,12 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { auto const prec = val.as(); inferencePrecisionSetExplicitly = true; if (prec == ov::element::bf16) { - if (mayiuse(avx512_core)) { + if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) { inferencePrecision = ov::element::bf16; } } else if (prec == ov::element::f16) { #if defined(OPENVINO_ARCH_X86_64) - if (mayiuse(avx512_core_fp16) || mayiuse(avx512_core_amx_fp16)) { + if (mayiuse(avx512_core_fp16) || mayiuse(avx512_core_amx_fp16) || mayiuse(avx2_vnni_2)) { inferencePrecision = ov::element::f16; } #elif defined(OV_CPU_ARM_ENABLE_FP16) @@ -327,7 +327,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { if (modelType != ModelType::CNN) inferencePrecision = ov::element::f16; #else - if (mayiuse(avx512_core_bf16)) + if (mayiuse(avx512_core_bf16) || mayiuse(avx2_vnni_2)) inferencePrecision = ov::element::bf16; #endif } else { @@ -398,4 +398,4 @@ void Config::updateProperties() { } } // namespace intel_cpu -} // namespace ov +} // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp index 226c7f1dcd4779..e63cef2e49b3f9 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp @@ -474,8 +474,10 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak bool is_f16 = (prc == ov::element::f16); bool is_signed = prc.is_signed(); - if (is_f16 && !mayiuse(cpu::x64::avx512_core_fp16)) - OPENVINO_THROW("Load emitter in ", name_, " only support fp16 on platform with avx512_core_fp16."); + if (is_f16 && !mayiuse(cpu::x64::avx512_core_fp16) && !mayiuse(cpu::x64::avx2_vnni_2)) + OPENVINO_THROW("Load emitter in ", + name_, + " only support fp16 on platform with avx512_core_fp16 or avx2_vnni_2."); // Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512) // For Ymm register, load capacity is halved (16/2(num) * 32 <= 128) @@ -1210,8 +1212,21 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 ®, data_idx = static_cast(ymm.getIdx()); store_bytes(reg, offset, store_num * 2); } + } else if (mayiuse(cpu::x64::avx2_vnni_2)) { + // to avoid src vmm pollution + if (src_prc_ == Precision::FP32) { + xmm = Xmm(aux_vec_idxs[0]); + } + h->vcvtps2ph(xmm, ymm, 0x4); + if (store_num == 16) { + h->uni_vmovdqu(ptr[reg + offset], xmm); + } else { + data_idx = static_cast(xmm.getIdx()); + store_bytes(reg, offset, store_num * 2); + } } else { - store_dword_to_word_base(); + IE_THROW() << "Store emitter in " << name_ + << " only support fp16 on platform with avx512_core_fp16 or avx2_vnni_2."; } } else { switch (store_num) { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index d9b5bfc490db8e..2293c427bfdcb4 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1019,6 +1019,9 @@ const std::vector& Node::getDefaultImplPriority() { impl_desc_type::jit_avx512_dw, impl_desc_type::jit_avx512_1x1, impl_desc_type::jit_avx512, + // [WA]default support after fully evaluate + // impl_desc_type::brgconv_avx2_1x1, + // 
impl_desc_type::brgconv_avx2, impl_desc_type::jit_avx2_dw, impl_desc_type::jit_avx2_1x1, impl_desc_type::jit_avx2, diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index e471e29fc87185..92b478fb8518ee 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -283,7 +283,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener this, p->entry_[i], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); } - if (mayiuse(avx512_core)) + if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) uni_vcvtneps2bf16.reset(new jit_uni_vcvtneps2bf16(this, isa)); const auto &jep = jep_; @@ -839,8 +839,12 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vmovups(op, vmm_dst); break; case ov::element::bf16: - uni_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); - vmovdqu16(op, ymm_dst); + uni_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, + {static_cast(ymm_dst.getIdx())}); + if (isa == x64::avx512_core) + vmovdqu16(op, ymm_dst); + else + uni_vmovdqu(op, ymm_dst); break; case ov::element::f16: vcvtps2ph(op, vmm_dst, 0x4); @@ -2184,8 +2188,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() { if (!fusedWith.empty()) { outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } - - if (!mayiuse(avx512_core)) { + if (!mayiuse(avx512_core) && !mayiuse(avx2_vnni_2)) { bool hasBF16 = false; for (auto &inPrc : inputPrecisions) if (inPrc == ov::element::bf16) From cfd846da7ac85cfec0b1647a36f30e38c7fa401b Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Thu, 21 Sep 2023 23:44:37 -0700 Subject: [PATCH 03/17] MVN node support avx2_bf16 --- src/plugins/intel_cpu/src/nodes/mvn.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index 97495042f2c6a4..dbfd36296fb484 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -1829,7 +1829,7 @@ void MVN::initSupportedPrimitiveDescriptors() { ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(0); ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(0); - if (!mayiuse(avx512_core)) { + if (!mayiuse(avx512_core) && !mayiuse(avx2_vnni_2)) { if (outputPrecision == ov::element::bf16) outputPrecision = ov::element::f32; } From 6ab20978aa34e206a03ffbdafb61a1a6db8421a3 Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Mon, 9 Oct 2023 22:19:42 -0700 Subject: [PATCH 04/17] limit brgconv support of avx2 to avx2_vnni_2, and enable s8s8 conv on avx2_vnni_2 --- src/plugins/intel_cpu/src/nodes/conv.cpp | 10 ++++++---- .../src/transformations/transformation_pipeline.cpp | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 9b0f355a9e2f16..fdfee25e531f0c 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -373,7 +373,8 @@ const std::vector& Convolution::getDefaultImplPriority() { } const bool Convolution::isBrgConvAvailable() { - static const bool isBrgConvAvailable = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core); + static const bool isBrgConvAvailable = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) || + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2); return 
isBrgConvAvailable;
 }

@@ -1634,12 +1635,13 @@ void Convolution::initializeInputZeroPoints(const uint8_t* inputZpData, const si
         if (inputZpData[j] != inputZpData[0])
             inputZeroPointType = zpType::PerChannel;
     }
-    // Only enable per-tensor zero point on avx512-amx and avx512-core-vnni.
+    // Only enable per-tensor zero point on avx512-amx, avx512-core-vnni and avx2_vnni_2.
     // If zero point is pertensor, both legacy zp and stock zp
     // would be passed into conv node. The conv node would determine how to create
     // post-ops attribute and prioritize to choose final onednn kernel.
-    if (inputZeroPointType == zpType::PerTensor &&
-        (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_vnni)))
+    if (inputZeroPointType == zpType::PerTensor && (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx) ||
+                                                    impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_vnni) ||
+                                                    impl::cpu::x64::mayiuse(impl::cpu::x64::avx2_vnni_2)))
         inputZeroPoints.push_back(static_cast<uint8_t>(inputZpData[0]));
     else
         inputZeroPointType = zpType::PerChannel;
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index 47a1efbf9494ed..abef96a1197938 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -518,9 +518,10 @@ void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector
     std::vector<ov::element::Type> input0LowPrecisionList;
-    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
+    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) ||
+        dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) {
         input0LowPrecisionList = {ov::element::u8, ov::element::i8};
     } else {
         input0LowPrecisionList = {ov::element::u8};

From e44e5d3e17ab2c3d64d372a1cfdedbc3128eb433 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Thu, 12 Oct 2023 01:55:42 -0700
Subject: [PATCH 05/17] fix avx2 bf16 memory free issue of Eltwise node

---
 src/plugins/intel_cpu/src/nodes/eltwise.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
index 92b478fb8518ee..e02b1b422b059a 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -839,12 +839,15 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
                 uni_vmovups(op, vmm_dst);
                 break;
             case ov::element::bf16:
-                uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
-                                             {static_cast<size_t>(ymm_dst.getIdx())});
-                if (isa == x64::avx512_core)
+                if (isa == x64::avx512_core) {
+                    uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
+                                                 {static_cast<size_t>(ymm_dst.getIdx())});
                     vmovdqu16(op, ymm_dst);
-                else
-                    uni_vmovdqu(op, ymm_dst);
+                } else {
+                    uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
+                                                 {static_cast<size_t>(xmm_dst.getIdx())});
+                    uni_vmovdqu(op, xmm_dst);
+                }
                 break;
             case ov::element::f16:
                 vcvtps2ph(op, vmm_dst, 0x4);
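For context on the Eltwise fix above: on AVX2 a YMM register holds eight f32 lanes, and converting them to bf16 produces eight 16-bit values, i.e. exactly one XMM's worth of data, so storing the full YMM wrote 16 extra bytes past the destination block. A minimal sketch of that size arithmetic (illustrative only; the names below are not from the patch):

    #include <cstddef>
    #include <cstdint>

    // One AVX2 YMM register holds 8 f32 lanes (32 bytes / 4 bytes per lane).
    constexpr std::size_t f32_lanes  = 32 / sizeof(float);                 // 8
    // After f32 -> bf16 conversion each lane shrinks to 2 bytes...
    constexpr std::size_t bf16_bytes = f32_lanes * sizeof(std::uint16_t);  // 16
    // ...which is exactly one XMM, hence uni_vmovdqu(op, xmm_dst) above.
    static_assert(bf16_bytes == 16, "8 bf16 results fit in one XMM, not a YMM");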
From 6565690ac76b4d55513f04674d7594a831d89ad2 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Mon, 16 Oct 2023 02:25:08 -0700
Subject: [PATCH 06/17] re-add avx2_vnni_2 configuration, as the temporary
 commit was dropped

---
 src/inference/dev_api/ie_system_conf.h                 | 7 +++++++
 src/inference/dev_api/openvino/runtime/system_conf.hpp | 7 +++++++
 src/inference/src/system_conf.cpp                      | 5 +++++
 src/plugins/intel_cpu/src/nodes/conv.cpp               | 2 ++
 4 files changed, 21 insertions(+)

diff --git a/src/inference/dev_api/ie_system_conf.h b/src/inference/dev_api/ie_system_conf.h
index c0d2d81704f432..adc887c7284da6 100644
--- a/src/inference/dev_api/ie_system_conf.h
+++ b/src/inference/dev_api/ie_system_conf.h
@@ -109,6 +109,13 @@ using ov::with_cpu_x86_avx2;
  */
 using ov::with_cpu_x86_avx2_vnni;

+/**
+ * @brief Checks whether CPU supports AVX2_VNNI_2 capability
+ * @ingroup ie_dev_api_system_conf
+ * @return `True` if AVX2_VNNI_2 instructions are available, `false` otherwise
+ */
+using ov::with_cpu_x86_avx2_vnni_2;
+
 /**
  * @brief Checks whether CPU supports AVX 512 capability
  * @ingroup ie_dev_api_system_conf
diff --git a/src/inference/dev_api/openvino/runtime/system_conf.hpp b/src/inference/dev_api/openvino/runtime/system_conf.hpp
index c3648a69118e6b..72ecf9a61694ef 100644
--- a/src/inference/dev_api/openvino/runtime/system_conf.hpp
+++ b/src/inference/dev_api/openvino/runtime/system_conf.hpp
@@ -97,6 +97,13 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx2();
  */
 OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni();

+/**
+ * @brief Checks whether CPU supports AVX2_VNNI_2 capability
+ * @ingroup ov_dev_api_system_conf
+ * @return `True` if AVX2_VNNI_2 instructions are available, `false` otherwise
+ */
+OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni_2();
+
 /**
  * @brief Checks whether CPU supports AVX 512 capability
  * @ingroup ov_dev_api_system_conf
diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp
index 68e6e36df4f051..07278e2dde5fdb 100644
--- a/src/inference/src/system_conf.cpp
+++ b/src/inference/src/system_conf.cpp
@@ -60,6 +60,11 @@ bool with_cpu_x86_avx2_vnni() {
     return get_cpu_info().has(Xbyak::util::Cpu::tAVX2 | Xbyak::util::Cpu::tAVX_VNNI);
 }

+bool with_cpu_x86_avx2_vnni_2() {
+    return with_cpu_x86_avx2_vnni() &&
+           get_cpu_info().has(Xbyak::util::Cpu::tAVX_VNNI_INT8 | Xbyak::util::Cpu::tAVX_NE_CONVERT);
+}
+
 bool with_cpu_x86_avx512f() {
     return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F);
 }
diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index fdfee25e531f0c..71506791ae0c3d 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -349,6 +349,8 @@ const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
         impl_desc_type::jit_avx512_dw,
         impl_desc_type::jit_avx512_1x1,
         impl_desc_type::jit_avx512,
+        impl_desc_type::brgconv_avx2_1x1,
+        impl_desc_type::brgconv_avx2,
         impl_desc_type::jit_avx2_dw,
         impl_desc_type::jit_avx2_1x1,
         impl_desc_type::jit_avx2,

From f4ace001c171213b4ab8c1b9c336890cbd25a397 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Wed, 18 Oct 2023 23:03:09 -0700
Subject: [PATCH 07/17] W.A.: bf16/fp16 gather jit impl and bf16/fp16
 weightsDecompression of fullyconnected not supported on avx2_vnni_2

---
 src/plugins/intel_cpu/src/nodes/fullyconnected.cpp | 5 +++++
 src/plugins/intel_cpu/src/nodes/gather.cpp         | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index ba0df9d418ba84..3a5ee5a96ce7c1 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -206,6 +206,11 @@ void FullyConnected::getSupportedDescriptors() {
         if (one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) {
             outputDataType = memory::data_type::bf16;
         }
+        // W.A.
WeightsDecompression not supported on avx2_vnni_2 + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2) && + weightsDataType == memory::data_type::u8) { + inputDataType = outputDataType = memory::data_type::f32; + } } else if (inputDataType == memory::data_type::f16) { #if defined(OV_CPU_WITH_ACL) // acl fc does not support precisions conversion diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 6a9949365ced87..82acd63aab84a3 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -179,6 +179,12 @@ void Gather::createPrimitive() { if (isInPlace()) { return; } + // W.A gather bf16/fp16 jit impl has ACC issue on avx2_vnni_2 + Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA); + if (one_of(dataPrecision, Precision::BF16, Precision::FP16) && + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) + return; + #if defined(OPENVINO_ARCH_X86_64) uint64_t idxElPerVec = 1; if (!isDynamicNode()) { From d1494a201236d653cd9626aa967c0dd192724b6d Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Thu, 26 Oct 2023 20:28:01 -0700 Subject: [PATCH 08/17] fix elementwise functional test 'primTypeCheck' fail error, extend 'hasHardwareSupport' function on avx2_vnni_2 --- src/plugins/intel_cpu/src/utils/precision_support.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/utils/precision_support.cpp b/src/plugins/intel_cpu/src/utils/precision_support.cpp index cc942777697c51..ebd71290624cfb 100644 --- a/src/plugins/intel_cpu/src/utils/precision_support.cpp +++ b/src/plugins/intel_cpu/src/utils/precision_support.cpp @@ -14,7 +14,8 @@ bool hasHardwareSupport(const ov::element::Type& precision) { switch (precision) { case ov::element::f16: { #if defined(OPENVINO_ARCH_X86_64) - if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16)) + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_fp16) || + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) return true; return false; #elif defined(OV_CPU_ARM_ENABLE_FP16) @@ -25,7 +26,8 @@ bool hasHardwareSupport(const ov::element::Type& precision) { } case ov::element::bf16: { #if defined(OPENVINO_ARCH_X86_64) - if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) || + dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) return true; return false; #else From 31bd8414baeb7b3cd950774a550b1f31d2b1d09d Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Wed, 1 Nov 2023 00:23:55 -0700 Subject: [PATCH 09/17] use avx2_vnni_2 new convert instructions --- .../emitters/plugin/x64/jit_bf16_emitters.hpp | 3 + .../plugin/x64/jit_load_store_emitters.cpp | 115 +++++++++--------- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 14 ++- 3 files changed, 70 insertions(+), 62 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp index ca958355154a56..d8332c218b3822 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp @@ -55,6 +55,9 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter { h->vfixupimmps(aux, in, table_val("selector"), 0); h->vpsrad(aux, aux, 16); h->vpmovdw(out, aux); + } else if 
(dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::cpu_isa_t::avx2_vnni_2)) { + Xmm out = Xmm(out_vec_idxs[0]); + h->vcvtneps2bf16(out, in, PreferredEncoding::VexEncoding); } else { // round_to_nearest_even emulation Vmm aux = Vmm(aux_vec_idxs[0]); Xmm out = Xmm(out_vec_idxs[0]); diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp index e63cef2e49b3f9..85a631ca0277cc 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp @@ -499,90 +499,87 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak auto ymm = Xbyak::Ymm(vmm.getIdx()); auto zmm = Xbyak::Zmm(vmm.getIdx()); - auto load_words_to_dword_base = [&]() { - load_bytes(xmm, reg, offset, load_size); - if (is_bf16) { - h->uni_vpmovzxwd(vmm, xmm); - h->uni_vpslld(vmm, vmm, 16); - } else if (is_f16) { - h->vcvtph2ps(ymm, xmm); - } else { - if (is_signed) - h->uni_vpmovsxwd(vmm, xmm); - else - h->uni_vpmovzxwd(vmm, xmm); - } - }; - // For load_size == 32/16/8, do load/extension in one go // including xmm/ymm tail block for ymm/zmm, so explicite xmm/ymm/zmm switch (load_size) { case 32: { - if (mayiuse(cpu::x64::avx512_core)) { - if (is_bf16) { - h->uni_vpmovzxwd(zmm, ptr[reg + offset]); - h->uni_vpslld(zmm, zmm, 16); - } else if (is_f16) { - h->vcvtph2ps(zmm, ptr[reg + offset]); - } else { - if (is_signed) - h->uni_vpmovsxwd(zmm, ptr[reg + offset]); - else - h->uni_vpmovzxwd(zmm, ptr[reg + offset]); - } - break; + // needed here? + if (!is_zmm) + IE_THROW() << "Load emitter in " << name_ + << " has unexpected number of values(32) to load to non-zmm in load_words_to_dword_extension."; + if (is_bf16) { + h->uni_vpmovzxwd(zmm, ptr[reg + offset]); + h->uni_vpslld(zmm, zmm, 16); + } else if (is_f16) { + h->vcvtph2ps(zmm, ptr[reg + offset]); } else { - load_words_to_dword_base(); + if (is_signed) + h->uni_vpmovsxwd(zmm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(zmm, ptr[reg + offset]); } + break; } case 16: { if (is_bf16) { - h->uni_vpmovzxwd(ymm, ptr[reg + offset]); - h->uni_vpslld(ymm, ymm, 16); + h->uni_vpmovzxwd(ymm, ptr[reg + offset]); + h->uni_vpslld(ymm, ymm, 16); + } else if (is_f16) { - h->vcvtph2ps(ymm, ptr[reg + offset]); + h->vcvtph2ps(ymm, ptr[reg + offset]); } else { - if (is_signed) - h->uni_vpmovsxwd(ymm, ptr[reg + offset]); - else - h->uni_vpmovzxwd(ymm, ptr[reg + offset]); + if (is_signed) + h->uni_vpmovsxwd(ymm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(ymm, ptr[reg + offset]); } break; } case 8: { if (is_bf16) { - h->uni_vpmovzxwd(xmm, ptr[reg + offset]); - h->uni_vpslld(xmm, xmm, 16); + h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + h->uni_vpslld(xmm, xmm, 16); } else if (is_f16) { - h->vcvtph2ps(xmm, ptr[reg + offset]); + h->vcvtph2ps(xmm, ptr[reg + offset]); } else { - if (is_signed) - h->uni_vpmovsxwd(xmm, ptr[reg + offset]); - else - h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + if (is_signed) + h->uni_vpmovsxwd(xmm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(xmm, ptr[reg + offset]); } break; } default: { if (is_zmm && load_size > threshold_for_mask_emu_load) { - unsigned int mask = 1; - mask = (mask << (load_size / 2)) - mask; - h->mov(Reg32(aux_gpr_idxs[0]), mask); - h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); - if (is_bf16) { + unsigned int mask = 1; + mask = (mask << (load_size / 2)) - mask; + h->mov(Reg32(aux_gpr_idxs[0]), mask); + h->kmovw(k_mask, 
Reg32(aux_gpr_idxs[0])); + if (is_bf16) { + h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + h->uni_vpslld(vmm, vmm, 16); + } else if (is_f16) { + h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]); + } else { + if (is_signed) + h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]); + else h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); - h->uni_vpslld(vmm, vmm, 16); - } else if (is_f16) { - h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]); - } else { - if (is_signed) - h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]); - else - h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); - } + } } else { - // xmm or ymm version - load_words_to_dword_base(); + // xmm or ymm version + load_bytes(xmm, reg, offset, load_size); + if (is_bf16) { + h->uni_vpmovzxwd(vmm, xmm); + h->uni_vpslld(vmm, vmm, 16); + } else if (is_f16) { + h->vcvtph2ps(ymm, xmm); + } else { + if (is_signed) + h->uni_vpmovsxwd(vmm, xmm); + else + h->uni_vpmovzxwd(vmm, xmm); + } } break; } diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index e02b1b422b059a..ebff119982cc5f 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -771,11 +771,19 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener uni_vmovss(xmm_src, op); break; case ov::element::bf16: - uni_vpinsrw(xmm_src, xmm_src, op, 0); - uni_vpslld(xmm_src, xmm_src, 16); + if (isa == x64::avx2_vnni_2) { + vbcstnebf162ps(xmm_src, op); + } else { + uni_vpinsrw(xmm_src, xmm_src, op, 0); + uni_vpslld(xmm_src, xmm_src, 16); + } break; case ov::element::f16: - vcvtph2ps(xmm_src, op); + if (isa == x64::avx2_vnni_2) { + vbcstnesh2ps(xmm_src, op); + } else { + vcvtph2ps(xmm_src, op); + } break; case ov::element::i16: uni_vpinsrw(xmm_src, xmm_src, op, 0); From 0b753ba2c66ff21254589df41fba27d2a6289871 Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Sat, 4 Nov 2023 20:10:05 -0700 Subject: [PATCH 10/17] fix simple_if testcase failed issue --- .../intel_cpu/src/transformations/transformation_pipeline.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index abef96a1197938..909453b8be3b72 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -283,7 +283,8 @@ void Transformations::PreLpt(const std::vector& defaultPrecis }; // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? 
-    if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
+    if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) &&
+        !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2))
         map.insert({ov::element::bf16, ov::element::f32});
 #if defined(OV_CPU_ARM_ENABLE_FP16)
     if (inferencePrecision != ov::element::f16)

From eb9aa08152377ce4c272f6ace755816891ed4c2c Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Sat, 4 Nov 2023 20:16:41 -0700
Subject: [PATCH 11/17] remove gather Node WA, as the gpt-j-6b accuracy issue
 has been fixed

---
 src/plugins/intel_cpu/src/nodes/gather.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp
index 82acd63aab84a3..6a9949365ced87 100644
--- a/src/plugins/intel_cpu/src/nodes/gather.cpp
+++ b/src/plugins/intel_cpu/src/nodes/gather.cpp
@@ -179,12 +179,6 @@ void Gather::createPrimitive() {
     if (isInPlace()) {
         return;
     }
-    // W.A gather bf16/fp16 jit impl has ACC issue on avx2_vnni_2
-    Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA);
-    if (one_of(dataPrecision, Precision::BF16, Precision::FP16) &&
-        dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2))
-        return;
-
 #if defined(OPENVINO_ARCH_X86_64)
     uint64_t idxElPerVec = 1;
     if (!isDynamicNode()) {

From d1494a201236d653cd9626aa967c0dd192724b6d Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Tue, 28 Nov 2023 19:03:36 -0800
Subject: [PATCH 12/17] fix rebase conflict with new API

---
 .../src/emitters/plugin/x64/jit_load_store_emitters.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
index 85a631ca0277cc..08506acecf26dc 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
@@ -1211,7 +1211,7 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 &reg,
         }
     } else if (mayiuse(cpu::x64::avx2_vnni_2)) {
         // to avoid src vmm pollution
-        if (src_prc_ == Precision::FP32) {
+        if (src_prc_ == ov::element::f32) {
             xmm = Xmm(aux_vec_idxs[0]);
         }

From f864ce222cadf9a6fd551a682e58b2d735aad968 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Tue, 28 Nov 2023 21:23:20 -0800
Subject: [PATCH 13/17] change default inferencePrecision from bf16 to f32 on
 avx2_vnni_2

---
 src/plugins/intel_cpu/src/config.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index a3a061891d6634..4882744939163c 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -327,7 +327,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
             if (modelType != ModelType::CNN)
                 inferencePrecision = ov::element::f16;
 #else
-            if (mayiuse(avx512_core_bf16) || mayiuse(avx2_vnni_2))
+            if (mayiuse(avx512_core_bf16))
                 inferencePrecision = ov::element::bf16;
 #endif
         } else {
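Patch 14 below drops the emulation table when avx2_vnni_2 provides a native vcvtneps2bf16. For reference, the round-to-nearest-even truncation that jit_uni_vcvtneps2bf16 emulates on older ISAs is equivalent to this scalar sketch (illustrative only; NaN handling is omitted and the helper name is not from the sources):

    #include <cstdint>
    #include <cstring>

    // f32 -> bf16: keep the upper 16 bits of the f32 bit pattern, rounding
    // on bit 15 and breaking ties toward an even result.
    inline std::uint16_t f32_to_bf16_rne(float f) {
        std::uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));
        const std::uint32_t lsb = (bits >> 16) & 1u;  // parity of the kept LSB
        bits += 0x7FFFu + lsb;                        // round half to even
        return static_cast<std::uint16_t>(bits >> 16);
    }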
From 39c3d87e370d40b820250492bf157d662bc17bc2 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Tue, 12 Dec 2023 22:05:26 -0800
Subject: [PATCH 14/17] skip prepare_table() in jit_uni_vcvtneps2bf16 for
 avx2_vnni_2

---
 .../intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp
index d8332c218b3822..a803995b74df68 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_bf16_emitters.hpp
@@ -13,7 +13,8 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter {
 public:
     jit_uni_vcvtneps2bf16(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
                           ov::element::Type exec_prc = ov::element::bf16) : jit_emitter(host, host_isa, exec_prc) {
-        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
+        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16) &&
+            !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2))
             prepare_table();
     }

From d6400d160adabfe8c73dc21f101fcf74a2ab2f5b Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Mon, 25 Dec 2023 00:09:56 -0800
Subject: [PATCH 15/17] loosen the fp16 support limitation of
 jit_load_store_emitters to avx512_core and avx2

---
 .../plugin/x64/jit_load_store_emitters.cpp    | 138 +++++++++---------
 1 file changed, 65 insertions(+), 73 deletions(-)

diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
index 08506acecf26dc..55ddbe36547014 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp
@@ -474,10 +474,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
     bool is_f16 = (prc == ov::element::f16);
     bool is_signed = prc.is_signed();

-    if (is_f16 && !mayiuse(cpu::x64::avx512_core_fp16) && !mayiuse(cpu::x64::avx2_vnni_2))
-        OPENVINO_THROW("Load emitter in ",
-                       name_,
-                       " only support fp16 on platform with avx512_core_fp16 or avx2_vnni_2.");
+    if (is_f16 && !mayiuse(cpu::x64::avx512_core) && !mayiuse(cpu::x64::avx2))
+        OPENVINO_THROW("Load emitter in ", name_, " only support fp16 on platform with avx512_core or avx2.");

     // Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512)
     // For Ymm register, load capacity is halved (16/2(num) * 32 <= 256)
@@ -502,90 +500,82 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak
     // For load_size == 32/16/8, do load/extension in one go
     // including xmm/ymm tail block for ymm/zmm, so explicit xmm/ymm/zmm
     switch (load_size) {
-    case 32: {
-        // needed here?
- if (!is_zmm) - IE_THROW() << "Load emitter in " << name_ - << " has unexpected number of values(32) to load to non-zmm in load_words_to_dword_extension."; - if (is_bf16) { - h->uni_vpmovzxwd(zmm, ptr[reg + offset]); - h->uni_vpslld(zmm, zmm, 16); - } else if (is_f16) { - h->vcvtph2ps(zmm, ptr[reg + offset]); - } else { - if (is_signed) - h->uni_vpmovsxwd(zmm, ptr[reg + offset]); - else + case 32: { + if (is_bf16) { h->uni_vpmovzxwd(zmm, ptr[reg + offset]); + h->uni_vpslld(zmm, zmm, 16); + } else if (is_f16) { + h->vcvtph2ps(zmm, ptr[reg + offset]); + } else { + if (is_signed) + h->uni_vpmovsxwd(zmm, ptr[reg + offset]); + else + h->uni_vpmovzxwd(zmm, ptr[reg + offset]); + } + break; } - break; - } - case 16: { - if (is_bf16) { - h->uni_vpmovzxwd(ymm, ptr[reg + offset]); - h->uni_vpslld(ymm, ymm, 16); - - } else if (is_f16) { - h->vcvtph2ps(ymm, ptr[reg + offset]); - } else { - if (is_signed) - h->uni_vpmovsxwd(ymm, ptr[reg + offset]); - else - h->uni_vpmovzxwd(ymm, ptr[reg + offset]); - } - break; - } - case 8: { - if (is_bf16) { - h->uni_vpmovzxwd(xmm, ptr[reg + offset]); - h->uni_vpslld(xmm, xmm, 16); - } else if (is_f16) { - h->vcvtph2ps(xmm, ptr[reg + offset]); - } else { - if (is_signed) - h->uni_vpmovsxwd(xmm, ptr[reg + offset]); - else - h->uni_vpmovzxwd(xmm, ptr[reg + offset]); - } - break; - } - default: { - if (is_zmm && load_size > threshold_for_mask_emu_load) { - unsigned int mask = 1; - mask = (mask << (load_size / 2)) - mask; - h->mov(Reg32(aux_gpr_idxs[0]), mask); - h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); + case 16: { if (is_bf16) { - h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); - h->uni_vpslld(vmm, vmm, 16); + h->uni_vpmovzxwd(ymm, ptr[reg + offset]); + h->uni_vpslld(ymm, ymm, 16); } else if (is_f16) { - h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]); + h->vcvtph2ps(ymm, ptr[reg + offset]); } else { if (is_signed) - h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]); + h->uni_vpmovsxwd(ymm, ptr[reg + offset]); else - h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + h->uni_vpmovzxwd(ymm, ptr[reg + offset]); } - } else { - // xmm or ymm version - load_bytes(xmm, reg, offset, load_size); + break; + } + case 8: { if (is_bf16) { - h->uni_vpmovzxwd(vmm, xmm); - h->uni_vpslld(vmm, vmm, 16); + h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + h->uni_vpslld(xmm, xmm, 16); } else if (is_f16) { - h->vcvtph2ps(ymm, xmm); + h->vcvtph2ps(xmm, ptr[reg + offset]); } else { if (is_signed) - h->uni_vpmovsxwd(vmm, xmm); + h->uni_vpmovsxwd(xmm, ptr[reg + offset]); else + h->uni_vpmovzxwd(xmm, ptr[reg + offset]); + } + break; + } + default: { + if (is_zmm && load_size > threshold_for_mask_emu_load) { + unsigned int mask = 1; + mask = (mask << (load_size / 2)) - mask; + h->mov(Reg32(aux_gpr_idxs[0]), mask); + h->kmovw(k_mask, Reg32(aux_gpr_idxs[0])); + if (is_bf16) { + h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + h->uni_vpslld(vmm, vmm, 16); + } else if (is_f16) { + h->vcvtph2ps(vmm | k_mask | T_z, ptr[reg + offset]); + } else { + if (is_signed) + h->uni_vpmovsxwd(vmm | k_mask | T_z, ptr[reg + offset]); + else + h->uni_vpmovzxwd(vmm | k_mask | T_z, ptr[reg + offset]); + } + } else { + // xmm or ymm version + load_bytes(xmm, reg, offset, load_size); + if (is_bf16) { h->uni_vpmovzxwd(vmm, xmm); + h->uni_vpslld(vmm, vmm, 16); + } else if (is_f16) { + h->vcvtph2ps(ymm, xmm); + } else { + if (is_signed) + h->uni_vpmovsxwd(vmm, xmm); + else + h->uni_vpmovzxwd(vmm, xmm); + } } + break; } - break; - } } } @@ -1195,7 +1188,7 @@ void 
jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 ®, store_bytes(reg, offset, store_num * 2); } } else if (is_f16) { - if (mayiuse(cpu::x64::avx512_core_fp16)) { + if (mayiuse(cpu::x64::avx512_core)) { // to avoid src vmm pollution if (src_prc_ == ov::element::f32) { // since avx512, zmm(fp32) => ymm(fp16) @@ -1209,7 +1202,7 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 ®, data_idx = static_cast(ymm.getIdx()); store_bytes(reg, offset, store_num * 2); } - } else if (mayiuse(cpu::x64::avx2_vnni_2)) { + } else if (mayiuse(cpu::x64::avx2)) { // to avoid src vmm pollution if (src_prc_ == ov::element::f32) { xmm = Xmm(aux_vec_idxs[0]); @@ -1222,8 +1215,7 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 ®, store_bytes(reg, offset, store_num * 2); } } else { - IE_THROW() << "Store emitter in " << name_ - << " only support fp16 on platform with avx512_core_fp16 or avx2_vnni_2."; + IE_THROW() << "Store emitter in " << name_ << " only support fp16 on platform with avx512_core or avx2."; } } else { switch (store_num) { From 6a94fe26b743e4559929ed58029239ed7a4d70eb Mon Sep 17 00:00:00 2001 From: liubo-intel Date: Wed, 3 Jan 2024 01:18:12 -0800 Subject: [PATCH 16/17] Apply suggestions from code review: remove 'ov::with_cpu_x86_avx2_vnni_2' api, fix store_num and OPENVINO_THROW issue, keep brgconv related types only for new platform priorities list, update WeightsDecompressionImpl of fullyconnected Node, replace mayiuse() by hasHardwareSupport() for Precision check --- src/inference/dev_api/ie_system_conf.h | 7 ------- src/inference/dev_api/openvino/runtime/system_conf.hpp | 7 ------- src/inference/src/system_conf.cpp | 5 ----- .../emitters/plugin/x64/jit_load_store_emitters.cpp | 8 ++++---- src/plugins/intel_cpu/src/node.cpp | 3 --- src/plugins/intel_cpu/src/nodes/conv.cpp | 10 +++++++++- src/plugins/intel_cpu/src/nodes/fullyconnected.cpp | 5 ++--- src/plugins/intel_cpu/src/nodes/interpolate.cpp | 3 +-- src/plugins/intel_cpu/src/nodes/mvn.cpp | 6 ++---- 9 files changed, 18 insertions(+), 36 deletions(-) diff --git a/src/inference/dev_api/ie_system_conf.h b/src/inference/dev_api/ie_system_conf.h index adc887c7284da6..c0d2d81704f432 100644 --- a/src/inference/dev_api/ie_system_conf.h +++ b/src/inference/dev_api/ie_system_conf.h @@ -109,13 +109,6 @@ using ov::with_cpu_x86_avx2; */ using ov::with_cpu_x86_avx2_vnni; -/** - * @brief Checks whether CPU supports AVX2_VNNI_2 capability - * @ingroup ie_dev_api_system_conf - * @return `True` is AVX2_VNNI_2 instructions are available, `false` otherwise - */ -using ov::with_cpu_x86_avx2_vnni_2; - /** * @brief Checks whether CPU supports AVX 512 capability * @ingroup ie_dev_api_system_conf diff --git a/src/inference/dev_api/openvino/runtime/system_conf.hpp b/src/inference/dev_api/openvino/runtime/system_conf.hpp index 72ecf9a61694ef..c3648a69118e6b 100644 --- a/src/inference/dev_api/openvino/runtime/system_conf.hpp +++ b/src/inference/dev_api/openvino/runtime/system_conf.hpp @@ -97,13 +97,6 @@ OPENVINO_RUNTIME_API bool with_cpu_x86_avx2(); */ OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni(); -/** - * @brief Checks whether CPU supports AVX2_VNNI_2 capability - * @ingroup ov_dev_api_system_conf - * @return `True` is AVX2_VNNI_2 instructions are available, `false` otherwise - */ -OPENVINO_RUNTIME_API bool with_cpu_x86_avx2_vnni_2(); - /** * @brief Checks whether CPU supports AVX 512 capability * @ingroup ov_dev_api_system_conf diff --git a/src/inference/src/system_conf.cpp 
b/src/inference/src/system_conf.cpp index 07278e2dde5fdb..68e6e36df4f051 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -60,11 +60,6 @@ bool with_cpu_x86_avx2_vnni() { return get_cpu_info().has(Xbyak::util::Cpu::tAVX2 | Xbyak::util::Cpu::tAVX_VNNI); } -bool with_cpu_x86_avx2_vnni_2() { - return with_cpu_x86_avx2_vnni() && - get_cpu_info().has(Xbyak::util::Cpu::tAVX_VNNI_INT8 | Xbyak::util::Cpu::tAVX_NE_CONVERT); -} - bool with_cpu_x86_avx512f() { return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F); } diff --git a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp index 55ddbe36547014..d6e6d8f46fbf14 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/x64/jit_load_store_emitters.cpp @@ -474,8 +474,8 @@ void jit_load_emitter::load_words_to_dword_extension(const Vmm &vmm, const Xbyak bool is_f16 = (prc == ov::element::f16); bool is_signed = prc.is_signed(); - if (is_f16 && !mayiuse(cpu::x64::avx512_core) && !mayiuse(cpu::x64::avx2)) - OPENVINO_THROW("Load emitter in ", name_, " only support fp16 on platform with avx512_core or avx2."); + if (is_f16 && !mayiuse(cpu::x64::avx2)) + OPENVINO_THROW("Load emitter in ", name_, " only support fp16 on platform with avx2 or above."); // Ensure extended double words fit inside Zmm (32/2(num) * 32 <= 512) // For Ymm register, load capacity is halved (16/2(num) * 32 <= 128) @@ -1208,14 +1208,14 @@ void jit_store_emitter::store_dword_to_word_extension(const Xbyak::Reg64 ®, xmm = Xmm(aux_vec_idxs[0]); } h->vcvtps2ph(xmm, ymm, 0x4); - if (store_num == 16) { + if (store_num == 8) { h->uni_vmovdqu(ptr[reg + offset], xmm); } else { data_idx = static_cast(xmm.getIdx()); store_bytes(reg, offset, store_num * 2); } } else { - IE_THROW() << "Store emitter in " << name_ << " only support fp16 on platform with avx512_core or avx2."; + OPENVINO_THROW("Store emitter in ", name_, " only support fp16 on platform with avx512_core or avx2."); } } else { switch (store_num) { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 2293c427bfdcb4..d9b5bfc490db8e 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1019,9 +1019,6 @@ const std::vector& Node::getDefaultImplPriority() { impl_desc_type::jit_avx512_dw, impl_desc_type::jit_avx512_1x1, impl_desc_type::jit_avx512, - // [WA]default support after fully evaluate - // impl_desc_type::brgconv_avx2_1x1, - // impl_desc_type::brgconv_avx2, impl_desc_type::jit_avx2_dw, impl_desc_type::jit_avx2_1x1, impl_desc_type::jit_avx2, diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 71506791ae0c3d..6fe0b9175c27d8 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -330,7 +330,7 @@ ov::element::Type Convolution::fusedEltwisePrecision(const NodePtr& fusingNode) } const std::vector& Convolution::getDefaultImplPriority() { - static const std::vector priorities = { + static std::vector priorities = { impl_desc_type::unknown, impl_desc_type::dw_acl, impl_desc_type::winograd_acl, @@ -371,6 +371,14 @@ const std::vector& Convolution::getDefaultImplPriority() { impl_desc_type::ref, }; + priorities.erase(std::remove_if(priorities.begin(), + priorities.end(), + [](impl_desc_type type) { + return !isBrgConvAvailable() && (type == 
impl_desc_type::brgconv_avx2_1x1 ||
+                                                            type == impl_desc_type::brgconv_avx2);
+                                        }),
+                         priorities.end());
+
     return priorities;
 }

diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 3a5ee5a96ce7c1..0d6aa56b9d5907 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -206,6 +206,11 @@ void FullyConnected::getSupportedDescriptors() {
         if (one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) {
             outputDataType = memory::data_type::bf16;
         }
-        // W.A. WeightsDecompression not supported on avx2_vnni_2
-        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2) &&
-            weightsDataType == memory::data_type::u8) {
+        // TODO: Ticket CVS-122347 - support WeightsDecompression with bf16 inputDataType on avx2_vnni_2
+        if (useWeightsDecompressionImpl && !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) {
             inputDataType = outputDataType = memory::data_type::f32;
         }
     } else if (inputDataType == memory::data_type::f16) {
 #if defined(OV_CPU_WITH_ACL)
         // acl fc does not support precisions conversion
diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp
index 935c2a4d04367e..a10784492c28f9 100644
--- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp
+++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp
@@ -2024,9 +2024,8 @@ void Interpolate::initSupportedPrimitiveDescriptors() {
         inputPrecision = ov::element::f32;
     }

-    if ((inputPrecision == ov::element::bf16) && !mayiuse(avx512_core)) {
+    if (!hasHardwareSupport(inputPrecision))
         inputPrecision = ov::element::f32;
-    }

     // support input with rank<=3 only with float precision and planar layout.
     // Jit for avx2(gather is available) and ref for no-avx2 machine.
diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp
index dbfd36296fb484..6c0e34963618fc 100644
--- a/src/plugins/intel_cpu/src/nodes/mvn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp
@@ -1829,10 +1829,8 @@ void MVN::initSupportedPrimitiveDescriptors() {
     ov::element::Type inputPrecision = getOriginalInputPrecisionAtPort(0);
     ov::element::Type outputPrecision = getOriginalOutputPrecisionAtPort(0);

-    if (!mayiuse(avx512_core) && !mayiuse(avx2_vnni_2)) {
-        if (outputPrecision == ov::element::bf16)
-            outputPrecision = ov::element::f32;
-    }
+    if (!hasHardwareSupport(outputPrecision))
+        outputPrecision = ov::element::f32;

     if (!fusedWith.empty()) {
         outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);

From 104b6e27738b0227e6aea27ef4b22fa8cf7bf9f6 Mon Sep 17 00:00:00 2001
From: liubo-intel
Date: Wed, 3 Jan 2024 23:38:46 -0800
Subject: [PATCH 17/17] hasHardwareSupport replacements, Conv
 DefaultImplPriority erase condition for all brgconv

---
 src/plugins/intel_cpu/src/config.cpp                 | 7 ++++---
 src/plugins/intel_cpu/src/nodes/conv.cpp             | 3 +--
 src/plugins/intel_cpu/src/nodes/eltwise.cpp          | 2 +-
 .../src/transformations/transformation_pipeline.cpp  | 3 +--
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 4882744939163c..ba5c54d6bfe219 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -12,6 +12,7 @@
 #include "openvino/runtime/internal_properties.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "utils/debug_capabilities.h"
+#include "utils/precision_support.h"

 #include
 #include

@@ -219,7 +220,7 @@ void Config::readProperties(const ov::AnyMap& prop,
const ModelType modelType) { ". Expected only true/false"); } if (enable) { - if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) { + if (hasHardwareSupport(ov::element::bf16)) { inferencePrecision = ov::element::bf16; } else { OPENVINO_THROW("Platform doesn't support BF16 format"); @@ -234,12 +235,12 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { auto const prec = val.as(); inferencePrecisionSetExplicitly = true; if (prec == ov::element::bf16) { - if (mayiuse(avx512_core) || mayiuse(avx2_vnni_2)) { + if (hasHardwareSupport(ov::element::bf16)) { inferencePrecision = ov::element::bf16; } } else if (prec == ov::element::f16) { #if defined(OPENVINO_ARCH_X86_64) - if (mayiuse(avx512_core_fp16) || mayiuse(avx512_core_amx_fp16) || mayiuse(avx2_vnni_2)) { + if (hasHardwareSupport(ov::element::f16)) { inferencePrecision = ov::element::f16; } #elif defined(OV_CPU_ARM_ENABLE_FP16) diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 6fe0b9175c27d8..a1eb6f49e9900f 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -374,8 +374,7 @@ const std::vector& Convolution::getDefaultImplPriority() { priorities.erase(std::remove_if(priorities.begin(), priorities.end(), [](impl_desc_type type) { - return !isBrgConvAvailable() && (type == impl_desc_type::brgconv_avx2_1x1 || - type == impl_desc_type::brgconv_avx2); + return !isBrgConvAvailable() && (type & impl_desc_type::brgconv); }), priorities.end()); diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index ebff119982cc5f..4ed4174b750aad 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -2199,7 +2199,7 @@ void Eltwise::initSupportedPrimitiveDescriptors() { if (!fusedWith.empty()) { outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } - if (!mayiuse(avx512_core) && !mayiuse(avx2_vnni_2)) { + if (!hasHardwareSupport(ov::element::bf16)) { bool hasBF16 = false; for (auto &inPrc : inputPrecisions) if (inPrc == ov::element::bf16) diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 909453b8be3b72..b98d7a8979701d 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -283,8 +283,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis }; // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? - if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) && - !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) + if (!hasHardwareSupport(ov::element::bf16)) map.insert({ov::element::bf16, ov::element::f32}); #if defined(OV_CPU_ARM_ENABLE_FP16) if (inferencePrecision != ov::element::f16)
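As a closing reference, the word-to-dword widening that the reworked load emitter performs on the AVX2 path corresponds to the following intrinsics sketch (illustrative only, assuming AVX2 plus F16C; these helpers are not part of the patches):

    #include <immintrin.h>
    #include <cstdint>

    // fp16 widening: what `vcvtph2ps ymm, xmm` does for a full 8-element load.
    static __m256 load_f16x8_as_f32(const std::uint16_t* src) {
        const __m128i half = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
        return _mm256_cvtph_ps(half);
    }

    // bf16 widening: zero-extend each 16-bit word to a dword and shift it into
    // the high half, i.e. uni_vpmovzxwd followed by uni_vpslld(.., 16).
    static __m256 load_bf16x8_as_f32(const std::uint16_t* src) {
        const __m128i half = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
        __m256i widened = _mm256_cvtepu16_epi32(half);
        widened = _mm256_slli_epi32(widened, 16);
        return _mm256_castsi256_ps(widened);
    }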