[RISCV64] add nhwc layout support for eltwise executor (#26531)
### Details:
 - *Add `nhwc` layout support to the SHL eltwise executor (a minimal sketch of the layout check appears after this section)*
 - *Enable some CPU functional tests with the `nhwc` layout*

### Tickets:
 - *N/A*
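
The core of the change is a layout-unification check in `ShlEltwiseExecutorBuilder::isSupported` (see the `shl_eltwise.cpp` diff below): the executor accepts descriptors only when every input and output shares one layout, plain (`ncsp`) or channels-last (`nspc`), and channels-last descriptors agree on rank. The following is a minimal standalone sketch of that rule; `Layout`, `Desc`, and `all_same_layout` are simplified stand-ins invented for illustration, not the plugin's actual `MemoryDesc` API:

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Simplified stand-ins for the plugin's MemoryDesc/LayoutType machinery.
enum class Layout { ncsp, nspc, blocked };

struct Desc {
    Layout layout;
    std::size_t rank;  // number of block dims
};

// Mirrors the idea of the new check: every input and output must share one
// layout (plain or channels-last), and channels-last descriptors must also
// agree on rank; blocked layouts (nCsp8c/nCsp16c) are rejected outright.
bool all_same_layout(const std::vector<Desc>& srcs, const std::vector<Desc>& dsts) {
    if (srcs.empty() || srcs.front().layout == Layout::blocked)
        return false;
    const Layout unified = srcs.front().layout;
    const std::size_t rank = srcs.front().rank;
    auto ok = [&](const Desc& d) {
        if (d.layout == Layout::nspc && d.rank != rank)
            return false;  // nspc descriptors of different rank cannot be unified
        return d.layout == unified;
    };
    return std::all_of(srcs.begin(), srcs.end(), ok) &&
           std::all_of(dsts.begin(), dsts.end(), ok);
}

int main() {
    // Two channels-last inputs of rank 4 and one channels-last output: accepted.
    std::vector<Desc> srcs{{Layout::nspc, 4}, {Layout::nspc, 4}};
    std::vector<Desc> dsts{{Layout::nspc, 4}};
    return all_same_layout(srcs, dsts) ? 0 : 1;
}
```

Deriving the unified layout from the first source descriptor keeps the check cheap; the actual executor additionally maps each descriptor to an SHL `csinn_layout_enum` per algorithm, as the diff shows.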
BHbean authored Oct 24, 2024
1 parent 27780a6 commit 2a9c69d
Showing 4 changed files with 31 additions and 21 deletions.
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -2583,6 +2583,8 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
        supportedPrimitiveDescriptors.emplace_back(nodeDesc);
    };

    if (isChannelsFirstApplicable)
        addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
    addDesc(supportedPrimitiveDescriptors, Planar);

    canUseEltwiseExecPtr = !supportedPrimitiveDescriptors.empty();
39 changes: 23 additions & 16 deletions src/plugins/intel_cpu/src/nodes/executors/shl/shl_eltwise.cpp
@@ -6,21 +6,11 @@
#include "shl_utils.hpp"
#include "csinn/csi_nn.h"
#include "utils/debug_capabilities.h"
#include "memory_desc/cpu_blocked_memory_desc.h"

namespace ov {
namespace intel_cpu {

inline void log_unsupported_prec(const std::vector<MemoryDescPtr>& srcDescs,
                                 const std::vector<MemoryDescPtr>& dstDescs,
                                 const Algorithm eltwiseAlgorithm) {
    std::string srcPrec;
    for (size_t i = 0; i < srcDescs.size(); i++) {
        srcPrec += srcDescs[i]->getPrecision().to_string() + " ";
    }
    DEBUG_LOG(algToString(eltwiseAlgorithm), ": provided combination of src precisions: [", srcPrec,
              "] and dst precision: ", dstDescs[0]->getPrecision().to_string(), " is not supported");
}

bool ShlEltwiseExecutor::isEltwiseAlgorithmSupported(Algorithm algorithm) {
    if (one_of(algorithm, Algorithm::EltwiseAdd,
               Algorithm::EltwiseSubtract,
@@ -53,6 +43,26 @@ bool ShlEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs,
        return false;
    }

    // check that all input and output layouts are equal
    if (srcDescs.front()->hasLayoutType(LayoutType::nCsp16c) || srcDescs.front()->hasLayoutType(LayoutType::nCsp8c)) {
        DEBUG_LOG("ShlEltwise does not support 'nCsp16c' or 'nCsp8c' layouts");
        return false;
    }
    const auto unifiedLayout = srcDescs.front()->hasLayoutType(LayoutType::ncsp) ? LayoutType::ncsp : LayoutType::nspc;
    const auto unifiedRank = srcDescs.front()->as<BlockedMemoryDesc>()->getBlockDims().size();
    auto has_unified_layout = [unifiedLayout, unifiedRank](const MemoryDescPtr& desc) {
        if (desc->hasLayoutType(LayoutType::nspc)) {  // 'nspc' descriptors must also have the same rank
            if (desc->as<BlockedMemoryDesc>()->getBlockDims().size() != unifiedRank)
                return false;
        }
        return desc->hasLayoutType(unifiedLayout);
    };
    if (!(std::all_of(srcDescs.cbegin(), srcDescs.cend(), has_unified_layout) &&
          std::all_of(dstDescs.cbegin(), dstDescs.cend(), has_unified_layout))) {
        DEBUG_LOG("ShlEltwise requires all inputs and outputs to share the same 'ncsp' or 'nspc' layout");
        return false;
    }

    for (const auto& srcDesc : srcDescs) {
        csinn_layout_enum supportedLayout = getShlDataLayoutByMemoryDesc(srcDesc);
        switch (eltwiseAttrs.algorithm) {
@@ -93,14 +103,11 @@ bool ShlEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs,
    srcTensors = std::vector<ShlTensor>(srcDescs.size());
    dstTensors = std::vector<ShlTensor>(dstDescs.size());

    // Allocate Shl session
    sess = ShlSession();

    for (size_t i = 0; i < srcDescs.size(); i++) {
        srcTensors[i] = ShlTensor(sess, precisionToShlDataType(srcDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(srcDescs[i]), srcDescs[i]->getShape().getStaticDims());
        srcTensors[i] = ShlTensor(sess, precisionToShlDataType(srcDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(srcDescs[i]), srcDescs[i]->as<BlockedMemoryDesc>()->getBlockDims());
    }
    for (size_t i = 0; i < dstDescs.size(); i++) {
        dstTensors[i] = ShlTensor(sess, precisionToShlDataType(dstDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(dstDescs[i]), dstDescs[i]->getShape().getStaticDims());
        dstTensors[i] = ShlTensor(sess, precisionToShlDataType(dstDescs[i]->getPrecision()), getShlDataLayoutByMemoryDesc(dstDescs[i]), dstDescs[i]->as<BlockedMemoryDesc>()->getBlockDims());
    }

    std::function<int()> initFunc = nullptr;
@@ -48,8 +48,6 @@ class ReLuConcatConvSumInPlaceTest : virtual public SubgraphBaseStaticTest {
    const size_t convOutChannels = 64;
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
    const auto targetFormat = with_cpu_x86_avx512_core() ? nChw16c : nChw8c;
#elif defined(OV_CPU_WITH_SHL)
    const auto targetFormat = nchw;
#else
    const auto targetFormat = nhwc;
#endif
@@ -64,9 +64,12 @@ std::vector<CPUSpecificParams> filterCPUInfoForDeviceWithFP16(const std::vector<
}

std::vector<CPUSpecificParams> filterCPUSpecificParams(const std::vector<CPUSpecificParams> &paramsVector) {
    static const std::vector<CPUTestUtils::cpu_memory_format_t> supported_f = {CPUTestUtils::cpu_memory_format_t::ncw,
                                                                               CPUTestUtils::cpu_memory_format_t::nchw,
                                                                               CPUTestUtils::cpu_memory_format_t::ncdhw};
    static const std::vector<CPUTestUtils::cpu_memory_format_t> supported_f = {CPUTestUtils::cpu_memory_format_t::nwc,
                                                                               CPUTestUtils::cpu_memory_format_t::ncw,
                                                                               CPUTestUtils::cpu_memory_format_t::nchw,
                                                                               CPUTestUtils::cpu_memory_format_t::nhwc,
                                                                               CPUTestUtils::cpu_memory_format_t::ndhwc,
                                                                               CPUTestUtils::cpu_memory_format_t::ncdhw};
    std::vector<CPUSpecificParams> filteredParamsVector = paramsVector;
    filteredParamsVector.erase(std::remove_if(filteredParamsVector.begin(),
                                              filteredParamsVector.end(),
