Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cherry-pick] [Inference] Support NNAdapter and ascend310 #35882

Merged
merged 2 commits into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions cmake/external/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ if (LITE_WITH_XPU)
ENDIF()
endif()

# NNAdapter support for the Lite subgraph engine.
# Enabling LITE_WITH_NNADAPTER adds the LITE_SUBGRAPH_WITH_NNADAPTER compile
# definition (directory-scoped via add_definitions). Enabling the Huawei Ascend
# NPU driver on top of that also adds LITE_SUBGRAPH_WITH_NPU and provides a
# default toolkit location that can be overridden with -DNPU_SDK_ROOT=<dir>.
if (LITE_WITH_NNADAPTER)
  add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
  if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
    add_definitions(-DLITE_SUBGRAPH_WITH_NPU)
    # NPU_SDK_ROOT names a directory, so cache it as PATH (not STRING): GUIs
    # then offer a directory chooser and the entry is typed correctly.
    set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "default NPU SDK ROOT")
  endif()
endif()

if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
include(ExternalProject)
set(LITE_PROJECT extern_lite)
Expand Down Expand Up @@ -67,6 +75,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
Expand Down Expand Up @@ -110,6 +121,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=OFF)

Expand All @@ -120,6 +134,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py && sed -i "/general::ssa::ConvertToSSA(cpp_prog)$<SEMICOLON>/d" ${LITE_SOURCES_DIR}/src/extern_lite/lite/model_parser/model_parser.cc
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
Expand All @@ -146,6 +161,11 @@ endif()
if (WITH_ARM)
# Pick the name of the Lite output directory for an ARM build, depending on
# which backend (XPU, NNAdapter, or none) is enabled.
if(LITE_WITH_XPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
elseif(LITE_WITH_NNADAPTER)
message("Enable LITE_WITH_NNADAPTER")
# NOTE(review): this branch assigns LITE_OUTPUT_BIN_DIR only when the Huawei
# Ascend NPU driver is enabled. With NNAdapter on and that driver off, this
# chain does not assign it at all -- confirm which directory Lite produces in
# that configuration and set it here.
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
endif()
Expand Down Expand Up @@ -174,5 +194,16 @@ endfunction()
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)

# Collect the Lite targets that the lite_* libraries list as DEPS.
# lite_full_static is always required. It is set unconditionally so that
# LITE_DEPS is never left empty when NNAdapter is enabled without the Huawei
# Ascend NPU driver (previously no branch assigned it in that configuration).
set(LITE_DEPS lite_full_static)

if (LITE_WITH_NNADAPTER)
  # Location of the NNAdapter runtime library inside the Lite output tree.
  set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so)
  if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
    # Location of the Huawei Ascend NPU driver library for NNAdapter.
    set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
    external_lite_libs(lite_nnadapter ${LITE_NNADAPTER_LIB} ${LITE_NNADAPTER_NPU_LIB})
    list(APPEND LITE_DEPS lite_nnadapter)
  endif()
endif()

add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG)
16 changes: 16 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,22 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);

DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir,
std::string);
DECL_ARGUMENT_FIELD(nnadapter_device_names, NNAdapterDeviceNames,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(nnadapter_context_properties, NNAdapterContextProperties,
std::string);
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_buffer,
NNAdapterSubgraphPartitionConfigBuffer, std::string);
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_path,
NNAdapterSubgraphPartitionConfigPath, std::string);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_token, NNAdapterModelCacheToken,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_buffer, NNAdapterModelCacheBuffer,
std::vector<std::vector<char>>);

// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);

Expand Down
21 changes: 21 additions & 0 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,27 @@ void IRPassManager::CreatePasses(Argument *argument,
new std::string(argument->xpu_autotune_file()));
pass->Set("precision", new std::string(argument->xpu_precision()));
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
// NNAdapter Related
pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
pass->Set("nnadapter_model_cache_dir",
new std::string(argument->nnadapter_model_cache_dir()));
pass->Set(
"nnadapter_device_names",
new std::vector<std::string>(argument->nnadapter_device_names()));
pass->Set("nnadapter_context_properties",
new std::string(argument->nnadapter_context_properties()));
pass->Set("nnadapter_subgraph_partition_config_buffer",
new std::string(
argument->nnadapter_subgraph_partition_config_buffer()));
pass->Set("nnadapter_subgraph_partition_config_path",
new std::string(
argument->nnadapter_subgraph_partition_config_path()));
pass->Set("nnadapter_model_cache_buffer",
new std::vector<std::vector<char>>(
argument->nnadapter_model_cache_buffer()));
pass->Set("nnadapter_model_cache_token",
new std::vector<std::string>(
argument->nnadapter_model_cache_token()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
Expand Down
29 changes: 29 additions & 0 deletions paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,30 @@ void LiteSubgraphPass::SetUpEngine(
std::string autotune_file = Get<std::string>("autotune_file");
std::string precision = Get<std::string>("precision");
bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
// NNAdapter Related
bool use_nnadapter = Get<bool>("use_nnadapter");
std::string nnadapter_model_cache_dir =
Get<std::string>("nnadapter_model_cache_dir");
auto nnadapter_device_names =
Get<std::vector<std::string>>("nnadapter_device_names");
std::string nnadapter_context_properties =
Get<std::string>("nnadapter_context_properties");
std::string nnadapter_subgraph_partition_config_buffer =
Get<std::string>("nnadapter_subgraph_partition_config_buffer");
std::string nnadapter_subgraph_partition_config_path =
Get<std::string>("nnadapter_subgraph_partition_config_path");
auto nnadapter_model_cache_buffer =
Get<std::vector<std::vector<char>>>("nnadapter_model_cache_buffer");
auto nnadapter_model_cache_token =
Get<std::vector<std::string>>("nnadapter_model_cache_token");

lite_api::TargetType target_type;
if (use_gpu) {
target_type = TARGET(kCUDA);
} else if (use_xpu) {
target_type = TARGET(kXPU);
} else if (use_nnadapter) {
target_type = TARGET(kNNAdapter);
} else {
#ifdef PADDLE_WITH_ARM
target_type = TARGET(kARM);
Expand Down Expand Up @@ -292,6 +310,17 @@ void LiteSubgraphPass::SetUpEngine(
config.autotune_file = autotune_file;
config.precision = precision;
config.adaptive_seqlen = adaptive_seqlen;
// NNAdapter Related
config.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
config.nnadapter_device_names = nnadapter_device_names;
config.nnadapter_context_properties = nnadapter_context_properties;
config.nnadapter_subgraph_partition_config_buffer =
nnadapter_subgraph_partition_config_buffer;
config.nnadapter_subgraph_partition_config_path =
nnadapter_subgraph_partition_config_path;
config.nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
config.nnadapter_model_cache_token = nnadapter_model_cache_token;

if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
lite::StrToBinaryFile("./param.bin", config.param);
Expand Down
58 changes: 57 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// NPU related.
CP_MEMBER(use_npu_);
CP_MEMBER(npu_device_id_);
CP_MEMBER(nnadapter_config_);

// profile related.
CP_MEMBER(with_profile_);
Expand Down Expand Up @@ -554,7 +555,7 @@ void AnalysisConfig::Update() {
}

if (use_npu_) {
#ifdef PADDLE_WITH_ASCEND_CL
#if defined(PADDLE_WITH_ASCEND_CL) || defined(LITE_SUBGRAPH_WITH_NPU)
PADDLE_ENFORCE_EQ(use_gpu_, false,
platform::errors::Unavailable(
"Currently, NPU and GPU cannot be enabled in the "
Expand Down Expand Up @@ -833,6 +834,61 @@ std::string AnalysisConfig::Summary() {
return os.PrintTable();
}

// Replaces the stored NNAdapter device names with `names`.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::SetDeviceNames(
    const std::vector<std::string> &names) {
  this->nnadapter_device_names = names;
  return *this;
}

// Replaces the stored NNAdapter context properties string with `properties`.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::SetContextProperties(
    const std::string &properties) {
  this->nnadapter_context_properties = properties;
  return *this;
}

// Replaces the stored NNAdapter model cache directory with `dir`.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheDir(
    const std::string &dir) {
  this->nnadapter_model_cache_dir = dir;
  return *this;
}

// Registers an in-memory model cache buffer under `model_cache_token`.
//
// Three preconditions are checked with PADDLE_ENFORCE_EQ, each reported as
// platform::errors::InvalidArgument: the token must not be empty, the buffer
// must not be empty, and the token must not have been registered before.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers(
const std::string &model_cache_token,
const std::vector<char> &model_cache_buffer) {
PADDLE_ENFORCE_EQ(model_cache_token.empty(), false,
platform::errors::InvalidArgument(
"model_cache_token should not be empty."));
PADDLE_ENFORCE_EQ(model_cache_buffer.empty(), false,
platform::errors::InvalidArgument(
"model_cache_buffer should not be empty."));
// count() yields 0 or 1 for a map; it is compared against `false` here, i.e.
// the token must not be present yet.
PADDLE_ENFORCE_EQ(nnadapter_model_cache_buffers.count(model_cache_token),
false, platform::errors::InvalidArgument(
"model_cache_token has already been set."));

// The token is known to be absent at this point, so this inserts a new entry
// holding a copy of the buffer.
nnadapter_model_cache_buffers[model_cache_token] = model_cache_buffer;
return *this;
}

// Replaces the stored subgraph partition config path with `path`.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath(
    const std::string &path) {
  this->nnadapter_subgraph_partition_config_path = path;
  return *this;
}

// Replaces the stored subgraph partition config buffer with `buffer`.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer(
    const std::string &buffer) {
  this->nnadapter_subgraph_partition_config_buffer = buffer;
  return *this;
}
// Turns the use_nnadapter flag on.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::Enable() {
  this->use_nnadapter = true;
  return *this;
}
// Turns the use_nnadapter flag off.
// Returns this config so that further setters can be chained.
LiteNNAdapterConfig &LiteNNAdapterConfig::Disable() {
  this->use_nnadapter = false;
  return *this;
}

void AnalysisConfig::CollectShapeRangeInfo(
const std::string &shape_range_info_path) {
LOG(INFO) << "In CollectShapeInfo mode, we will disable optimizations and "
Expand Down
36 changes: 36 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,22 @@ bool AnalysisPredictor::CreateExecutor() {
"You tried to use NPU forward propagation, but Paddle was not compiled "
"with WITH_ASCEND_CL."));
#endif
} else if (config_.NNAdapter().use_nnadapter) {
if (config_.lite_engine_enabled()) {
place_ = paddle::platform::CPUPlace();
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
PADDLE_THROW(
platform::errors::Unavailable("You tried to use an NNAdapter lite "
"engine, but Paddle was not compiled "
"with it."));
#endif // LITE_SUBGRAPH_WITH_NNADAPTER
} else {
PADDLE_THROW(
platform::errors::Unavailable("You tried to use NNadapter forward "
"propagation (inference without lite "
"engine), but Paddle was not compiled "
"with LITE_WITH_NNADAPTER."));
}
} else {
place_ = paddle::platform::CPUPlace();
}
Expand Down Expand Up @@ -601,6 +617,26 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
argument_.SetXpuPrecision(config_.xpu_precision_);
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
// NNAdapter related
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
argument_.SetNNAdapterDeviceNames(
config_.NNAdapter().nnadapter_device_names);
argument_.SetNNAdapterContextProperties(
config_.NNAdapter().nnadapter_context_properties);
argument_.SetNNAdapterModelCacheDir(
config_.NNAdapter().nnadapter_model_cache_dir);
argument_.SetNNAdapterSubgraphPartitionConfigBuffer(
config_.NNAdapter().nnadapter_subgraph_partition_config_buffer);
argument_.SetNNAdapterSubgraphPartitionConfigPath(
config_.NNAdapter().nnadapter_subgraph_partition_config_path);
std::vector<std::string> buffer_keys;
std::vector<std::vector<char>> buffer_vals;
for (auto it : config_.NNAdapter().nnadapter_model_cache_buffers) {
buffer_keys.emplace_back(it.first);
buffer_vals.emplace_back(it.second);
}
argument_.SetNNAdapterModelCacheToken(buffer_keys);
argument_.SetNNAdapterModelCacheBuffer(buffer_vals);
LOG(INFO) << "Lite subgraph engine is enabled";
}

Expand Down
20 changes: 20 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,26 @@ TEST(AnalysisPredictor, analysis_off) {
ASSERT_TRUE(predictor->Run(inputs, &outputs));
}

// Exercises the chained LiteNNAdapterConfig setters on a config that has the
// Lite engine enabled. The whole test is compiled out when WIN32 is defined.
#ifndef WIN32
TEST(AnalysisPredictor, lite_nn_adapter_npu) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
config.EnableLiteEngine();
// Disable() followed by Enable() leaves NNAdapter enabled; every setter in the
// chain is called once.
config.NNAdapter()
.Disable()
.Enable()
.SetDeviceNames({"huawei_ascend_npu"})
.SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
.SetModelCacheDir("cache_dirr")
.SetSubgraphPartitionConfigPath("")
.SetModelCacheBuffers("c1", {'c'});
// When NNAdapter support is not compiled in, creating a predictor from this
// config is expected to throw EnforceNotMet. When it is compiled in, the test
// only builds the config and asserts nothing further.
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
EXPECT_THROW(CreatePaddlePredictor<AnalysisConfig>(config),
paddle::platform::EnforceNotMet);
#endif
}
#endif

TEST(AnalysisPredictor, analysis_on) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
Expand Down
33 changes: 33 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,34 @@ namespace paddle {
class AnalysisPredictor;
struct MkldnnQuantizerConfig;

///
/// \brief NNAdapter options for the Lite subgraph engine.
///
/// Every setter returns a reference to the same object, so calls can be
/// chained. The struct carries PD_INFER_DECL, like AnalysisConfig, because its
/// member functions are defined out of line and must be exported from the
/// inference library for client code to link against them.
///
struct PD_INFER_DECL LiteNNAdapterConfig {
  /// Whether NNAdapter is enabled; off by default.
  bool use_nnadapter{false};
  /// Model cache directory, as given to SetModelCacheDir().
  std::string nnadapter_model_cache_dir;
  /// Model cache buffers keyed by their token; filled by
  /// SetModelCacheBuffers().
  std::map<std::string, std::vector<char>> nnadapter_model_cache_buffers;
  /// Device names, as given to SetDeviceNames().
  std::vector<std::string> nnadapter_device_names;
  /// Context properties string, as given to SetContextProperties().
  std::string nnadapter_context_properties;
  /// Subgraph partition config path, as given to
  /// SetSubgraphPartitionConfigPath().
  std::string nnadapter_subgraph_partition_config_path;
  /// Subgraph partition config buffer, as given to
  /// SetSubgraphPartitionConfigBuffer().
  std::string nnadapter_subgraph_partition_config_buffer;

  /// \brief Set the NNAdapter device names.
  LiteNNAdapterConfig& SetDeviceNames(const std::vector<std::string>& names);

  /// \brief Set the NNAdapter context properties string.
  LiteNNAdapterConfig& SetContextProperties(const std::string& properties);

  /// \brief Set the model cache directory.
  LiteNNAdapterConfig& SetModelCacheDir(const std::string& dir);

  /// \brief Register one model cache buffer under a token.
  LiteNNAdapterConfig& SetModelCacheBuffers(
      const std::string& model_cache_token,
      const std::vector<char>& model_cache_buffer);

  /// \brief Set the subgraph partition config file path.
  LiteNNAdapterConfig& SetSubgraphPartitionConfigPath(const std::string& path);

  /// \brief Set the subgraph partition config content.
  LiteNNAdapterConfig& SetSubgraphPartitionConfigBuffer(
      const std::string& buffer);

  /// \brief Turn NNAdapter on.
  LiteNNAdapterConfig& Enable();
  /// \brief Turn NNAdapter off.
  LiteNNAdapterConfig& Disable();
};

///
/// \brief configuration manager for AnalysisPredictor.
/// \since 1.7.0
Expand Down Expand Up @@ -692,6 +720,8 @@ struct PD_INFER_DECL AnalysisConfig {
///
std::string Summary();

///
/// \brief Access the NNAdapter configuration held by this config.
///
/// \return LiteNNAdapterConfig& A mutable reference to the stored NNAdapter
/// config, so its setters can be chained directly on the result.
///
LiteNNAdapterConfig& NNAdapter() { return nnadapter_config_; }

protected:
// Update the config.
void Update();
Expand Down Expand Up @@ -800,6 +830,9 @@ struct PD_INFER_DECL AnalysisConfig {
std::string xpu_precision_;
bool xpu_adaptive_seqlen_;

// NNAdapter related
LiteNNAdapterConfig nnadapter_config_;

// mkldnn related.
int mkldnn_cache_capacity_{10};
bool use_mkldnn_quantizer_{false};
Expand Down
6 changes: 3 additions & 3 deletions paddle/fluid/inference/lite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ if(XPU_SDK_ROOT)
set(XPU_DEPS xpuapi xpurt)
endif()

cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS})
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS})
cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
Loading