Skip to content

Commit

Permalink
merge upstream and resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
chenfeiyu committed Feb 18, 2022
2 parents e663c32 + bbf31a4 commit 891dd4e
Show file tree
Hide file tree
Showing 205 changed files with 11,525 additions and 7,028 deletions.
32 changes: 20 additions & 12 deletions cmake/external/poplar.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,35 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

if(WITH_IPU)
set(POPLAR_DIR CACHE PATH "Path to a Poplar install")
set(POPART_DIR CACHE PATH "Path to a Popart install")
set(POPLAR_SDK_DIR CACHE PATH "Path to an extracted SDK archive or to a Poplar & Popart install directory (Will populate POPLAR_DIR and POPART_DIR)")

# support setting SDK both from environment variable or command line arguments

if(DEFINED ENV{POPLAR_SDK_DIR})
set(POPLAR_SDK_DIR $ENV{POPLAR_SDK_DIR})
endif()
if(EXISTS ${POPLAR_SDK_DIR})
execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "popart*"
OUTPUT_VARIABLE POPART_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "poplar-*" -o -name "poplar"
OUTPUT_VARIABLE POPLAR_DIR OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT IS_DIRECTORY "${POPLAR_DIR}")
message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'")
endif()
if(NOT IS_DIRECTORY "${POPART_DIR}")
message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'")
endif()
else()
message(FATAL_ERROR "You must provide a path to a Poplar install using export POPLAR_SDK_DIR=/path/to/poplar_sdk")
endif()

message("POPLAR_DIR is ${POPLAR_DIR}")
message("POPART_DIR is ${POPART_DIR}")
if(DEFINED ENV{POPLAR_DIR})
set(POPLAR_DIR $ENV{POPLAR_DIR})
endif()
if(DEFINED ENV{POPART_DIR})
set(POPART_DIR $ENV{POPART_DIR})
endif()

if(EXISTS ${POPLAR_DIR})
message("POPLAR_DIR is ${POPLAR_DIR}")
if(NOT IS_DIRECTORY "${POPLAR_DIR}")
message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'")
endif()
list(APPEND CMAKE_PREFIX_PATH ${POPLAR_DIR})
set(ENABLE_POPLAR_CMD "source ${POPLAR_DIR}/enable.sh")
find_package(poplar REQUIRED)
Expand All @@ -45,8 +49,11 @@ if(WITH_IPU)
if(NOT poplar_FOUND)
message(FATAL_ERROR "You must provide a path to a Poplar install using -DPOPLAR_DIR=/path/to/popart/build/install")
endif()

if(EXISTS ${POPART_DIR})
message("POPART_DIR is ${POPART_DIR}")
if(NOT IS_DIRECTORY "${POPART_DIR}")
message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'")
endif()
list(APPEND CMAKE_PREFIX_PATH ${POPART_DIR})
set(ENABLE_POPART_CMD "source ${POPART_DIR}/enable.sh")
find_package(popart REQUIRED COMPONENTS popart-only)
Expand All @@ -56,6 +63,7 @@ if(WITH_IPU)
if(NOT popart_FOUND)
message(FATAL_ERROR "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build")
endif()

add_definitions(-DONNX_NAMESPACE=onnx)
add_custom_target(extern_poplar DEPENDS poplar popart-only)
endif()
39 changes: 39 additions & 0 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1036,3 +1036,42 @@ function(generate_dummy_static_lib)
add_library(${dummy_LIB_NAME} STATIC ${dummy_FILE_PATH})
endfunction()

# math_library(TARGET [DEPS dep1 dep2 ...])
#
# Creates a math library target named <TARGET> from the source files
# <TARGET>.cc, <TARGET>.cu and/or <TARGET>.cu.cc located next to the
# current CMakeLists.txt. The interface mirrors cc_library, but it
# splits GPU/CPU sources and links a common set of math dependencies
# (device_context, framework_proto, enforce) automatically.
function(math_library TARGET)
  set(cc_srcs)
  set(cu_srcs)
  # Dependencies every math library links against.
  set(math_common_deps device_context framework_proto enforce)
  if(WITH_GPU)
    # CUDA >= 11 bundles cub; older toolkits need the external cub target.
    # VERSION_LESS is required here: LESS does integer comparison and is
    # not valid for dotted version strings such as "10.2".
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
      list(APPEND math_common_deps cub)
    endif()
  endif()

  # Only the multi-value DEPS keyword is accepted; define the other
  # argument lists explicitly instead of relying on undefined variables.
  set(options)
  set(oneValueArgs)
  set(multiValueArgs DEPS)
  cmake_parse_arguments(math_library "${options}" "${oneValueArgs}"
                        "${multiValueArgs}" ${ARGN})

  # Collect whichever source variants exist for this target.
  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc")
    list(APPEND cc_srcs ${TARGET}.cc)
  endif()
  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu")
    list(APPEND cu_srcs ${TARGET}.cu)
  endif()
  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc")
    list(APPEND cu_srcs ${TARGET}.cu.cc)
  endif()

  list(LENGTH cc_srcs cc_srcs_len)
  if(WITH_GPU)
    nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs}
               DEPS ${math_library_DEPS} ${math_common_deps})
  elseif(WITH_ROCM)
    hip_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs}
                DEPS ${math_library_DEPS} ${math_common_deps})
  elseif(${cc_srcs_len} GREATER 0)
    # CPU-only build: skip target creation entirely when no .cc exists.
    cc_library(${TARGET} SRCS ${cc_srcs}
               DEPS ${math_library_DEPS} ${math_common_deps})
  endif()
endfunction()

8 changes: 3 additions & 5 deletions paddle/fluid/distributed/common/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,16 @@
#include <utility>
#include <vector>

#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/kernels/funcs/blas/blas.h"

namespace paddle {
namespace distributed {

template <typename T>
inline paddle::operators::math::BlasT<paddle::platform::CPUDeviceContext, T>
GetBlas() {
inline pten::funcs::BlasT<paddle::platform::CPUDeviceContext, T> GetBlas() {
paddle::platform::CPUDeviceContext cpu_ctx;
return paddle::operators::math::GetBlas<paddle::platform::CPUDeviceContext,
T>(cpu_ctx);
return pten::funcs::GetBlas<paddle::platform::CPUDeviceContext, T>(cpu_ctx);
}

template <typename T>
Expand Down
14 changes: 4 additions & 10 deletions paddle/fluid/distributed/ps/service/communicator/communicator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1161,8 +1161,7 @@ void GeoCommunicator::SendDense(const CommContext &send_ctx) {
t_delta->mutable_data<float>(t_latest.dims(), cpu_ctx.GetPlace());

auto blas =
paddle::operators::math::GetBlas<platform::CPUDeviceContext, float>(
cpu_ctx);
pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
blas.VSUB(t_latest.numel(), t_latest.data<float>(),
t_timestamp->data<float>(), t_delta->data<float>());

Expand Down Expand Up @@ -1201,8 +1200,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
t_delta->mutable_data<float>(t_latest->dims(), cpu_ctx.GetPlace());

auto blas =
paddle::operators::math::GetBlas<platform::CPUDeviceContext, float>(
cpu_ctx);
pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
blas.VSUB(t_latest->numel(), t_pserver.data<float>(), t_old->data<float>(),
t_delta->data<float>());
blas.VADD(t_latest->numel(), t_latest->data<float>(),
Expand Down Expand Up @@ -1303,9 +1301,7 @@ void GeoCommunicator::SendSparse(const std::string &varname,
t_delta->set_rows(sparse_ids);
t_delta->set_height(t_latest.dims()[0]);

auto blas =
paddle::operators::math::GetBlas<platform::CPUDeviceContext, float>(
cpu_ctx);
auto blas = pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
float coefficient = 1.0 / static_cast<float>(trainers_);

std::vector<float *> push_g_vec;
Expand Down Expand Up @@ -1371,9 +1367,7 @@ void GeoCommunicator::RecvSparse(const std::string &varname, int table_id,
v_delta.resize(numel);

paddle::platform::CPUDeviceContext cpu_ctx;
auto blas =
paddle::operators::math::GetBlas<platform::CPUDeviceContext, float>(
cpu_ctx);
auto blas = pten::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);

for (auto j = 0; j < static_cast<int>(keys.size()); ++j) {
VLOG(5) << "DEBUG GeoCommunicator::RecvSparse recv sparse key" << keys[j]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
#include "paddle/pten/kernels/funcs/blas/blas.h"
#include "paddle/pten/kernels/funcs/math_function.h"

#include "paddle/fluid/distributed/ps/service/ps_client.h"
Expand Down
64 changes: 14 additions & 50 deletions paddle/fluid/framework/custom_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,17 @@ static void RunKernelFunc(pten::KernelContext* ctx,
if (backend == pten::Backend::CPU) {
// do nothing
} else {
#ifdef PADDLE_WITH_CUSTOM_DEVICE
size_t device_type_id_ = static_cast<size_t>(backend) -
static_cast<size_t>(pten::Backend::ALL_BACKEND);
std::string device_type = pten::GetGlobalDeviceType(device_type_id_);
if (!device_type.empty()) {
auto custom_ctx =
ctx->GetDeviceContext<paddle::platform::CustomDeviceContext>();
dev_ctx.set_stream(custom_ctx.stream());
return;
}
#endif
LOG(ERROR) << "[CUSTOM KERNEL] Unsupported kernel backend: " << backend
<< " with compiled Paddle.";
return;
Expand Down Expand Up @@ -343,25 +354,15 @@ void RegisterKernelWithMetaInfoMap(
}
}

void LoadCustomKernelLib(const std::string& dso_lib_path) {
void LoadCustomKernelLib(const std::string& dso_lib_path, void* dso_handle) {
#ifdef _LINUX
void* dso_handle = nullptr;
int dynload_flags = RTLD_NOW | RTLD_LOCAL;
dso_handle = dlopen(dso_lib_path.c_str(), dynload_flags);

// MUST valid dso_lib_path
PADDLE_ENFORCE_NOT_NULL(
dso_handle,
platform::errors::InvalidArgument(
"Fail to open library: %s with error: %s", dso_lib_path, dlerror()));

typedef OpKernelInfoMap& get_op_kernel_info_map_t();
auto* func = reinterpret_cast<get_op_kernel_info_map_t*>(
dlsym(dso_handle, "PD_GetOpKernelInfoMap"));

if (func == nullptr) {
LOG(INFO) << "Skipped lib [" << dso_lib_path << "]: fail to find "
<< "PD_GetOpKernelInfoMap symbol in this lib.";
LOG(WARNING) << "Skipped lib [" << dso_lib_path << "]: fail to find "
<< "PD_GetOpKernelInfoMap symbol in this lib.";
return;
}
auto& op_kernel_info_map = func();
Expand All @@ -373,42 +374,5 @@ void LoadCustomKernelLib(const std::string& dso_lib_path) {
return;
}

// List all libs with given path
std::vector<std::string> ListAllLib(const std::string& libs_path) {
DIR* dir = nullptr;
dir = opendir(libs_path.c_str());

// MUST valid libs_path
PADDLE_ENFORCE_NOT_NULL(dir, platform::errors::InvalidArgument(
"Fail to open path: %s", libs_path));

dirent* ptr = nullptr;
std::vector<std::string> libs;
std::regex express(".*\\.so");
std::match_results<std::string::iterator> results;
while ((ptr = readdir(dir)) != nullptr) {
std::string filename(ptr->d_name);
if (std::regex_match(filename.begin(), filename.end(), results, express)) {
libs.emplace_back(libs_path + '/' + filename);
LOG(INFO) << "Found lib [" << filename << "]";
} else {
VLOG(3) << "Skipped file [" << filename << "] without .so postfix";
}
}
closedir(dir);
return libs;
}

// Load custom kernels with given path
void LoadCustomKernel(const std::string& libs_path) {
VLOG(3) << "Try loading custom libs from: [" << libs_path << "]";
std::vector<std::string> libs = ListAllLib(libs_path);
for (auto& lib_path : libs) {
LoadCustomKernelLib(lib_path);
}
LOG(INFO) << "Finished in LoadCustomKernel with libs_path: [" << libs_path
<< "]";
}

} // namespace framework
} // namespace paddle
5 changes: 1 addition & 4 deletions paddle/fluid/framework/custom_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@ limitations under the License. */
namespace paddle {
namespace framework {

// Load custom kernel lib from given path
void LoadCustomKernel(const std::string& libs_path);

void LoadCustomKernelLib(const std::string& dso_lib_path);
void LoadCustomKernelLib(const std::string& dso_lib_path, void* dso_handle);

// Load custom kernel api: register kernel after user compiled
void LoadOpKernelInfoAndRegister(const std::string& dso_name);
Expand Down
10 changes: 5 additions & 5 deletions paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/pten/kernels/funcs/blas/blas.h"

namespace paddle {
namespace framework {
Expand Down Expand Up @@ -121,14 +121,14 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,

// broadcast biases
std::vector<float> ones(m, 1.0f);
paddle::operators::math::CBlas<float>::GEMM(
pten::funcs::CBlas<float>::GEMM(
CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, 1, alpha, &ones[0], 1,
&combined_biases[0], n, 0.0f, embeddings_data, n);

// Wx*embeddings + biases
paddle::operators::math::CBlas<float>::GEMM(
CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
embedding_data, k, weightx_data, n, beta, embeddings_data, n);
pten::funcs::CBlas<float>::GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, k, alpha, embedding_data, k,
weightx_data, n, beta, embeddings_data, n);
op_desc.SetInput("Embeddings", {embeddings});

op_desc.SetInput("H0", {});
Expand Down
13 changes: 13 additions & 0 deletions paddle/fluid/framework/ir/ipu/forward_graph_extract_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,19 @@ void ForwardGraphExtractPass::ApplyImpl(ir::Graph* graph) const {
}
}
}
// learning_rate var
for (auto* node : all_ops[OpRole::kOptimize]) {
if (node->Op()->Inputs().count("LearningRate") &&
!node->Op()->Inputs().at("LearningRate").empty()) {
auto lr_var_name = node->Op()->Inputs().at("LearningRate").front();
for (auto* in_var : node->inputs) {
if (in_var->Name() == lr_var_name) {
VLOG(10) << "found LearningRate var: " << in_var->Name();
forward_vars.insert(in_var);
}
}
}
}
// control_vars & backward_vars
for (auto* node : graph->Nodes()) {
if (!node->IsVar()) {
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/ir/ipu/inference_process_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
if (num_ipus > 1) {
ipu_strategy_instance_->need_avg_shard = true;
ipu_strategy_instance_->popart_options.virtualGraphMode =
platform::ipu::VirtualGraphMode::Manual;
popart::VirtualGraphMode::Manual;
} else {
ipu_strategy_instance_->need_avg_shard = false;
ipu_strategy_instance_->popart_options.virtualGraphMode =
platform::ipu::VirtualGraphMode::Off;
popart::VirtualGraphMode::Off;
}
// total num IPUs = num_ipus * replica_num
ipu_strategy_instance_->num_ipus = num_ipus * replica_num;
Expand Down
Loading

0 comments on commit 891dd4e

Please sign in to comment.