Skip to content

Commit

Permalink
fix conflicts, test=develop
Browse files Browse the repository at this point in the history
  • Loading branch information
LDOUBLEV committed Aug 24, 2020
2 parents 149b466 + 0e81626 commit c581fc8
Show file tree
Hide file tree
Showing 285 changed files with 24,463 additions and 3,902 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ include(generic) # simplify cmake module
# TODO(Shibo Tao): remove find_package(CUDA) completely.
find_package(CUDA QUIET)
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})

option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN" OFF)
if (WITH_GPU AND WITH_XPU)
message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
endif()
# cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them.
if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15))
message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. "
Expand Down
5 changes: 5 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ if(WITH_BOX_PS)
add_definitions(-DPADDLE_WITH_BOX_PS)
endif()

if(WITH_XPU)
message(STATUS "Compile with XPU!")
add_definitions(-DPADDLE_WITH_XPU)
endif()

if(WITH_GPU)
add_definitions(-DPADDLE_WITH_CUDA)
add_definitions(-DEIGEN_USE_GPU)
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/cub.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ include(ExternalProject)
set(CUB_PREFIX_DIR ${THIRD_PARTY_PATH}/cub)
set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub/src/extern_cub)
set(CUB_REPOSITORY https://github.com/NVlabs/cub.git)
set(CUB_TAG 1.9.8)
set(CUB_TAG 1.8.0)

cache_third_party(extern_cub
REPOSITORY ${CUB_REPOSITORY}
Expand Down
54 changes: 54 additions & 0 deletions cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
if (NOT WITH_XPU)
return()
endif()

INCLUDE(ExternalProject)
SET(XPU_PROJECT "extern_xpu")
SET(XPU_URL "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING "" FORCE)
SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
SET(XPU_API_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/api/include")
SET(XPU_RUNTIME_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/runtime/include")
SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib")

SET(XPU_API_LIB_NAME "libxpuapi.so")
SET(XPU_RT_LIB_NAME "libxpurt.so")
SET(XPU_SIM_LIB_NAME "libxpusim.so")
SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
SET(XPU_SIM_LIB "${XPU_LIB_DIR}/${XPU_SIM_LIB_NAME}")

SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")

INCLUDE_DIRECTORIES(${XPU_API_INC_DIR})
INCLUDE_DIRECTORIES(${XPU_RUNTIME_INC_DIR})

FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
"PROJECT(XPU)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY xpu/api xpu/runtime xpu/lib \n"
" DESTINATION ${XPU_INSTALL_DIR})\n")

ExternalProject_Add(
${XPU_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${XPU_SOURCE_DIR}
DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate ${XPU_URL} -c -q -O xpu.tar.gz
&& tar xvf xpu.tar.gz
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
)

ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL)
set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}")

# generate a static dummy target to track xpulib dependencies
# for cc_library(xxx SRCS xxx.c DEPS xpulib)
generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake")

TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_SIM_LIB})
ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
18 changes: 16 additions & 2 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ function(op_library TARGET)
set(hip_cu_srcs)
set(miopen_hip_cc_srcs)
set(cu_cc_srcs)
set(xpu_cc_srcs)
set(cudnn_cu_cc_srcs)
set(cudnn_cu_srcs)
set(CUDNN_FILE)
Expand Down Expand Up @@ -60,6 +61,12 @@ function(op_library TARGET)
list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
endif()
endif()
if(WITH_XPU)
string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc)
list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc)
endif()
endif()
else()
foreach(src ${op_library_SRCS})
if (${src} MATCHES ".*\\.hip.cu$")
Expand All @@ -76,6 +83,8 @@ function(op_library TARGET)
list(APPEND mkldnn_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cu.cc$")
list(APPEND cu_cc_srcs ${src})
elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$")
list(APPEND xpu_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
else()
Expand Down Expand Up @@ -109,7 +118,7 @@ function(op_library TARGET)
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
else()
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
endif()

Expand Down Expand Up @@ -150,10 +159,11 @@ function(op_library TARGET)
list(LENGTH cu_srcs cu_srcs_len)
list(LENGTH cu_cc_srcs cu_cc_srcs_len)
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH hip_cu_srcs hip_cu_srcs_len)
list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0)
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0)
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
set(pybind_flag 1)
endif()
Expand All @@ -179,6 +189,9 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n")
endif()

if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, XPU);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for MKLDNN
if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
# Append first implemented MKLDNN activation operator
Expand Down Expand Up @@ -228,6 +241,7 @@ function(register_operators)

file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
string(REPLACE "_mkldnn" "" OPS "${OPS}")
string(REPLACE "_xpu" "" OPS "${OPS}")
string(REPLACE ".cc" "" OPS "${OPS}")
list(REMOVE_DUPLICATES OPS)
list(LENGTH register_operators_DEPS register_operators_DEPS_len)
Expand Down
5 changes: 5 additions & 0 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ if(WITH_GPU)
file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage
endif(WITH_GPU)

if(WITH_XPU)
include(external/xpu) # download, build, install xpu
list(APPEND third_party_deps extern_xpu)
endif(WITH_XPU)

if(WITH_PSLIB)
include(external/pslib) # download, build, install pslib
list(APPEND third_party_deps extern_pslib)
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor
cc_library(attribute SRCS attribute.cc DEPS framework_proto boost)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
device_context)

cc_library(op_version_registry SRCS op_version_registry.cc DEPS framework_proto boost)
cc_test(op_version_registry_test SRCS op_version_registry_test.cc DEPS op_version_registry)

cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute glog)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(no_need_buffer_vars_inference SRCS no_need_buffer_vars_inference.cc DEPS attribute device_context)
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/framework/dlpack_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> {
return ctx;
}

inline ::DLContext operator()(const platform::XPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
}

inline ::DLContext operator()(const platform::CUDAPlace &place) const {
#ifdef PADDLE_WITH_CUDA
::DLContext ctx;
Expand Down
17 changes: 13 additions & 4 deletions paddle/fluid/framework/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -444,22 +444,31 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
int64_t max_memory_size = GetEagerDeletionThreshold();
std::unique_ptr<GarbageCollector> gc;
if (!ctx->force_disable_gc_ && max_memory_size >= 0) {
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(place_)) {
#ifdef PADDLE_WITH_CUDA
if (IsFastEagerDeletionModeEnabled()) {
gc.reset(new UnsafeFastGPUGarbageCollector(
BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size));
} else {
gc.reset(new DefaultStreamGarbageCollector(
BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size));
}
} else if (platform::is_cpu_place(place_)) {
#else
PADDLE_THROW(
platform::errors::Unimplemented("No GPU gc found in CPU/XPU paddle"));
#endif
} else if (platform::is_cpu_place(place_)) {
gc.reset(new CPUGarbageCollector(
BOOST_GET_CONST(platform::CPUPlace, place_), max_memory_size));
#ifdef PADDLE_WITH_CUDA
}
} else if (platform::is_xpu_place(place_)) {
#ifdef PADDLE_WITH_XPU
gc.reset(new XPUGarbageCollector(
BOOST_GET_CONST(platform::XPUPlace, place_), max_memory_size));
#else
PADDLE_THROW(
platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
#endif
}
}

for (int64_t i = start_op_index; i < end_op_index; ++i) {
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/framework/garbage_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ void CPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
callback();
}

#ifdef PADDLE_WITH_XPU
XPUGarbageCollector::XPUGarbageCollector(const platform::XPUPlace &place,
size_t max_memory_size)
: GarbageCollector(place, max_memory_size) {}
void XPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
callback();
}
#endif

#ifdef PADDLE_WITH_CUDA
UnsafeFastGPUGarbageCollector::UnsafeFastGPUGarbageCollector(
const platform::CUDAPlace &place, size_t max_memory_size)
Expand Down
10 changes: 10 additions & 0 deletions paddle/fluid/framework/garbage_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ class CPUGarbageCollector : public GarbageCollector {
void ClearCallback(const std::function<void()> &callback) override;
};

#ifdef PADDLE_WITH_XPU
class XPUGarbageCollector : public GarbageCollector {
public:
XPUGarbageCollector(const platform::XPUPlace &place, size_t max_memory_size);

protected:
void ClearCallback(const std::function<void()> &callback) override;
};
#endif

#ifdef PADDLE_WITH_CUDA
class UnsafeFastGPUGarbageCollector : public GarbageCollector {
public:
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/library_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ inline LibraryType StringToLibraryType(const char* ctype) {
// CPU, CUDA, PLAIN are same library type.
} else if (s == std::string("CPU")) {
return LibraryType::kPlain;
} else if (s == std::string("XPU")) {
return LibraryType::kPlain;
} else if (s == std::string("CUDA")) {
return LibraryType::kPlain;
} else {
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/framework/op_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)

#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)

#define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class, \
customized_name, \
customized_type_value, \
Expand Down Expand Up @@ -298,6 +301,12 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)

#define REGISTER_OP_XPU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX( \
op_type, XPU, ::paddle::platform::XPUPlace, DEFAULT_TYPE, \
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)

/**
* Macro to mark what Operator and Kernel
* we will use and tell the compiler to
Expand Down
15 changes: 15 additions & 0 deletions paddle/fluid/framework/op_version_registry.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_version_registry.h"
Loading

1 comment on commit c581fc8

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulation! Your pull request passed all required CI. You could ask reviewer(s) to approve and merge. 🎉

Please sign in to comment.