Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#36 from mthreads/optimize_bulid_musa
Browse files — browse the repository at this point in the history
[MTAI-484] fix(build): fix small compiling bugs for MUSA
  • Loading branch information
caizhi-mt authored and mt-robot committed Aug 13, 2023
2 parents e0be7af + 4a2b9f1 commit b3c73b9
Show file tree
Hide file tree
Showing 11 changed files with 34 additions and 46 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/memory/allocation/allocator_facade.cc
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ class AllocatorFacadePrivate {
auto chunk_size = FLAGS_auto_growth_chunk_size_in_mb << 20;
VLOG(4) << "FLAGS_auto_growth_chunk_size_in_mb is "
<< FLAGS_auto_growth_chunk_size_in_mb;
#if defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
#if defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
auto cuda_allocator = CreateCUDAAllocator(p);
cuda_allocators_[p][stream] = std::make_shared<AutoGrowthBestFitAllocator>(
cuda_allocator,
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/operators/nccl/nccl_gpu_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_RCCL
#include "paddle/fluid/platform/dynload/rccl.h"
#elif defined(PADDLE_WITH_MCCL)
#include "paddle/fluid/platform/dynload/mccl.h"
#else
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
Expand Down
1 change: 0 additions & 1 deletion paddle/fluid/platform/device/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ elseif(WITH_MUSA)
gpu_info
SRCS gpu_info.cc
DEPS phi glog enforce monitor dynload_cuda)

endif()

cc_library(
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/platform/device/gpu/nccl_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
#ifdef PADDLE_WITH_RCCL
#include "paddle/fluid/platform/dynload/rccl.h"
#endif
#ifdef PADDLE_WITH_MCCL
#include "paddle/fluid/platform/dynload/mccl.h"
#endif
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/enforce.h"
Expand Down
5 changes: 2 additions & 3 deletions paddle/fluid/platform/enforce.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ limitations under the License. */
#include <mublas.h>
#include <mudnn.h>
#include <mufft.h>
// TODO(@caizhi):
//#include <murand.h>
//#include <musparse.h>
#include <murand.h>
#include <musparse.h>
#include <thrust/system/musa/error.h>
#include <thrust/system_error.h>
#endif // PADDLE_WITH_MUSA
Expand Down
7 changes: 2 additions & 5 deletions paddle/fluid/platform/stream_callback_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ static void StreamCallbackFunc(gpuStream_t stream,
void *user_data)
#endif
#ifdef PADDLE_WITH_MUSA
static void StreamCallbackFunc(gpuStream_t stream,
gpuError_t status,
void *user_data)
static void MUSART_CB StreamCallbackFunc(void *user_data)
#endif
#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 10000
Expand Down Expand Up @@ -65,8 +63,7 @@ void StreamCallbackManager<Stream>::AddCallback(
#endif
#ifdef PADDLE_WITH_MUSA
PADDLE_ENFORCE_GPU_SUCCESS(
musaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
//musaLaunchHostFunc(stream_, StreamCallbackFunc, func));
musaLaunchHostFunc(stream_, StreamCallbackFunc, func));
#endif
#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 10000
Expand Down
49 changes: 20 additions & 29 deletions paddle/phi/backends/device_code.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ static bool CheckCUDADriverResult(MUresult result,
std::string kernel_name = "") {
if (result != MUSA_SUCCESS) {
const char* error = nullptr;
// TODO(@caizhi): enable dynload module
// dynload::muGetErrorString(result, &error);
dynload::muGetErrorString(result, &error);
#else
static bool CheckCUDADriverResult(CUresult result,
std::string caller,
Expand Down Expand Up @@ -153,10 +152,8 @@ void GPUDeviceCode::CheckAvailableStatus() {
hipError_t driver_result = dynload::hipDriverGetVersion(&driver_version);
if (driver_result == hipSuccess) {
#elif defined(PADDLE_WITH_MUSA)
// TODO(@caizhi): enable dynload module
// MUresult driver_result = dynload::muDriverGetVersion(&driver_version);
// if (driver_result == MUSA_SUCCESS) {
if (true) {
MUresult driver_result = dynload::muDriverGetVersion(&driver_version);
if (driver_result == MUSA_SUCCESS) {
#else
CUresult driver_result = dynload::cuDriverGetVersion(&driver_version);
if (driver_result == CUDA_SUCCESS) {
Expand Down Expand Up @@ -184,11 +181,8 @@ void GPUDeviceCode::CheckAvailableStatus() {
if (CheckCUDADriverResult(dynload::hipGetDeviceCount(&count),
"hipGetDeviceCount")) {
#elif defined(PADDLE_WITH_MUSA)
(void)count;
// TODO(@caizhi): enable dynload module
// if (CheckCUDADriverResult(dynload::muDeviceGetCount(&count),
// "muDeviceGetCount")) {
if (true) {
if (CheckCUDADriverResult(dynload::muDeviceGetCount(&count),
"muDeviceGetCount")) {
#else
if (CheckCUDADriverResult(dynload::cuDeviceGetCount(&count),
"cuDeviceGetCount")) {
Expand Down Expand Up @@ -473,24 +467,21 @@ void GPUDeviceCode::Launch(const size_t n, std::vector<void*>* args) const {
errors::External("Fail to launch kernel %s (in hipModuleLaunchKernel.)",
name_.c_str()));
#elif defined(PADDLE_WITH_MUSA)
(void)num_blocks;
(void)dev_ctx;
// TODO(@caizhi): enable dynload module
// PADDLE_ENFORCE_EQ(
// dynload::muLaunchKernel(function_,
// num_blocks,
// 1,
// 1, // grid dim
// num_threads_,
// 1,
// 1, // block dim
// 0, // shared memory
// dev_ctx->stream(), // stream
// args->data(), // arguments
// nullptr),
// MUSA_SUCCESS,
// errors::External("Fail to launch kernel %s (in muLaunchKernel.)",
// name_.c_str()));
PADDLE_ENFORCE_EQ(
dynload::muLaunchKernel(function_,
num_blocks,
1,
1, // grid dim
num_threads_,
1,
1, // block dim
0, // shared memory
dev_ctx->stream(), // stream
args->data(), // arguments
nullptr),
MUSA_SUCCESS,
errors::External("Fail to launch kernel %s (in muLaunchKernel.)",
name_.c_str()));
#else
PADDLE_ENFORCE_EQ(
dynload::cuLaunchKernel(function_,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/batch_norm_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ PD_REGISTER_KERNEL(batch_norm_infer,
}
#endif
#endif
#if defined(PADDLE_WITH_HIP)
#ifdef PADDLE_WITH_HIP
PD_REGISTER_KERNEL(batch_norm_infer,
GPU,
ALL_LAYOUT,
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/funcs/eigen/slice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ struct EigenSlice<Eigen::GpuDevice, T, Rank> {
const InType& in,
const Array& offsets,
const Array& extents) {
//out.device(dev) = in.slice(offsets, extents);
out.device(dev) = in.slice(offsets, extents);
}

// Overload of Eval that takes the *32Bit-named types (OutType32BitIndex,
// InType32BitIndex, Array32Bit). It assigns the slice of `in` selected by
// `offsets` and `extents` to `out`, with the assignment evaluated through
// `out.device(dev)`.
// NOTE(review): the 32Bit aliases are declared outside this view —
// presumably 32-bit-indexed tensor types; confirm against the struct header.
static void Eval(const Eigen::GpuDevice& dev,
OutType32BitIndex out,
const InType32BitIndex& in,
const Array32Bit& offsets,
const Array32Bit& extents) {
// NOTE(review): the commented-out line below appears to be the removed side
// of this diff hunk; the active statement after it is the re-enabled slice
// assignment.
//out.device(dev) = in.slice(offsets, extents);
out.device(dev) = in.slice(offsets, extents);
}
};

Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/funcs/gather_scatter_functor.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace funcs {
Instantiate_Template_Function_index_t( \
func, double) Instantiate_Template_Function_index_t(func, int64_t) \
Instantiate_Template_Function_index_t(func, phi::dtype::float16) \
Instantiate_Template_Function_index_t(func, \
Instantiate_Template_Function_index_t(func, \
phi::dtype::bfloat16) \
Instantiate_Template_Function_index_t(func, unsigned char)

Expand Down
3 changes: 0 additions & 3 deletions paddle/phi/kernels/funcs/select_impl.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@
#include <hipcub/hipcub.hpp>
namespace cub = hipcub;
#endif
#ifdef __MUSACC__
//TODO
#endif

#include <algorithm>
#include "paddle/phi/backends/gpu/gpu_launch_config.h"
Expand Down

0 comments on commit b3c73b9

Please sign in to comment.