diff --git a/paddle/fluid/distributed/collective/BKCLTools.h b/paddle/fluid/distributed/collective/BKCLTools.h index e08bb61438c88..0572b852f6e90 100644 --- a/paddle/fluid/distributed/collective/BKCLTools.h +++ b/paddle/fluid/distributed/collective/BKCLTools.h @@ -77,23 +77,11 @@ class XPUEventManager { device_index_)); platform::XPUDeviceGuard guard(device_index_); - PADDLE_ENFORCE_XPU_SUCCESS(xpu_event_record(event_, ctx.stream())); + // TODO(zhangxiaoci) temporary solution: xpu::event seems buggy + PADDLE_ENFORCE_XPU_SUCCESS(xpu_wait(ctx.stream())); } - void Block(const XPUContext& ctx) const { - if (is_created_) { - auto device_index = ctx.GetPlace().device; - PADDLE_ENFORCE_EQ(device_index, - device_index_, - platform::errors::PreconditionNotMet( - "XPUContext's device %d does not match" - "Event's device %d", - device_index, - device_index_)); - platform::XPUDeviceGuard guard(device_index_); - PADDLE_ENFORCE_XPU_SUCCESS(xpu_stream_wait_event(ctx.stream(), event_)); - } - } + void Block(const XPUContext& ctx) const {} private: bool is_created_{false}; diff --git a/paddle/fluid/distributed/collective/ProcessGroupBKCL.cc b/paddle/fluid/distributed/collective/ProcessGroupBKCL.cc index a5c80cb04108d..ffa715dbde4e0 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupBKCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupBKCL.cc @@ -57,8 +57,14 @@ bool ProcessGroupBKCL::BKCLTask::Wait(std::chrono::milliseconds timeout) { if (barrier_) { // If we use the work to do barrier, we should block cpu + + // TODO(zhangxiaoci) There is no such function that can sync entire device + // for xpu (for now), so all we can do is sync whatever stream that we know + // and hope for the best. Note that for correctness the communication stream + // needs to be in sync mode. platform::XPUDeviceGuard guard(place_.GetDeviceId()); xpu_wait(); + calc_ctx->Wait(); } return true; } diff --git a/paddle/phi/backends/xpu/xpu_context.cc b/paddle/phi/backends/xpu/xpu_context.cc index 7257f3f20b06b..4065306abc798 100644 --- a/paddle/phi/backends/xpu/xpu_context.cc +++ b/paddle/phi/backends/xpu/xpu_context.cc @@ -64,7 +64,7 @@ struct XPUContext::Impl { // manually destroy XPUStream here until xpu::api integrates this work // into Context dtor xpu_wait(context_->xpu_stream); - PADDLE_ENFORCE_XPU_SUCCESS(xpu_stream_destroy(context_->xpu_stream)); + xpu_stream_destroy(context_->xpu_stream); context_->xpu_stream = nullptr; xpu::destroy_context(context_); context_ = nullptr; diff --git a/paddle/phi/kernels/xpu/concat_and_split.cc b/paddle/phi/kernels/xpu/concat_and_split_functor.cc similarity index 87% rename from paddle/phi/kernels/xpu/concat_and_split.cc rename to paddle/phi/kernels/xpu/concat_and_split_functor.cc index 225f9555b02e6..769458523a68c 100644 --- a/paddle/phi/kernels/xpu/concat_and_split.cc +++ b/paddle/phi/kernels/xpu/concat_and_split_functor.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" - -#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" namespace phi { @@ -67,14 +65,7 @@ class ConcatFunctor { reinterpret_cast(output->data()), xdims_list, axis); - PADDLE_ENFORCE_EQ( - r, - XPU_SUCCESS, - paddle::platform::errors::External( - "XPU API return wrong value[%d %s], please check whether " - "Baidu Kunlun Card is properly installed.", - r, - XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "concat"); } }; @@ -126,14 +117,7 @@ class SplitFunctor { xdims_list, split_list, axis); - PADDLE_ENFORCE_EQ( - r, - XPU_SUCCESS, - paddle::platform::errors::External( - "XPU API return wrong value[%d %s], please check whether " - "Baidu Kunlun Card is properly installed.", - r, - XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "split"); } };