Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[kunlun] support xpu runtime profiler #54685

Merged
merged 6 commits into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ set(XPU_PROJECT "extern_xpu")
set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")

set(XPU_BASE_DATE "20230602")
set(XPU_XCCL_BASE_VERSION "1.0.49.2")
set(XPU_XFT_BASE_VERSION "latest")
set(XPU_XPTI_BASE_VERSION "0.0.1")

if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
Expand All @@ -30,6 +32,10 @@ if(NOT XPU_XFT_BASE_URL)
)
endif()

# Base URL the XPTI package is downloaded from. Only set when the caller has
# not already provided one (e.g. -DXPU_XPTI_BASE_URL=...), mirroring the
# if(NOT XPU_XFT_BASE_URL) guard used for the XFT package.
if(NOT XPU_XPTI_BASE_URL)
  set(XPU_XPTI_BASE_URL
      "https://klx-sdk-release-public.su.bcebos.com/xpti/dev/${XPU_XPTI_BASE_VERSION}"
  )
endif()

if(WITH_XCCL_RDMA)
set(XPU_XCCL_PREFIX "xccl_rdma")
else()
Expand Down Expand Up @@ -67,6 +73,7 @@ else()
set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
endif()
set(XPU_XPTI_DIR_NAME "xpti")

set(XPU_XRE_URL
"${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
Expand All @@ -78,12 +85,18 @@ set(XPU_XCCL_URL
"${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XFT_URL "${XPU_XFT_BASE_URL}/${XPU_XFT_DIR_NAME}.tar.gz")
set(XPU_XPTI_URL
"${XPU_XPTI_BASE_URL}/${XPU_XPTI_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_PACK_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh"
CACHE STRING "" FORCE)
set(XPU_XFT_GET_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xft_dependence.sh"
CACHE STRING "" FORCE)
set(XPU_XPTI_GET_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xpti_dependence.sh"
CACHE STRING "" FORCE)

set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
Expand Down Expand Up @@ -123,7 +136,8 @@ ExternalProject_Add(
pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL}
${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} && wget
${XPU_XFT_GET_DEPENCE_URL} && bash get_xft_dependence.sh ${XPU_XFT_URL}
${XPU_XFT_DIR_NAME}
${XPU_XFT_DIR_NAME} && wget ${XPU_XPTI_GET_DEPENCE_URL} && bash
get_xpti_dependence.sh ${XPU_XPTI_URL} ${XPU_XPTI_DIR_NAME}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里应该像bkcl一样,单独搞出来一个判断吧?在WITH_XPU_XPTI的时候才需要用到这个dependence.sh以及下载依赖

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

忽略这条,现在bkcl也是默认下载的。

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

还是改成有条件下载,因为目前这个脚本需要替换libxpurt.so
等后面XRE的klprof合入主线后再考虑是否改成默认下载
#54711

DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
Expand Down Expand Up @@ -151,6 +165,12 @@ if(WITH_XPU_XFT)
set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}")
endif()

# When WITH_XPU_XPTI is enabled: report it, add the PADDLE_WITH_XPU_XPTI
# compile definition, and record the path of the XPTI shared library
# (XPU_LIB_DIR joined with XPU_XPTI_LIB_NAME) in XPU_XPTI_LIB.
if(WITH_XPU_XPTI)
message(STATUS "Compile with XPU XPTI!")
add_definitions(-DPADDLE_WITH_XPU_XPTI)
set(XPU_XPTI_LIB "${XPU_LIB_DIR}/${XPU_XPTI_LIB_NAME}")
endif()

if(WITH_XPU_BKCL AND WITH_XPU_XFT)
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}
${XPU_XFT_LIB})
Expand All @@ -162,6 +182,10 @@ else()
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
endif()

# Link the XPTI library into xpulib only when WITH_XPU_XPTI is enabled.
if(WITH_XPU_XPTI)
target_link_libraries(xpulib ${XPU_XPTI_LIB})
endif()

add_dependencies(xpulib ${XPU_PROJECT})

# Ensure that xpu/api.h can be included without dependency errors.
Expand Down
31 changes: 31 additions & 0 deletions paddle/fluid/platform/dynload/xpti.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_XPU

#include "paddle/fluid/platform/dynload/xpti.h"

namespace paddle {
namespace platform {
namespace dynload {

// Defines the wrapper object `__name` of type `DynLoad__<name>`; the header
// included by this file declares the same objects as `extern`.
#define DEFINE_WRAP(__name) DynLoad__##__name __name

// The included header names its routine list XPTI_RAND_ROUTINE_EACH, so that
// is the list instantiated here: every declared wrapper gets exactly one
// definition.
XPTI_RAND_ROUTINE_EACH(DEFINE_WRAP);

}  // namespace dynload
}  // namespace platform
}  // namespace paddle

#endif // PADDLE_WITH_XPU
43 changes: 43 additions & 0 deletions paddle/fluid/platform/dynload/xpti.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <xpu/xpti.h>

#include <mutex> // NOLINT

#include "paddle/phi/backends/dynload/xpti.h"

namespace paddle {
namespace platform {
namespace dynload {

// For a routine `__name`: aliases the phi::dynload wrapper type
// `DynLoad__<name>` into this namespace and declares (without defining) an
// object of that type named `__name`.
#define DECLARE_DYNAMIC_LOAD_XPTI_WRAP(__name) \
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name

// List of XPTI routines exposed by this header; applies `__macro` to each
// routine name in turn.
#define XPTI_RAND_ROUTINE_EACH(__macro) \
__macro(xptiActivityEnable); \
__macro(xptiActivityDisable); \
__macro(xptiStartTracing); \
__macro(xptiStopTracing); \
__macro(xptiActivityFlushAll); \
__macro(xptiActivityGetNextRecord);

// Emit one alias + extern declaration per routine in the list.
XPTI_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_XPTI_WRAP);

// The declaration helper is only needed inside this header.
#undef DECLARE_DYNAMIC_LOAD_XPTI_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
210 changes: 210 additions & 0 deletions paddle/fluid/platform/profiler/xpu_tracer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/platform/profiler/xpu_tracer.h"

#include <mutex>
#include <unordered_map>

#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/os_info.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/device_manager.h"
#endif

// Evaluates `call` exactly once. If the result is anything other than
// XPTI_SUCCESS, logs the stringified call text together with the result and
// terminates the process via exit(-1).
#define XPTI_CALL(call)                                           \
  do {                                                            \
    XPTIResult xpti_call_result_ = call;                          \
    if (xpti_call_result_ != XPTI_SUCCESS) {                      \
      LOG(ERROR) << "Function " << #call << " failed with error " \
                 << xpti_call_result_;                            \
      exit(-1);                                                   \
    }                                                             \
  } while (0)

namespace paddle {
namespace platform {

// Moves the tracer to READY. The tracer must currently be UNINITED or STOPED;
// otherwise the PADDLE_ENFORCE_EQ check fails with a PreconditionNotMet
// error. When built with PADDLE_WITH_XPU, XPTI activity collection is enabled
// before the state changes.
void XPUTracer::PrepareTracing() {
  PADDLE_ENFORCE_EQ(
      state_ == TracerState::UNINITED || state_ == TracerState::STOPED,
      true,
      // The message lists both accepted states so it matches the condition.
      platform::errors::PreconditionNotMet(
          "XPUTracer must be UNINITED or STOPED"));
#ifdef PADDLE_WITH_XPU
  XPTI_CALL(dynload::xptiActivityEnable());
  VLOG(3) << "enable xpti activity";
#endif
  state_ = TracerState::READY;
}

// Starts a tracing session. The tracer must be READY; otherwise the
// PADDLE_ENFORCE_EQ check fails with a PreconditionNotMet error. When built
// with PADDLE_WITH_XPU, XPTI tracing is started first. The start timestamp
// from PosixInNsec() is then stored in tracing_start_ns_ and the state
// becomes STARTED.
void XPUTracer::StartTracing() {
  PADDLE_ENFORCE_EQ(
      state_ == TracerState::READY,
      true,
      // Only READY is accepted, so the message names only READY.
      platform::errors::PreconditionNotMet("Tracer must be READY"));
#ifdef PADDLE_WITH_XPU
  XPTI_CALL(dynload::xptiStartTracing());
#endif
  tracing_start_ns_ = PosixInNsec();
  state_ = TracerState::STARTED;
}

// Ends the current tracing session. The tracer must be STARTED; otherwise the
// PADDLE_ENFORCE_EQ check fails with a PreconditionNotMet error. On success
// the state becomes STOPED.
void XPUTracer::StopTracing() {
PADDLE_ENFORCE_EQ(
state_,
TracerState::STARTED,
platform::errors::PreconditionNotMet("Tracer must be STARTED"));
#ifdef PADDLE_WITH_XPU
// Stop tracing first, then disable XPTI activity collection.
XPTI_CALL(dynload::xptiStopTracing());
XPTI_CALL(dynload::xptiActivityDisable());
VLOG(3) << "disable xpti activity";
#endif
state_ = TracerState::STOPED;
}

#ifdef PADDLE_WITH_XPU
// Converts one XPTI API record into a RuntimeTraceEvent and passes it to
// `collector`.
//
// api:       XPTI API record to convert.
// start_ns:  profiler start timestamp; records whose `start` is earlier are
//            logged and dropped.
// collector: receives the converted event via AddRuntimeEvent.
void AddApiRecord(const baidu::xpu::xpti::XPTIEventApi* api,
                  uint64_t start_ns,
                  TraceEventCollector* collector) {
  if (api->start < start_ns) {
    VLOG(4) << "xpu event " << api->get_name() << " start " << api->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  RuntimeTraceEvent event;
  event.name = api->get_name();
  event.start_ns = api->start;
  event.end_ns = api->end;
  event.process_id = api->pid;
  event.thread_id = api->tid;
  event.correlation_id = api->args.token;

  // Log before handing the event over: `event` is moved from on the next
  // line, so its name must not be read afterwards.
  VLOG(4) << "Add api event " << event.name;
  collector->AddRuntimeEvent(std::move(event));
}

// Converts one XPTI kernel record into a DeviceTraceEvent of type
// TracerEventType::Kernel and passes it to `collector`.
//
// kernel:    XPTI kernel record to convert.
// start_ns:  profiler start timestamp; records whose `start` is earlier are
//            logged and dropped.
// collector: receives the converted event via AddDeviceEvent.
void AddKernelRecord(const baidu::xpu::xpti::XPTIEventKernel* kernel,
                     uint64_t start_ns,
                     TraceEventCollector* collector) {
  if (kernel->start < start_ns) {
    // Spaces around the inserted values keep the message readable and
    // consistent with the API-record drop message.
    VLOG(4) << "xpu event " << kernel->get_name() << " start " << kernel->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  DeviceTraceEvent event;
  event.name = kernel->get_name();
  event.type = TracerEventType::Kernel;
  event.start_ns = kernel->start;
  event.end_ns = kernel->end;
  event.device_id = kernel->args.board_id;
  event.stream_id = kernel->args.stream_id;
  event.correlation_id = kernel->args.token;

  // Log before handing the event over: `event` is moved from on the next
  // line, so its name must not be read afterwards.
  VLOG(4) << "Add kernel event " << event.name;
  collector->AddDeviceEvent(std::move(event));
}

// Converts one XPTI wait record into a RuntimeTraceEvent and passes it to
// `collector`. No correlation id is set for wait records.
//
// wait:      XPTI wait record to convert.
// start_ns:  profiler start timestamp; records whose `start` is earlier are
//            logged and dropped.
// collector: receives the converted event via AddRuntimeEvent.
void AddWaitRecord(const baidu::xpu::xpti::XPTIEventWait* wait,
                   uint64_t start_ns,
                   TraceEventCollector* collector) {
  if (wait->start < start_ns) {
    // Spaces around the inserted values keep the message readable and
    // consistent with the API-record drop message.
    VLOG(4) << "xpu event " << wait->get_name() << " start " << wait->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  RuntimeTraceEvent event;
  event.name = wait->get_name();
  event.start_ns = wait->start;
  event.end_ns = wait->end;
  event.process_id = wait->pid;
  event.thread_id = wait->tid;

  // Log before handing the event over: `event` is moved from on the next
  // line, so its name must not be read afterwards.
  VLOG(4) << "Add wait event " << event.name;
  collector->AddRuntimeEvent(std::move(event));
}

// Converts one XPTI memcpy record into a RuntimeTraceEvent and passes it to
// `collector`. No correlation id is set for memcpy records.
//
// memcpy:    XPTI memory-copy record to convert.
// start_ns:  profiler start timestamp; records whose `start` is earlier are
//            logged and dropped.
// collector: receives the converted event via AddRuntimeEvent.
void AddMemcpyRecord(const baidu::xpu::xpti::XPTIEventMem* memcpy,
                     uint64_t start_ns,
                     TraceEventCollector* collector) {
  if (memcpy->start < start_ns) {
    // Spaces around the inserted values keep the message readable and
    // consistent with the API-record drop message.
    VLOG(4) << "xpu event " << memcpy->get_name() << " start " << memcpy->start
            << " is before profiler start " << start_ns << ", drop event";
    return;
  }
  RuntimeTraceEvent event;
  event.name = memcpy->get_name();
  event.start_ns = memcpy->start;
  event.end_ns = memcpy->end;
  event.process_id = memcpy->pid;
  event.thread_id = memcpy->tid;

  // Log before handing the event over: `event` is moved from on the next
  // line, so its name must not be read afterwards.
  VLOG(4) << "Add memcpy event " << event.name;
  collector->AddRuntimeEvent(std::move(event));
}
#endif

// Drains the records buffered by XPTI and forwards API, kernel, memcpy and
// wait records to `collector`; other record types are ignored. The tracer
// must be STOPED, otherwise the PADDLE_ENFORCE_EQ check fails with a
// PreconditionNotMet error. Without PADDLE_WITH_XPU only the state check runs.
void XPUTracer::CollectTraceData(TraceEventCollector* collector) {
  PADDLE_ENFORCE_EQ(
      state_,
      TracerState::STOPED,
      platform::errors::PreconditionNotMet("Tracer must be STOPED"));
#ifdef PADDLE_WITH_XPU
  using GenericEvent = baidu::xpu::xpti::XPTIEvent;
  using ApiEvent = baidu::xpu::xpti::XPTIEventApi;
  using KernelEvent = baidu::xpu::xpti::XPTIEventKernel;
  using MemEvent = baidu::xpu::xpti::XPTIEventMem;
  using WaitEvent = baidu::xpu::xpti::XPTIEventWait;

  XPTI_CALL(dynload::xptiActivityFlushAll());

  GenericEvent* record = nullptr;
  for (;;) {
    XPTIResult status = dynload::xptiActivityGetNextRecord(&record);
    if (status != XPTI_SUCCESS) {
      if (status == XPTI_INVALID_DATA) {
        // data queue already empty
        VLOG(4) << "xpti data queue is empty now, collect trace data done";
        break;
      }
      // Any other result is an error: XPTI_CALL logs it and exits. The
      // variable keeps the name `status` because the macro stringifies its
      // argument into the log message.
      XPTI_CALL(status);
      continue;
    }

    record->PrintForDebug();
    switch (record->type) {
      case XPTI_EVENT_TYPE_API:
        AddApiRecord(reinterpret_cast<const ApiEvent*>(record),
                     tracing_start_ns_,
                     collector);
        break;
      case XPTI_EVENT_TYPE_KERNEL:
        AddKernelRecord(reinterpret_cast<const KernelEvent*>(record),
                        tracing_start_ns_,
                        collector);
        break;
      case XPTI_EVENT_TYPE_MEMCPY:
        AddMemcpyRecord(reinterpret_cast<const MemEvent*>(record),
                        tracing_start_ns_,
                        collector);
        break;
      case XPTI_EVENT_TYPE_WAIT:
        AddWaitRecord(reinterpret_cast<const WaitEvent*>(record),
                      tracing_start_ns_,
                      collector);
        break;
      default:
        break;
    }
    // NOTE(review): the original left a "free XPTIEvent" remark here but
    // nothing releases `record`; confirm who owns the returned record.
  }
#endif
}

} // namespace platform
} // namespace paddle
Loading