Skip to content

Commit

Permalink
Profiler skeleton (#38826)
Browse files Browse the repository at this point in the history
* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173.

* profiler skeleton

* update

* update

* update

Co-authored-by: liutiexing <liutiexing@google.com>
  • Loading branch information
liutiexing and liutiexing authored Jan 10, 2022
1 parent e30150d commit a8afed6
Show file tree
Hide file tree
Showing 12 changed files with 154 additions and 44 deletions.
3 changes: 2 additions & 1 deletion paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
cc_library(workqueue SRCS workqueue.cc workqueue_utils.cc events_waiter.cc DEPS enforce glog)
cc_library(workqueue_utils SRCS workqueue_utils.cc events_waiter.cc DEPS enforce glog)
cc_library(workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog)
cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue)
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/workqueue/workqueue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ std::unique_ptr<WorkQueue> CreateMultiThreadedWorkQueue(
"WorkQueueOptions.num_threads must be "
"greater than 1."));
std::unique_ptr<WorkQueue> ptr(new WorkQueueImpl(options));
return std::move(ptr);
return ptr;
}

std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
Expand All @@ -208,7 +208,7 @@ std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
"For a WorkQueueGroup, the number of WorkQueueOptions "
"must be greater than 1."));
std::unique_ptr<WorkQueueGroup> ptr(new WorkQueueGroupImpl(queues_options));
return std::move(ptr);
return ptr;
}

} // namespace framework
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/platform/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,8 @@ cc_test(timer_test SRCS timer_test.cc DEPS timer)
cc_library(lodtensor_printer SRCS lodtensor_printer.cc DEPS ddim place tensor scope lod_tensor variable_helper framework_proto)
cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_printer)

cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info)
add_subdirectory(profiler)

cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
if(WITH_GPU)
nv_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce dynload_cuda)
Expand Down
34 changes: 0 additions & 34 deletions paddle/fluid/platform/event.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,39 +201,5 @@ class CudaEvent {
#endif
};

// A single recorded event: a name, a [start_ns, end_ns] pair, a role and an
// optional attribute string.
//
// `name` and `attr` are raw, non-owning pointers. This struct never frees
// them: they either come straight from the caller (first constructor) or
// point into buffers obtained from the `arena_allocator` passed to the other
// constructors, so the pointed-to memory must be kept alive elsewhere.
struct CommonEvent {
 public:
  // Stores `name` as-is (no copy is made). `attr` stays nullptr.
  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}

  // Copies `name_str` and then `attr_str` (in that order) into buffers
  // requested from `arena_allocator`.
  //
  // The allocator is taken by const reference, matching the attribute-less
  // overload below; callers holding a non-const std::function still bind.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role, const std::string &attr_str)
      : name(CopyToArena(arena_allocator, name_str)),
        start_ns(start_ns),
        end_ns(end_ns),
        role(role),
        attr(CopyToArena(arena_allocator, attr_str)) {}

  // Copies `name_str` into a buffer requested from `arena_allocator`.
  // `attr` stays nullptr.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : name(CopyToArena(arena_allocator, name_str)),
        start_ns(start_ns),
        end_ns(end_ns),
        role(role) {}

  const char *name = nullptr;  // not owned, designed for performance
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  EventRole role = EventRole::kOrdinary;
  const char *attr = nullptr;  // not owned, designed for performance

 private:
  // Requests `str.length() + 1` bytes from `arena_allocator`, copies `str`
  // including its terminating NUL into them, and returns the buffer.
  // NOTE(review): the allocator's result is used unchecked, as before;
  // presumably it never returns nullptr - confirm against the arena.
  static const char *CopyToArena(
      const std::function<void *(size_t)> &arena_allocator,
      const std::string &str) {
    const size_t size_with_nul = str.length() + 1;
    auto *buf = static_cast<char *>(arena_allocator(size_with_nul));
    strncpy(buf, str.c_str(), size_with_nul);
    return buf;
  }
};

} // namespace platform
} // namespace paddle
2 changes: 1 addition & 1 deletion paddle/fluid/platform/profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ limitations under the License. */

#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/host_event_recorder.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
#include "paddle/fluid/platform/profiler_helper.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/nvtx.h"
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/platform/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ limitations under the License. */
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/event_tracing.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.pb.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/platform/profiler/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info)
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/host_event_recorder.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
#include "paddle/fluid/platform/os_info.h"

namespace paddle {
Expand All @@ -26,7 +26,7 @@ HostEventSection HostEventRecorder::GatherEvents() {
for (auto &kv : thread_recorders_) {
host_sec.thr_sections.emplace_back(std::move(kv.second->GatherEvents()));
}
return std::move(host_sec);
return host_sec;
}

} // namespace platform
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,40 @@ limitations under the License. */
namespace paddle {
namespace platform {

// A single recorded event: a name, a [start_ns, end_ns] pair, a role and an
// optional attribute string.
//
// `name` and `attr` are raw, non-owning pointers. This struct never frees
// them: they either come straight from the caller (first constructor) or
// point into buffers obtained from the `arena_allocator` passed to the other
// constructors, so the pointed-to memory must be kept alive elsewhere.
struct CommonEvent {
 public:
  // Stores `name` as-is (no copy is made). `attr` stays nullptr.
  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}

  // Copies `name_str` and then `attr_str` (in that order) into buffers
  // requested from `arena_allocator`.
  //
  // The allocator is taken by const reference, matching the attribute-less
  // overload below; callers holding a non-const std::function still bind.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role, const std::string &attr_str)
      : name(CopyToArena(arena_allocator, name_str)),
        start_ns(start_ns),
        end_ns(end_ns),
        role(role),
        attr(CopyToArena(arena_allocator, attr_str)) {}

  // Copies `name_str` into a buffer requested from `arena_allocator`.
  // `attr` stays nullptr.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : name(CopyToArena(arena_allocator, name_str)),
        start_ns(start_ns),
        end_ns(end_ns),
        role(role) {}

  const char *name = nullptr;  // not owned, designed for performance
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  EventRole role = EventRole::kOrdinary;
  const char *attr = nullptr;  // not owned, designed for performance

 private:
  // Requests `str.length() + 1` bytes from `arena_allocator`, copies `str`
  // including its terminating NUL into them, and returns the buffer.
  // NOTE(review): the allocator's result is used unchecked, as before;
  // presumably it never returns nullptr - confirm against the arena.
  static const char *CopyToArena(
      const std::function<void *(size_t)> &arena_allocator,
      const std::string &str) {
    const size_t size_with_nul = str.length() + 1;
    auto *buf = static_cast<char *>(arena_allocator(size_with_nul));
    strncpy(buf, str.c_str(), size_with_nul);
    return buf;
  }
};

template <typename HeadType, typename... RestTypes>
struct ContainsStdString
: std::conditional_t<
Expand Down Expand Up @@ -154,7 +188,7 @@ std::vector<EventType> EventContainer<EventType>::Reduce() {
cur = next;
}
event_blocks_ = cur_event_block_ = new EventBlock;
return std::move(all_events);
return all_events;
}

template <typename EventType>
Expand Down Expand Up @@ -204,7 +238,7 @@ class ThreadEventRecorder {
thr_sec.thread_name = thread_name_;
thr_sec.thread_id = thread_id_;
thr_sec.events = std::move(base_evt_cntr_.Reduce());
return std::move(thr_sec);
return thr_sec;
}

private:
Expand Down
65 changes: 65 additions & 0 deletions paddle/fluid/platform/profiler/trace_event_collector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <cstdint>
#include <list>
#include <string>
#include <utility>

namespace paddle {
namespace platform {

// Plain record stored in TraceEventCollector's host record list.
//
// Numeric fields are zero-initialized so that a default-constructed record
// never carries indeterminate values. Aggregate initialization in field
// order keeps working (C++14 and later).
// NOTE(review): *_ns fields are presumably nanosecond timestamps, as the
// suffix suggests - confirm against the producer of these records.
struct HostRecord {
  std::string name;
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  uint64_t process_id = 0;
  uint64_t thread_id = 0;
};

// Plain record stored in TraceEventCollector's runtime record list.
//
// Numeric fields are zero-initialized so that a default-constructed record
// never carries indeterminate values. Aggregate initialization in field
// order keeps working (C++14 and later).
// NOTE(review): `correlation_id` presumably pairs this record with a
// DeviceRecord carrying the same id - confirm against the tracer that
// fills these in.
struct RuntimeRecord {
  std::string name;
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  uint64_t process_id = 0;
  uint64_t thread_id = 0;
  uint32_t correlation_id = 0;
};

// Plain record stored in TraceEventCollector's device record list.
//
// Numeric fields are zero-initialized so that a default-constructed record
// never carries indeterminate values. Aggregate initialization in field
// order keeps working (C++14 and later).
// NOTE(review): `correlation_id` presumably pairs this record with a
// RuntimeRecord carrying the same id - confirm against the tracer that
// fills these in.
struct DeviceRecord {
  std::string name;
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  uint32_t correlation_id = 0;
};

// Accumulates the host, runtime and device records added to it, keeping each
// kind in its own list in insertion order.
class TraceEventCollector {
 public:
  // Appends `record` to the host record list, moving from it.
  // A named rvalue reference is an lvalue, so std::move is required here;
  // without it the record (including its std::string) would be copied.
  void AddHostRecord(HostRecord&& record) {
    host_records_.push_back(std::move(record));
  }

  // Appends `record` to the runtime record list, moving from it.
  void AddRuntimeRecord(RuntimeRecord&& record) {
    runtime_records_.push_back(std::move(record));
  }

  // Appends `record` to the device record list, moving from it.
  void AddDeviceRecord(DeviceRecord&& record) {
    device_records_.push_back(std::move(record));
  }

 private:
  std::list<HostRecord> host_records_;
  std::list<RuntimeRecord> runtime_records_;
  std::list<DeviceRecord> device_records_;
};

} // namespace platform
} // namespace paddle
42 changes: 42 additions & 0 deletions paddle/fluid/platform/profiler/tracer_base.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/platform/profiler/trace_event_collector.h"

namespace paddle {
namespace platform {

// Abstract base for tracers. Concrete tracers implement start, stop and
// data collection; the base only stores the current state.
class TracerBase {
 public:
  // States a tracer can be in. A new tracer starts out UNINITED.
  // NOTE(review): `STOPED` is kept with its existing spelling because the
  // enumerator is part of the public interface.
  enum class TracerState {
    UNINITED,
    READY,
    STARTED,
    STOPED
  };

  // Default preparation step: only marks the tracer as READY.
  virtual void PrepareTracing() {
    state_ = TracerState::READY;
  }

  // Begins tracing. The base class does not touch `state_` here; any state
  // update is left to the implementation.
  virtual void StartTracing() = 0;

  // Ends tracing. As with StartTracing, state handling is up to the
  // implementation.
  virtual void StopTracing() = 0;

  // Hands the tracer a collector to report its data to.
  // NOTE(review): presumably implementations call the collector's Add*Record
  // methods - confirm in the concrete tracers.
  virtual void CollectTraceData(TraceEventCollector* collector) = 0;

  virtual ~TracerBase() = default;

 protected:
  // Current state; readable and writable by derived tracers.
  TracerState state_{TracerState::UNINITED};
};

} // namespace platform
} // namespace paddle

0 comments on commit a8afed6

Please sign in to comment.