From c9e508573fbe65c10c55c0e70e2cd7264f365c75 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 9 Mar 2022 03:34:17 +0000 Subject: [PATCH 1/8] no --- paddle/phi/core/compat/op_utils.h | 1 + paddle/phi/kernels/funcs/reduce_functor.h | 8 ++ paddle/phi/kernels/gpu/batch_norm_kernel.cu | 4 + .../phi/kernels/gpu/take_along_axis_kernel.cu | 1 + paddle/phi/kernels/math_kernel.h | 2 +- paddle/phi/ops/compat/reduce_sig.cc | 24 ++++- python/paddle/profiler/profiler.py | 101 ++++++++++-------- python/paddle/profiler/utils.py | 1 - 8 files changed, 92 insertions(+), 50 deletions(-) diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index 9947e00ecb53c5..1ab718c0794384 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -47,6 +47,7 @@ const std::unordered_set deprecated_op_names({"diag", "matmul_grad", "matmul_grad_grad", "mean", + "max", "reshape", "reshape_grad", "expand", diff --git a/paddle/phi/kernels/funcs/reduce_functor.h b/paddle/phi/kernels/funcs/reduce_functor.h index aebd155ac59cb2..4e83d0fa371032 100644 --- a/paddle/phi/kernels/funcs/reduce_functor.h +++ b/paddle/phi/kernels/funcs/reduce_functor.h @@ -41,5 +41,13 @@ struct ProdFunctor { } }; +//////// Max Functor /////// +struct MaxFunctor { + template + void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) { + y->device(place) = x->maximum(dim); + } +}; + } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 6ad12245d2a45a..49b550f51e60e1 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -460,10 +460,14 @@ void BatchNormKernel(const Context &ctx, void *reserve_space_ptr = nullptr; void *workspace_ptr = nullptr; DenseTensor workspace_tensor; + DenseTensor reserve_space_tensor; // Create reserve space and workspace for batch norm. // Create tensor for each batchnorm op, it will be used in the // backward. Thus this tensor shouldn't be temp. // auto *reserve_space = ctx.Output("ReserveSpace"); + if (reserve_space == nullptr) { + reserve_space = &reserve_space_tensor; + } PADDLE_ENFORCE_NOT_NULL( reserve_space, phi::errors::NotFound( diff --git a/paddle/phi/kernels/gpu/take_along_axis_kernel.cu b/paddle/phi/kernels/gpu/take_along_axis_kernel.cu index 63113e3e672f37..9665a917d9dc4a 100644 --- a/paddle/phi/kernels/gpu/take_along_axis_kernel.cu +++ b/paddle/phi/kernels/gpu/take_along_axis_kernel.cu @@ -53,6 +53,7 @@ PD_REGISTER_KERNEL(take_along_axis, GPU, ALL_LAYOUT, phi::TakeAlongAxisKernel, + float, double, int64_t, int, diff --git a/paddle/phi/kernels/math_kernel.h b/paddle/phi/kernels/math_kernel.h index fe8f3b749cdd8a..7569cbcff087d7 100644 --- a/paddle/phi/kernels/math_kernel.h +++ b/paddle/phi/kernels/math_kernel.h @@ -156,7 +156,7 @@ DenseTensor Mean(const Context& dev_ctx, bool keep_dim) { DenseTensor dense_out; MetaTensor meta_out(&dense_out); - ReduceInferMetaBase(x, axis, keep_dim, false, x.dtype(), &meta_out); + SumRawInferMeta(x, axis, keep_dim, false, x.dtype(), &meta_out); MeanKernel(dev_ctx, x, axis, keep_dim, &dense_out); return dense_out; } diff --git a/paddle/phi/ops/compat/reduce_sig.cc b/paddle/phi/ops/compat/reduce_sig.cc index 92839fb3030752..36798abe4c11b8 100644 --- a/paddle/phi/ops/compat/reduce_sig.cc +++ b/paddle/phi/ops/compat/reduce_sig.cc @@ -21,7 +21,7 @@ KernelSignature ReduceSumOpArgumentMapping(const ArgumentMappingContext& ctx) { bool reduce_all = paddle::any_cast(ctx.Attr("reduce_all")); // When ctx is InferShapeArgumentMappingContext, the reduce_all is used in // InferShape, so we must return the "sum_raw" KernelSignature. - // And the InferMeta function(i.e. ReduceInferMetaBase) is accordance with + // And the InferMeta function(i.e. SumRawInferMeta) is accordance with // the "sum_raw" KernelSignature if (ctx.IsForInferShape() || reduce_all) { return KernelSignature("sum_raw", @@ -40,7 +40,8 @@ KernelSignature ReduceMeanOpArgumentMapping(const ArgumentMappingContext& ctx) { bool reduce_all = paddle::any_cast(ctx.Attr("reduce_all")); // When ctx is InferShapeArgumentMappingContext, the reduce_all is used in // InferShape, so we must return the "mean_raw" KernelSignature. - // And the InferMeta function(i.e. MeanRawInferMeta) is accordance with the + // And the InferMeta function(i.e. ReduceInferMetaBase) is accordance with + // the // "mean_raw" KernelSignature if (ctx.IsForInferShape() || reduce_all) { return KernelSignature( @@ -56,11 +57,30 @@ KernelSignature ReduceProdOpArgumentMapping(const ArgumentMappingContext& ctx) { "reduce_prod", {"X"}, {"dim", "keep_dim", "reduce_all"}, {"Out"}); } +KernelSignature ReduceMaxOpArgumentMapping(const ArgumentMappingContext& ctx) { + if (ctx.IsDenseTensorInput("X")) { + bool reduce_all = paddle::any_cast(ctx.Attr("reduce_all")); + // When ctx is InferShapeArgumentMappingContext, the reduce_all is used in + // InferShape, so we must return the "max_raw" KernelSignature. + // And the InferMeta function(i.e. ReduceInferMetaBase) is accordance with + // the + // "max_raw" KernelSignature + if (ctx.IsForInferShape() || reduce_all) { + return KernelSignature( + "max_raw", {"X"}, {"dim", "keep_dim", "reduce_all"}, {"Out"}); + } + return KernelSignature("max", {"X"}, {"dim", "keep_dim"}, {"Out"}); + } + return KernelSignature("unregistered", {}, {}, {}); +} + } // namespace phi PD_REGISTER_BASE_KERNEL_NAME(reduce_sum, sum); PD_REGISTER_BASE_KERNEL_NAME(reduce_mean, mean); +PD_REGISTER_BASE_KERNEL_NAME(reduce_max, max); PD_REGISTER_ARG_MAPPING_FN(reduce_sum, phi::ReduceSumOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(reduce_mean, phi::ReduceMeanOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(reduce_prod, phi::ReduceProdOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(reduce_max, phi::ReduceMaxOpArgumentMapping); diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index dc637bf983046b..3f67f69b2ad4a7 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -82,11 +82,9 @@ def make_scheduler(*, Examples: 1. profiling range [2, 5] batch 0: closed, batch 1: ready, batch [2, 5] record - .. code-block:: python make_scheduler(closed=1, ready=1, record=4, repeat=1) 2. profiling range [3,6], [9,12], [15,18]... batch 0: skiped, batch 1: closed, batch 2: ready, batch [3,6]: record, repeat - .. code-block:: python make_scheduler(closed=1, ready=1, record=4, skip_first=1) """ @@ -138,15 +136,16 @@ def export_chrome_tracing(dir_name: str, Examples: .. code-block:: python - import paddle.profiler as profiler - with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, - profiler.ProfilerTarget.GPU], - scheduler = (3, 10), - on_trace_ready = profiler.export_chrome_tracing('./log') - ) as p: - for iter in range(N): - train() - p.step() + + import paddle.profiler as profiler + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, + profiler.ProfilerTarget.GPU], + scheduler = (3, 10), + on_trace_ready = profiler.export_chrome_tracing('./log') + ) as p: + for iter in range(N): + #train() + p.step() """ if not os.path.exists(dir_name): try: @@ -181,15 +180,16 @@ def export_protobuf(dir_name: str, worker_name: Optional[str]=None) -> Callable: Examples: .. code-block:: python - import paddle.profiler as profiler - with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, - profiler.ProfilerTarget.GPU], - scheduler = (3, 10), - on_trace_ready = profiler.export_protobuf('./log') - ) as p: - for iter in range(N): - train() - p.step() + + import paddle.profiler as profiler + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, + profiler.ProfilerTarget.GPU], + scheduler = (3, 10), + on_trace_ready = profiler.export_protobuf('./log') + ) as p: + for iter in range(N): + #train() + p.step() """ if not os.path.exists(dir_name): try: @@ -238,36 +238,45 @@ class Profiler: Examples: 1. profiling range [2, 5) .. code-block:: python - import paddle.profiler as profiler - with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, - profiler.ProfilerTarget.GPU], - scheduler = (2, 5), - on_trace_ready = profiler.export_chrome_tracing('./log') - ) as p: - for iter in range(N): - train() - p.step() + :name: code-example1 + + import paddle.profiler as profiler + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, + profiler.ProfilerTarget.GPU], + scheduler = (2, 5), + on_trace_ready = profiler.export_chrome_tracing('./log') + ) as p: + for iter in range(N): + #train() + p.step() + 2. profiling range [2,4], [7, 9], [11,13] .. code-block:: python - import paddle.profiler as profiler - with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, - profiler.ProfilerTarget.GPU], - scheduler = profiler.make_scheduler(closed=1, ready=1, record=3, repeat=3), - on_trace_ready = profiler.export_chrome_tracing('./log') - ) as p: - for iter in range(N): - train() - p.step() + :name: code-example2 + + import paddle.profiler as profiler + with profiler.Profiler(targets=[profiler.ProfilerTarget.CPU, + profiler.ProfilerTarget.GPU], + scheduler = profiler.make_scheduler(closed=1, ready=1, record=3, repeat=3), + on_trace_ready = profiler.export_chrome_tracing('./log') + ) as p: + for iter in range(N): + #train() + p.step() + 3. Use profiler without context manager, and use default parameters .. code-block:: python - import paddle.profiler as profiler - p = profiler.Profiler() - p.start() - for iter in range(N): - train() - p.step() - p.stop() - p.summary() + :name: code-example3 + + import paddle.profiler as profiler + p = profiler.Profiler() + p.start() + for iter in range(N): + #train() + p.step() + p.stop() + p.summary() + """ def __init__( diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py index 642001dfbfc5a3..dea5feebccc195 100644 --- a/python/paddle/profiler/utils.py +++ b/python/paddle/profiler/utils.py @@ -36,7 +36,6 @@ class RecordEvent(ContextDecorator): event_type(TracerEventType): Type of the record event, can be used for statistics. Examples: - .. code-block:: python import paddle.profiler as profiler with profiler.RecordEvent(name='op1', event_type=TracerEventType=TracerEventType.UserDefined): op1() From 8b255d652a70a94b0d8c71052c3f1fb64b125826 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Mar 2022 03:46:52 +0000 Subject: [PATCH 2/8] maintain old profiler --- paddle/fluid/platform/profiler.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 9427702cbcfdca..a5e0d4a8c6b0c5 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -128,7 +128,11 @@ RecordEvent::RecordEvent(const std::string &name, const std::string &attr, #endif #endif if (FLAGS_enable_host_event_recorder_hook == false) { - OriginalConstruct(name, role, attr); + if (type == TracerEventType::Operator || + type == TracerEventType::OperatorInner || + type == TracerEventType::UserDefined) { + OriginalConstruct(name, role, attr); + } return; } if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { From 5bc91d0349baec9acf30c4c6a84c8bd1342c5625 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Mar 2022 06:01:05 +0000 Subject: [PATCH 3/8] exclude new python record events for old profiler --- paddle/fluid/platform/profiler.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index a5e0d4a8c6b0c5..14e2074f361b14 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -78,7 +78,11 @@ RecordEvent::RecordEvent(const char *name, const TracerEventType type, #endif if (FLAGS_enable_host_event_recorder_hook == false) { if (g_state != ProfilerState::kDisabled) { // avoid temp string - OriginalConstruct(name, role, "none"); + if (type == TracerEventType::Operator || + type == TracerEventType::OperatorInner || + type == TracerEventType::UserDefined) { + OriginalConstruct(name, role, "none"); + } } return; } @@ -103,7 +107,11 @@ RecordEvent::RecordEvent(const std::string &name, const TracerEventType type, #endif #endif if (FLAGS_enable_host_event_recorder_hook == false) { - OriginalConstruct(name, role, "none"); + if (type == TracerEventType::Operator || + type == TracerEventType::OperatorInner || + type == TracerEventType::UserDefined) { + OriginalConstruct(name, role, "none"); + } return; } if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { From fe8e37e4bcba78f9a45498ecab4979697dcd0e63 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Mar 2022 06:45:35 +0000 Subject: [PATCH 4/8] maintain old profiler --- paddle/fluid/framework/operator.cc | 4 +++- paddle/fluid/platform/profiler.cc | 25 ++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index dbf94613f3de6c..cf2a36cde1f1f6 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -59,6 +59,7 @@ DECLARE_bool(benchmark); DECLARE_bool(check_nan_inf); DECLARE_bool(enable_unused_var_check); DECLARE_bool(run_kp_kernel); +DECLARE_bool(enable_host_event_recorder_hook); namespace paddle { namespace framework { @@ -264,7 +265,8 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { Type(), platform::TracerEventType::Operator, 1); auto op_name = platform::OpName(outputs_, Type()); platform::RecordEvent op_name_record_event( - op_name, platform::TracerEventType::Operator, 10, + op_name, platform::TracerEventType::Operator, + FLAGS_enable_host_event_recorder_hook ? 20 : 1, platform::EventRole::kUniqueOp); RunImpl(scope, place); } diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 9427702cbcfdca..a9ff5f03c4356e 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -76,15 +76,17 @@ RecordEvent::RecordEvent(const char *name, const TracerEventType type, } #endif #endif + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { + return; + } + if (FLAGS_enable_host_event_recorder_hook == false) { if (g_state != ProfilerState::kDisabled) { // avoid temp string OriginalConstruct(name, role, "none"); } return; } - if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { - return; - } + is_enabled_ = true; shallow_copy_name_ = name; role_ = role; @@ -102,13 +104,15 @@ RecordEvent::RecordEvent(const std::string &name, const TracerEventType type, } #endif #endif - if (FLAGS_enable_host_event_recorder_hook == false) { - OriginalConstruct(name, role, "none"); + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { return; } - if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { + + if (FLAGS_enable_host_event_recorder_hook == false) { + OriginalConstruct(name, role, "none"); return; } + is_enabled_ = true; name_ = new std::string(name); role_ = role; @@ -127,13 +131,16 @@ RecordEvent::RecordEvent(const std::string &name, const std::string &attr, } #endif #endif - if (FLAGS_enable_host_event_recorder_hook == false) { - OriginalConstruct(name, role, attr); + + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { return; } - if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { + + if (FLAGS_enable_host_event_recorder_hook == false) { + OriginalConstruct(name, role, attr); return; } + is_enabled_ = true; type_ = type; name_ = new std::string(name); From 2d2f580467c585a23323f32cb6a4115e7531d901 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Mar 2022 06:55:54 +0000 Subject: [PATCH 5/8] maintain --- paddle/fluid/platform/profiler.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index a9ff5f03c4356e..2434c15c80b566 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -82,7 +82,11 @@ RecordEvent::RecordEvent(const char *name, const TracerEventType type, if (FLAGS_enable_host_event_recorder_hook == false) { if (g_state != ProfilerState::kDisabled) { // avoid temp string - OriginalConstruct(name, role, "none"); + if (type == TracerEventType::Operator || + type == TracerEventType::OperatorInner || + type == TracerEventType::UserDefined) { + OriginalConstruct(name, role, "none"); + } } return; } @@ -109,7 +113,11 @@ RecordEvent::RecordEvent(const std::string &name, const TracerEventType type, } if (FLAGS_enable_host_event_recorder_hook == false) { - OriginalConstruct(name, role, "none"); + if (type == TracerEventType::Operator || + type == TracerEventType::OperatorInner || + type == TracerEventType::UserDefined) { + OriginalConstruct(name, role, "none"); + } return; } @@ -137,7 +145,11 @@ RecordEvent::RecordEvent(const std::string &name, const std::string &attr, } if (FLAGS_enable_host_event_recorder_hook == false) { - OriginalConstruct(name, role, attr); + if (type == TracerEventType::Operator || + type == TracerEventType::OperatorInner || + type == TracerEventType::UserDefined) { + OriginalConstruct(name, role, attr); + } return; } From 3483a7655ef721c2f63fca2cb8fa6e02d000a35c Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Mar 2022 07:46:27 +0000 Subject: [PATCH 6/8] maintain old profiler --- paddle/fluid/platform/profiler.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 2434c15c80b566..307cbad577cfaf 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler/common_event.h" #include "paddle/fluid/platform/profiler/host_event_recorder.h" #include "paddle/fluid/platform/profiler/host_tracer.h" +#include "paddle/fluid/platform/profiler/profiler.h" #include "paddle/fluid/platform/profiler_helper.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/dynload/nvtx.h" @@ -351,6 +352,8 @@ void EnableProfiler(ProfilerState state) { return; } g_state = state; + ProfilerOptions option; + HostTraceLevel::GetInstance().SetLevel(option.trace_level); should_send_profile_state = true; GetDeviceTracer()->Enable(); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) From 2bbc74b19ce33d7fa3392e777e6d2e929afbaacf Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Mar 2022 07:55:25 +0000 Subject: [PATCH 7/8] maintain --- paddle/fluid/platform/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index de09860fd26d54..1cec4b788d89cd 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -192,7 +192,7 @@ add_subdirectory(profiler) cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) if(WITH_GPU) - nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda) + nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda new_profiler) nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) elseif(WITH_ROCM) hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce) From 7f126031ba8f8cd00c4989715532d454167409bf Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 31 Mar 2022 02:36:11 +0000 Subject: [PATCH 8/8] fix cmakes --- paddle/fluid/platform/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 1cec4b788d89cd..46059100b3802a 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -195,10 +195,10 @@ if(WITH_GPU) nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda new_profiler) nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) elseif(WITH_ROCM) - hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce) + hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce new_profiler) hip_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) else() - cc_library(profiler SRCS profiler.cc DEPS os_info device_tracer enforce) + cc_library(profiler SRCS profiler.cc DEPS os_info device_tracer enforce new_profiler) cc_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info place) endif()