remove autocast verbose (#424)

intel · Dec 22, 2021 · ed31bba
1 parent eb1b08b
commit ed31bba
Show file tree

Hide file tree

Showing 18 changed files with 0 additions and 181 deletions.
diff --git a/cmake/CPU.cmake b/cmake/CPU.cmake
@@ -51,9 +51,6 @@ IF("${IPEX_PROFILE_OP}" STREQUAL "1")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIPEX_PROFILE_OP")
 ENDIF()
 
-IF("${ENABLE_AUTOCAST_VERBOSE}" STREQUAL "1")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_AUTOCAST_VERBOSE")
-ENDIF()
 
 # ---[ Build flags
 set(CMAKE_C_STANDARD 11)

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/BatchNorm.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/BatchNorm.cpp
@@ -4,7 +4,6 @@
 #include "csrc/cpu/ideep/IDeepConversions.h"
 
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 
 namespace torch_ipex {
 namespace cpu {
@@ -346,9 +345,6 @@ at::Tensor frozen_batch_norm(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::frozen_batch_norm", "")
                        .typed<decltype(frozen_batch_norm)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("frozen_batch_norm");
-#endif
   return op.call(input, weight, bias, running_mean, running_var);
 }
 

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/ChannelShuffle.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/ChannelShuffle.cpp
@@ -18,7 +18,6 @@
 
 #include <torch/extension.h>
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/utils/library.h"
 
 namespace torch_ipex {

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/Conv.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/Conv.cpp
@@ -2,7 +2,6 @@
 #include <torch/extension.h>
 #include "WeightPack.h"
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/cpu/ideep/IDeepConversions.h"
 #include "csrc/utils/utils.h"
 
@@ -785,9 +784,6 @@ at::Tensor convolution_forward(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::convolution_forward", "")
                        .typed<decltype(convolution_forward)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("convolution_forward");
-#endif
   auto target_type = get_autocast_dtype();
 
   // TODO: make check weight dtype should be float for training case.

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/ConvTranspose.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/ConvTranspose.cpp
@@ -2,7 +2,6 @@
 #include <torch/extension.h>
 #include "WeightPack.h"
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/cpu/ideep/IDeepConversions.h"
 #include "csrc/utils/utils.h"
 
@@ -617,9 +616,6 @@ at::Tensor conv_transpose2d(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::conv_transpose2d", "")
                        .typed<decltype(conv_transpose2d)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("conv_transpose2d");
-#endif
   auto target_type = get_autocast_dtype();
 
   // TODO: make check weight dtype should be float for training case.

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/Linear.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/Linear.cpp
@@ -4,7 +4,6 @@
 #include "Linear.h"
 #include "WeightPack.h"
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/cpu/ideep/IDeepConversions.h"
 #include "csrc/utils/utils.h"
 
@@ -308,9 +307,6 @@ at::Tensor ipex_linear(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::ipex_linear", "")
                        .typed<decltype(ipex_linear)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("ipex_linear");
-#endif
   auto target_type = get_autocast_dtype();
   TORCH_CHECK(
       weight.scalar_type() == at::kBFloat16 ||
@@ -335,9 +331,6 @@ at::Tensor ipex_linear_eltwise(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::ipex_linear_eltwise", "")
                        .typed<decltype(ipex_linear_eltwise)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("ipex_linear_eltwise");
-#endif
   auto target_type = get_autocast_dtype();
   TORCH_CHECK(
       weight.scalar_type() == at::kBFloat16 ||

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/PixelShuffle.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/PixelShuffle.cpp
@@ -12,7 +12,6 @@
 #include "PixelShuffle.h"
 
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/utils/library.h"
 
 namespace torch_ipex {

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/RNN.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/RNN.cpp
@@ -9,7 +9,6 @@
 #include <torch/extension.h>
 #include "WeightPack.h"
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/cpu/ideep/IDeepConversions.h"
 #include "csrc/utils/utils.h"
 
@@ -999,9 +998,6 @@ std::vector<at::Tensor> ipex_lstm_layer(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::ipex_lstm_layer", "")
                        .typed<decltype(ipex_lstm_layer)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("ipex_lstm_layer");
-#endif
 #if defined(IPEX_DISP_OP)
   printf("torch_ipex::autocast::ipex_lstm_layer\n");
 #endif

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/ROIAlign.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/ROIAlign.cpp
@@ -5,7 +5,6 @@
 #include <ATen/cpu/vec/vec.h>
 #include <torch/library.h>
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/utils/library.h"
 
 // use float as accumulation type for BFloat16
@@ -931,9 +930,6 @@ at::Tensor roi_align_autocast(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torchvision::roi_align", "")
                        .typed<decltype(torch_ipex::cpu::ROIAlign_forward)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("roi_align");
-#endif
   if (input.scalar_type() == at::ScalarType::BFloat16) {
     return op.call(
         input,
@@ -967,9 +963,6 @@ at::Tensor ROIAlign_forward(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::ROIAlign_forward", "")
                        .typed<decltype(torch_ipex::cpu::ROIAlign_forward)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("ROIAlign_forward");
-#endif
   if (input.scalar_type() == at::ScalarType::BFloat16) {
     return op.call(
         input,

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/embeddingbag.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/embeddingbag.cpp
@@ -1,6 +1,5 @@
 #include "embeddingbag.h"
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/cpu/vec512/bf16/vec/bf16_vec_kernel.h"
 #include "csrc/cpu/vec512/int8/vec/int8_vec_kernel.h"
 #include "csrc/jit/cpu/kernels/Embeddingbag.h"
@@ -561,9 +560,6 @@ at::Tensor embedding_bag(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::embedding_bag", "")
                        .typed<decltype(embedding_bag)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("embedding_bag");
-#endif
   auto target_type = get_autocast_dtype();
   if (is_quantization_enabled()) {
     return int8::embedding_bag(

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/interaction.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/interaction.cpp
@@ -1,7 +1,6 @@
 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 #include "interaction.h"
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/cpu/ideep/IDeepConversions.h"
 #include "csrc/cpu/vec512/bf16/vec/bf16_vec_kernel.h"
 #include "csrc/cpu/vec512/int8/vec/int8_vec_kernel.h"
@@ -291,9 +290,6 @@ std::vector<at::Tensor> interaction_backward(
     // Performance overhead in training here if you use autocast.
     // Because we save the ctx.arg in python before autocast, we have duplicated
     // cast for the input: here and in autocast of the forward path.
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-    torch_ipex::autocast::verbose::OpNameGuard op_name("interaction_backward");
-#endif
     return _interaction_backward<at::BFloat16>(
         grad_out, torch_ipex::autocast::cpu_cached_cast(at::kBFloat16, input));
   }
@@ -477,9 +473,6 @@ at::Tensor interaction_forward(const std::vector<at::Tensor>& input) {
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::interaction_forward", "")
                        .typed<decltype(interaction_forward)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("interaction_forward");
-#endif
 
   auto target_type = get_autocast_dtype();
   if (is_quantization_enabled()) {

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/nms.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/nms.cpp
@@ -6,7 +6,6 @@
 #include <torch/csrc/autograd/function.h>
 #include <algorithm>
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/jit/cpu/kernels/Softmax.h"
 
 namespace torch_ipex {
@@ -967,9 +966,6 @@ at::Tensor nms(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::nms", "")
                        .typed<decltype(nms)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("nms");
-#endif
   return op.call(
       cpu_cached_cast(at::kFloat, dets),
       cpu_cached_cast(at::kFloat, scores),
@@ -986,9 +982,6 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> batch_score_nms(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::batch_score_nms", "")
                        .typed<decltype(batch_score_nms)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("batch_score_nms");
-#endif
   return op.call(
       cpu_cached_cast(at::kFloat, dets),
       cpu_cached_cast(at::kFloat, scores),
@@ -1007,9 +1000,6 @@ std::tuple<std::vector<at::Tensor>, std::vector<at::Tensor>> rpn_nms(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::rpn_nms", "")
                        .typed<decltype(rpn_nms)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("rpn_nms");
-#endif
   return op.call(
       cpu_cached_cast(at::kFloat, batch_dets),
       cpu_cached_cast(at::kFloat, batch_scores),
@@ -1035,9 +1025,6 @@ box_head_nms(
   static auto op = torch::Dispatcher::singleton()
                        .findSchemaOrThrow("torch_ipex::box_head_nms", "")
                        .typed<decltype(box_head_nms)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("box_head_nms");
-#endif
   return op.call(
       cpu_cached_cast(at::kFloat, batch_bboxes),
       cpu_cached_cast(at::kFloat, batch_scores),
@@ -1059,9 +1046,6 @@ std::tuple<at::Tensor, at::Tensor> parallel_scale_back_batch(
       torch::Dispatcher::singleton()
           .findSchemaOrThrow("torch_ipex::parallel_scale_back_batch", "")
           .typed<decltype(parallel_scale_back_batch)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("parallel_scale_back_batch");
-#endif
   return op.call(
       cpu_cached_cast(at::kFloat, bboxes_in),
       cpu_cached_cast(at::kFloat, scores_in),

diff --git a/intel_extension_for_pytorch/csrc/aten/cpu/torchvision_nms.cpp b/intel_extension_for_pytorch/csrc/aten/cpu/torchvision_nms.cpp
@@ -3,7 +3,6 @@
 #include <ATen/record_function.h>
 #include <torch/types.h>
 #include "csrc/autocast/autocast_mode.h"
-#include "csrc/autocast/autocast_verbose.h"
 #include "csrc/utils/library.h"
 
 #include "torchvision_nms.h"
@@ -143,9 +142,6 @@ at::Tensor nms_autocast(
                            const at::Tensor& dets,
                            const at::Tensor& scores,
                            double iou_threshold)>();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("nms");
-#endif
   return op.call(
       cpu_cached_cast(at::kFloat, dets),
       cpu_cached_cast(at::kFloat, scores),

diff --git a/intel_extension_for_pytorch/csrc/autocast/autocast_kernel.cpp b/intel_extension_for_pytorch/csrc/autocast/autocast_kernel.cpp
@@ -1,6 +1,5 @@
 #include "autocast_kernel.hpp"
 #include "autocast_mode.h"
-#include "autocast_verbose.h"
 #include "csrc/aten/cpu/BatchNorm.h"
 #include "csrc/quantization/AutoCast.hpp"
 
@@ -15,9 +14,6 @@ Ret DataTypeCastFuction(
     Args... args) {
   c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
   auto target_type = get_autocast_dtype();
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name(register_op_name);
-#endif
   if (is_quantization_enabled()) {
     return Quant(cpu_cached_cast(target_type, args)...);
   } else {
@@ -32,9 +28,6 @@ Ret FallThroughFuction(
     std::string register_op_name,
     Args... args) {
   c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name(register_op_name);
-#endif
   if (is_quantization_enabled()) {
     return Quant(args...);
   } else {
@@ -122,9 +115,6 @@ at::Tensor _convolution(
     bool cudnn_enabled,
     bool allow_tf32) {
   c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("_convolution");
-#endif
   if (is_quantization_enabled()) {
     return int8::_convolution(
         input,
@@ -172,9 +162,6 @@ at::Tensor _convolution_deprecated(
     bool deterministic,
     bool cudnn_enabled) {
   c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("_convolution_deprecated");
-#endif
   auto target_type = get_autocast_dtype();
   if (is_quantization_enabled()) {
     return int8::_convolution(
@@ -415,9 +402,6 @@ at::Tensor relu(const at::Tensor& input) {
 
 at::Tensor& relu_(at::Tensor& input) {
   c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("relu_");
-#endif
   if (is_quantization_enabled()) {
     return int8::relu_(input);
   }
@@ -434,9 +418,6 @@ at::Tensor& add_tensor_(
     const at::Tensor& other,
     const at::Scalar& alpha) {
   c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-  verbose::OpNameGuard op_name("add_tensor_");
-#endif
   if (is_quantization_enabled()) {
     return int8::add_tensor_(input, other, alpha);
   }

diff --git a/intel_extension_for_pytorch/csrc/autocast/autocast_mode.cpp b/intel_extension_for_pytorch/csrc/autocast/autocast_mode.cpp
@@ -1,6 +1,5 @@
 #include "autocast_mode.h"
 #include "autocast_kernel.hpp"
-#include "autocast_verbose.h"
 
 #include "library.h"
 
@@ -74,16 +73,10 @@ Tensor cpu_cached_cast(at::ScalarType to_type, const Tensor& arg) {
     auto casted_arg = arg;
     if (arg.scalar_type() == at::kFloat && to_type == at::kBFloat16) {
       // This path works for fp32 to bf16
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-      verbose::autocast_verbose(to_type, arg);
-#endif
       casted_arg = arg.to(at::kBFloat16);
       // casted_arg = arg.to_mkldnn(at::kBFloat16);
     } else if (arg.scalar_type() == at::kBFloat16 && to_type == at::kFloat) {
       // This path works for bf16 to fp32
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-      verbose::autocast_verbose(to_type, arg);
-#endif
       casted_arg = arg.to(at::kFloat);
       // casted_arg = arg.to_dense(at::kFloat);
     }
@@ -141,9 +134,6 @@ struct CPU_WrapFunction_<
     guts::typelist::typelist<Args...>> {
   static Ret call(Args... args) {
     c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-    verbose::OpNameGuard op_name(get_op_name<Redispatch, F>());
-#endif
     return (*F)(cpu_cached_cast(current_target_dtype, args)...);
   }
 };
@@ -158,9 +148,6 @@ struct CPU_WrapFunction_<
     guts::typelist::typelist<Args...>> {
   static Ret call(Args... args) {
     c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-    verbose::OpNameGuard op_name(get_op_name<Redispatch, F>());
-#endif
     return (*F)(cpu_cached_cast(at::kFloat, args)...);
   }
 };
@@ -176,9 +163,6 @@ struct CPU_WrapFunction_<
   static Ret call(Args... args) {
     c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
     auto to_type = promote_type(at::kBFloat16, args...);
-#if defined(ENABLE_AUTOCAST_VERBOSE)
-    verbose::OpNameGuard op_name(get_op_name<Redispatch, F>());
-#endif
     return (*F)(cpu_cached_cast(to_type, args)...);
   }
 };