Skip to content

Commit

Permalink
remove autocast verbose (#424)
Browse files Browse the repository at this point in the history
  • Loading branch information
CaoE authored Dec 22, 2021
1 parent eb1b08b commit ed31bba
Show file tree
Hide file tree
Showing 18 changed files with 0 additions and 181 deletions.
3 changes: 0 additions & 3 deletions cmake/CPU.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@ IF("${IPEX_PROFILE_OP}" STREQUAL "1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIPEX_PROFILE_OP")
ENDIF()

# When ENABLE_AUTOCAST_VERBOSE is set to "1", append -DENABLE_AUTOCAST_VERBOSE
# to the C++ compiler flags so that code guarded by
# `#if defined(ENABLE_AUTOCAST_VERBOSE)` is compiled in.
IF("${ENABLE_AUTOCAST_VERBOSE}" STREQUAL "1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_AUTOCAST_VERBOSE")
ENDIF()

# ---[ Build flags
set(CMAKE_C_STANDARD 11)
Expand Down
4 changes: 0 additions & 4 deletions intel_extension_for_pytorch/csrc/aten/cpu/BatchNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include "csrc/cpu/ideep/IDeepConversions.h"

#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"

namespace torch_ipex {
namespace cpu {
Expand Down Expand Up @@ -346,9 +345,6 @@ at::Tensor frozen_batch_norm(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::frozen_batch_norm", "")
.typed<decltype(frozen_batch_norm)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("frozen_batch_norm");
#endif
return op.call(input, weight, bias, running_mean, running_var);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include <torch/extension.h>
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/utils/library.h"

namespace torch_ipex {
Expand Down
4 changes: 0 additions & 4 deletions intel_extension_for_pytorch/csrc/aten/cpu/Conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include <torch/extension.h>
#include "WeightPack.h"
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/cpu/ideep/IDeepConversions.h"
#include "csrc/utils/utils.h"

Expand Down Expand Up @@ -785,9 +784,6 @@ at::Tensor convolution_forward(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::convolution_forward", "")
.typed<decltype(convolution_forward)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("convolution_forward");
#endif
auto target_type = get_autocast_dtype();

// TODO: make check weight dtype should be float for training case.
Expand Down
4 changes: 0 additions & 4 deletions intel_extension_for_pytorch/csrc/aten/cpu/ConvTranspose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include <torch/extension.h>
#include "WeightPack.h"
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/cpu/ideep/IDeepConversions.h"
#include "csrc/utils/utils.h"

Expand Down Expand Up @@ -617,9 +616,6 @@ at::Tensor conv_transpose2d(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::conv_transpose2d", "")
.typed<decltype(conv_transpose2d)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("conv_transpose2d");
#endif
auto target_type = get_autocast_dtype();

// TODO: make check weight dtype should be float for training case.
Expand Down
7 changes: 0 additions & 7 deletions intel_extension_for_pytorch/csrc/aten/cpu/Linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include "Linear.h"
#include "WeightPack.h"
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/cpu/ideep/IDeepConversions.h"
#include "csrc/utils/utils.h"

Expand Down Expand Up @@ -308,9 +307,6 @@ at::Tensor ipex_linear(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::ipex_linear", "")
.typed<decltype(ipex_linear)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("ipex_linear");
#endif
auto target_type = get_autocast_dtype();
TORCH_CHECK(
weight.scalar_type() == at::kBFloat16 ||
Expand All @@ -335,9 +331,6 @@ at::Tensor ipex_linear_eltwise(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::ipex_linear_eltwise", "")
.typed<decltype(ipex_linear_eltwise)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("ipex_linear_eltwise");
#endif
auto target_type = get_autocast_dtype();
TORCH_CHECK(
weight.scalar_type() == at::kBFloat16 ||
Expand Down
1 change: 0 additions & 1 deletion intel_extension_for_pytorch/csrc/aten/cpu/PixelShuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "PixelShuffle.h"

#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/utils/library.h"

namespace torch_ipex {
Expand Down
4 changes: 0 additions & 4 deletions intel_extension_for_pytorch/csrc/aten/cpu/RNN.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include <torch/extension.h>
#include "WeightPack.h"
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/cpu/ideep/IDeepConversions.h"
#include "csrc/utils/utils.h"

Expand Down Expand Up @@ -999,9 +998,6 @@ std::vector<at::Tensor> ipex_lstm_layer(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::ipex_lstm_layer", "")
.typed<decltype(ipex_lstm_layer)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("ipex_lstm_layer");
#endif
#if defined(IPEX_DISP_OP)
printf("torch_ipex::autocast::ipex_lstm_layer\n");
#endif
Expand Down
7 changes: 0 additions & 7 deletions intel_extension_for_pytorch/csrc/aten/cpu/ROIAlign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <ATen/cpu/vec/vec.h>
#include <torch/library.h>
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/utils/library.h"

// use float as accumulation type for BFloat16
Expand Down Expand Up @@ -931,9 +930,6 @@ at::Tensor roi_align_autocast(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torchvision::roi_align", "")
.typed<decltype(torch_ipex::cpu::ROIAlign_forward)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("roi_align");
#endif
if (input.scalar_type() == at::ScalarType::BFloat16) {
return op.call(
input,
Expand Down Expand Up @@ -967,9 +963,6 @@ at::Tensor ROIAlign_forward(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::ROIAlign_forward", "")
.typed<decltype(torch_ipex::cpu::ROIAlign_forward)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("ROIAlign_forward");
#endif
if (input.scalar_type() == at::ScalarType::BFloat16) {
return op.call(
input,
Expand Down
4 changes: 0 additions & 4 deletions intel_extension_for_pytorch/csrc/aten/cpu/embeddingbag.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "embeddingbag.h"
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/cpu/vec512/bf16/vec/bf16_vec_kernel.h"
#include "csrc/cpu/vec512/int8/vec/int8_vec_kernel.h"
#include "csrc/jit/cpu/kernels/Embeddingbag.h"
Expand Down Expand Up @@ -561,9 +560,6 @@ at::Tensor embedding_bag(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::embedding_bag", "")
.typed<decltype(embedding_bag)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("embedding_bag");
#endif
auto target_type = get_autocast_dtype();
if (is_quantization_enabled()) {
return int8::embedding_bag(
Expand Down
7 changes: 0 additions & 7 deletions intel_extension_for_pytorch/csrc/aten/cpu/interaction.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "interaction.h"
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/cpu/ideep/IDeepConversions.h"
#include "csrc/cpu/vec512/bf16/vec/bf16_vec_kernel.h"
#include "csrc/cpu/vec512/int8/vec/int8_vec_kernel.h"
Expand Down Expand Up @@ -291,9 +290,6 @@ std::vector<at::Tensor> interaction_backward(
// Performance overhead in training here if you use autocast.
// Because we save the ctx.arg in python before autocast, we have duplicated
// cast for the input: here and in autocast of the forward path.
#if defined(ENABLE_AUTOCAST_VERBOSE)
torch_ipex::autocast::verbose::OpNameGuard op_name("interaction_backward");
#endif
return _interaction_backward<at::BFloat16>(
grad_out, torch_ipex::autocast::cpu_cached_cast(at::kBFloat16, input));
}
Expand Down Expand Up @@ -477,9 +473,6 @@ at::Tensor interaction_forward(const std::vector<at::Tensor>& input) {
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::interaction_forward", "")
.typed<decltype(interaction_forward)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("interaction_forward");
#endif

auto target_type = get_autocast_dtype();
if (is_quantization_enabled()) {
Expand Down
16 changes: 0 additions & 16 deletions intel_extension_for_pytorch/csrc/aten/cpu/nms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <torch/csrc/autograd/function.h>
#include <algorithm>
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/jit/cpu/kernels/Softmax.h"

namespace torch_ipex {
Expand Down Expand Up @@ -967,9 +966,6 @@ at::Tensor nms(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::nms", "")
.typed<decltype(nms)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("nms");
#endif
return op.call(
cpu_cached_cast(at::kFloat, dets),
cpu_cached_cast(at::kFloat, scores),
Expand All @@ -986,9 +982,6 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor> batch_score_nms(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::batch_score_nms", "")
.typed<decltype(batch_score_nms)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("batch_score_nms");
#endif
return op.call(
cpu_cached_cast(at::kFloat, dets),
cpu_cached_cast(at::kFloat, scores),
Expand All @@ -1007,9 +1000,6 @@ std::tuple<std::vector<at::Tensor>, std::vector<at::Tensor>> rpn_nms(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::rpn_nms", "")
.typed<decltype(rpn_nms)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("rpn_nms");
#endif
return op.call(
cpu_cached_cast(at::kFloat, batch_dets),
cpu_cached_cast(at::kFloat, batch_scores),
Expand All @@ -1035,9 +1025,6 @@ box_head_nms(
static auto op = torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::box_head_nms", "")
.typed<decltype(box_head_nms)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("box_head_nms");
#endif
return op.call(
cpu_cached_cast(at::kFloat, batch_bboxes),
cpu_cached_cast(at::kFloat, batch_scores),
Expand All @@ -1059,9 +1046,6 @@ std::tuple<at::Tensor, at::Tensor> parallel_scale_back_batch(
torch::Dispatcher::singleton()
.findSchemaOrThrow("torch_ipex::parallel_scale_back_batch", "")
.typed<decltype(parallel_scale_back_batch)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("parallel_scale_back_batch");
#endif
return op.call(
cpu_cached_cast(at::kFloat, bboxes_in),
cpu_cached_cast(at::kFloat, scores_in),
Expand Down
4 changes: 0 additions & 4 deletions intel_extension_for_pytorch/csrc/aten/cpu/torchvision_nms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <ATen/record_function.h>
#include <torch/types.h>
#include "csrc/autocast/autocast_mode.h"
#include "csrc/autocast/autocast_verbose.h"
#include "csrc/utils/library.h"

#include "torchvision_nms.h"
Expand Down Expand Up @@ -143,9 +142,6 @@ at::Tensor nms_autocast(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold)>();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("nms");
#endif
return op.call(
cpu_cached_cast(at::kFloat, dets),
cpu_cached_cast(at::kFloat, scores),
Expand Down
19 changes: 0 additions & 19 deletions intel_extension_for_pytorch/csrc/autocast/autocast_kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "autocast_kernel.hpp"
#include "autocast_mode.h"
#include "autocast_verbose.h"
#include "csrc/aten/cpu/BatchNorm.h"
#include "csrc/quantization/AutoCast.hpp"

Expand All @@ -15,9 +14,6 @@ Ret DataTypeCastFuction(
Args... args) {
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
auto target_type = get_autocast_dtype();
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name(register_op_name);
#endif
if (is_quantization_enabled()) {
return Quant(cpu_cached_cast(target_type, args)...);
} else {
Expand All @@ -32,9 +28,6 @@ Ret FallThroughFuction(
std::string register_op_name,
Args... args) {
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name(register_op_name);
#endif
if (is_quantization_enabled()) {
return Quant(args...);
} else {
Expand Down Expand Up @@ -122,9 +115,6 @@ at::Tensor _convolution(
bool cudnn_enabled,
bool allow_tf32) {
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("_convolution");
#endif
if (is_quantization_enabled()) {
return int8::_convolution(
input,
Expand Down Expand Up @@ -172,9 +162,6 @@ at::Tensor _convolution_deprecated(
bool deterministic,
bool cudnn_enabled) {
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("_convolution_deprecated");
#endif
auto target_type = get_autocast_dtype();
if (is_quantization_enabled()) {
return int8::_convolution(
Expand Down Expand Up @@ -415,9 +402,6 @@ at::Tensor relu(const at::Tensor& input) {

at::Tensor& relu_(at::Tensor& input) {
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("relu_");
#endif
if (is_quantization_enabled()) {
return int8::relu_(input);
}
Expand All @@ -434,9 +418,6 @@ at::Tensor& add_tensor_(
const at::Tensor& other,
const at::Scalar& alpha) {
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::OpNameGuard op_name("add_tensor_");
#endif
if (is_quantization_enabled()) {
return int8::add_tensor_(input, other, alpha);
}
Expand Down
16 changes: 0 additions & 16 deletions intel_extension_for_pytorch/csrc/autocast/autocast_mode.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "autocast_mode.h"
#include "autocast_kernel.hpp"
#include "autocast_verbose.h"

#include "library.h"

Expand Down Expand Up @@ -74,16 +73,10 @@ Tensor cpu_cached_cast(at::ScalarType to_type, const Tensor& arg) {
auto casted_arg = arg;
if (arg.scalar_type() == at::kFloat && to_type == at::kBFloat16) {
// This path works for fp32 to bf16
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::autocast_verbose(to_type, arg);
#endif
casted_arg = arg.to(at::kBFloat16);
// casted_arg = arg.to_mkldnn(at::kBFloat16);
} else if (arg.scalar_type() == at::kBFloat16 && to_type == at::kFloat) {
// This path works for bf16 to fp32
#if defined(ENABLE_AUTOCAST_VERBOSE)
verbose::autocast_verbose(to_type, arg);
#endif
casted_arg = arg.to(at::kFloat);
// casted_arg = arg.to_dense(at::kFloat);
}
Expand Down Expand Up @@ -141,9 +134,6 @@ struct CPU_WrapFunction_<
guts::typelist::typelist<Args...>> {
// Wrapper entry point: forwards to F after passing every argument through
// cpu_cached_cast with current_target_dtype as the requested dtype.
// NOTE(review): current_target_dtype is defined outside this view; presumably
// the currently configured autocast dtype -- confirm.
static Ret call(Args... args) {
// Guard object constructed with DispatchKey::AutocastCPU; it lives until the
// end of this function, so it is still alive while F runs.
// NOTE(review): presumably this keeps F from re-entering the autocast
// wrapper -- confirm against the dispatcher documentation.
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
// Compiled only in verbose builds. NOTE(review): OpNameGuard is opaque here;
// it appears to tag the scope with the op name for verbose output -- confirm.
verbose::OpNameGuard op_name(get_op_name<Redispatch, F>());
#endif
return (*F)(cpu_cached_cast(current_target_dtype, args)...);
}
};
Expand All @@ -158,9 +148,6 @@ struct CPU_WrapFunction_<
guts::typelist::typelist<Args...>> {
// Wrapper entry point: forwards to F after passing every argument through
// cpu_cached_cast with at::kFloat as the requested dtype.
static Ret call(Args... args) {
// Guard object constructed with DispatchKey::AutocastCPU; it lives until the
// end of this function, so it is still alive while F runs.
// NOTE(review): presumably this keeps F from re-entering the autocast
// wrapper -- confirm against the dispatcher documentation.
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
#if defined(ENABLE_AUTOCAST_VERBOSE)
// Compiled only in verbose builds. NOTE(review): OpNameGuard is opaque here;
// it appears to tag the scope with the op name for verbose output -- confirm.
verbose::OpNameGuard op_name(get_op_name<Redispatch, F>());
#endif
return (*F)(cpu_cached_cast(at::kFloat, args)...);
}
};
Expand All @@ -176,9 +163,6 @@ struct CPU_WrapFunction_<
// Wrapper entry point: derives one dtype from the arguments via promote_type
// (seeded with at::kBFloat16), then forwards to F after passing every
// argument through cpu_cached_cast with that dtype.
static Ret call(Args... args) {
// Guard object constructed with DispatchKey::AutocastCPU; it lives until the
// end of this function, so it is still alive while F runs.
// NOTE(review): presumably this keeps F from re-entering the autocast
// wrapper -- confirm against the dispatcher documentation.
c10::impl::ExcludeDispatchKeyGuard no_autocastCPU(DispatchKey::AutocastCPU);
// NOTE(review): promote_type is defined outside this view; presumably it
// widens the result when an argument has a wider dtype -- confirm.
auto to_type = promote_type(at::kBFloat16, args...);
#if defined(ENABLE_AUTOCAST_VERBOSE)
// Compiled only in verbose builds. NOTE(review): OpNameGuard is opaque here;
// it appears to tag the scope with the op name for verbose output -- confirm.
verbose::OpNameGuard op_name(get_op_name<Redispatch, F>());
#endif
return (*F)(cpu_cached_cast(to_type, args)...);
}
};
Expand Down
Loading

0 comments on commit ed31bba

Please sign in to comment.