Fix -Wdeprecated-declarations + -Werror data_ptr complaints in kernels (pytorch#3639)

Summary:
Pull Request resolved: pytorch#3639

I couldn't build //xplat/executorch/examples/models/llama2:main because these flags are on by default in that environment.
ghstack-source-id: 226668848

Reviewed By: cccclai

Differential Revision: D57449759

fbshipit-source-id: 468568debc1c666b6c262f6c5918245d6eebaa44
swolchok authored and facebook-github-bot committed May 16, 2024
1 parent 5c70121 commit 524703f
Showing 9 changed files with 32 additions and 30 deletions.
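
The change itself is mechanical: every deprecated Tensor::data_ptr<T>() call becomes const_data_ptr<T>() for read-only inputs or mutable_data_ptr<T>() for outputs, which removes the deprecation warnings that -Werror turns into build failures. A minimal sketch of the pattern, using a hypothetical kernel (the accessors are the ones the diffs below rely on):

  // Before: data_ptr<T>() emits a deprecation warning, which -Werror
  // promotes to an error.
  //   float* in_data = input.data_ptr<float>();
  //   float* out_data = output.data_ptr<float>();
  // After: const-correct accessors, no warning.
  void scale_kernel(const Tensor& input, float alpha, Tensor& output) {
    const float* in_data = input.const_data_ptr<float>();  // read-only input
    float* out_data = output.mutable_data_ptr<float>();    // writable output
    for (size_t i = 0; i < input.numel(); ++i) {
      out_data[i] = alpha * in_data[i];
    }
  }
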
12 changes: 6 additions & 6 deletions examples/models/llama2/custom_ops/op_sdpa.cpp
@@ -351,12 +351,12 @@ void cpu_flash_attention(
   //     query.options());
 
   // Data ptrs
-  scalar_t* q_data = query.data_ptr<scalar_t>();
-  scalar_t* k_data = key.data_ptr<scalar_t>();
-  scalar_t* v_data = value.data_ptr<scalar_t>();
-  accum_t* mask_data =
-      has_attn_mask ? attn_mask.value().data_ptr<accum_t>() : nullptr;
-  scalar_t* out_data = output.data_ptr<scalar_t>();
+  const scalar_t* q_data = query.const_data_ptr<scalar_t>();
+  const scalar_t* k_data = key.const_data_ptr<scalar_t>();
+  const scalar_t* v_data = value.const_data_ptr<scalar_t>();
+  const accum_t* mask_data =
+      has_attn_mask ? attn_mask.value().const_data_ptr<accum_t>() : nullptr;
+  scalar_t* out_data = output.mutable_data_ptr<scalar_t>();
   accum_t* buf_data = reinterpret_cast<accum_t*>(buf);
   scalar_t* buf_reduced_data =
       is_reduced_type ? reinterpret_cast<scalar_t*>(buf_reduced) : nullptr;
6 changes: 3 additions & 3 deletions kernels/optimized/cpu/op_bmm.cpp
@@ -83,9 +83,9 @@ void bmm_kernel(const Tensor& self, const Tensor& mat2, Tensor& out) {
     return;
   }
 
-  const CTYPE* b_data = self.data_ptr<CTYPE>();
-  const CTYPE* a_data = mat2.data_ptr<CTYPE>();
-  CTYPE* c_data = out.data_ptr<CTYPE>();
+  const CTYPE* b_data = self.const_data_ptr<CTYPE>();
+  const CTYPE* a_data = mat2.const_data_ptr<CTYPE>();
+  CTYPE* c_data = out.mutable_data_ptr<CTYPE>();
 
   int64_t batch_size = self.size(0);
   int64_t n = self.size(1);
4 changes: 2 additions & 2 deletions kernels/optimized/cpu/op_gelu.cpp
@@ -42,8 +42,8 @@ void gelu(
     const Tensor& input,
     string_view approximate,
     Tensor& output) {
-  const CTYPE* in_data = input.data_ptr<CTYPE>();
-  CTYPE* out_data = output.data_ptr<CTYPE>();
+  const CTYPE* in_data = input.const_data_ptr<CTYPE>();
+  CTYPE* out_data = output.mutable_data_ptr<CTYPE>();
   size_t lim = input.numel();
 
   // TODO: Add fast path for tanh using sleef's tanh
4 changes: 2 additions & 2 deletions kernels/optimized/cpu/op_log_softmax.cpp
@@ -30,8 +30,8 @@ namespace {
 
 template <typename IN_T, typename OUT_T>
 void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) {
-  const IN_T* __restrict__ input_data_base = input.data_ptr<IN_T>();
-  OUT_T* __restrict__ output_data_base = out.data_ptr<OUT_T>();
+  const IN_T* __restrict__ input_data_base = input.const_data_ptr<IN_T>();
+  OUT_T* __restrict__ output_data_base = out.mutable_data_ptr<OUT_T>();
 
   if (input.dim() == 0) {
     output_data_base[0] = 0;
6 changes: 3 additions & 3 deletions kernels/quantized/cpu/op_add.cpp
@@ -60,9 +60,9 @@ void add_tensors(
     int64_t out_quant_max) {
   const size_t n = a.numel();
 
-  const auto data_a = a.data_ptr<CTYPE>();
-  const auto data_b = b.data_ptr<CTYPE>();
-  auto data_out = out.data_ptr<CTYPE>();
+  const auto data_a = a.const_data_ptr<CTYPE>();
+  const auto data_b = b.const_data_ptr<CTYPE>();
+  auto data_out = out.mutable_data_ptr<CTYPE>();
 
   for (size_t i = 0; i < n; ++i) {
     // Dq -> fp add -> Q. Can be optimized further
6 changes: 3 additions & 3 deletions kernels/quantized/cpu/op_choose_qparams.cpp
@@ -72,7 +72,7 @@ void choose_qparams(
     int32_t qmax,
     Tensor& scale_out,
     Tensor& zero_point_out) {
-  const float* x_fp32 = input.data_ptr<float>();
+  const float* x_fp32 = input.const_data_ptr<float>();
   // Compute x_min, x_max and q_params (scale, zero_point)
   float min = torch::executor::vec_minf(x_fp32, input.numel());
   float max = torch::executor::vec_maxf(x_fp32, input.numel());
@@ -144,8 +144,8 @@ void choose_qparams(
     nudged_zero_point = nearbyint(static_cast<float>(initial_zero_point));
   }
 
-  scale_out.data_ptr<double>()[0] = scale;
-  zero_point_out.data_ptr<int64_t>()[0] = nudged_zero_point;
+  scale_out.mutable_data_ptr<double>()[0] = scale;
+  zero_point_out.mutable_data_ptr<int64_t>()[0] = nudged_zero_point;
 }
 } // namespace
 
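
The values stored through those mutable pointers come from standard affine-quantization parameter selection: the scale maps the observed float range onto [qmin, qmax], and the zero point is nudged into that range and rounded. A hedged sketch of that math (illustrative only, not the exact ExecuTorch implementation; degenerate ranges and other edge cases are omitted):

  #include <algorithm>
  #include <cmath>
  #include <cstdint>

  // Map an observed float range [min, max] to an affine (scale, zero_point)
  // pair for the integer range [qmin, qmax]. No guard for min == max == 0.
  void compute_qparams(float min, float max, int32_t qmin, int32_t qmax,
                       double& scale, int64_t& zero_point) {
    min = std::min(min, 0.0f);  // keep 0.0f exactly representable
    max = std::max(max, 0.0f);
    scale = (static_cast<double>(max) - min) / (qmax - qmin);
    const double initial_zp = qmin - min / scale;
    const double nudged_zp = std::min(
        static_cast<double>(qmax),
        std::max(static_cast<double>(qmin), initial_zp));
    zero_point = static_cast<int64_t>(std::nearbyint(nudged_zp));
  }
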
19 changes: 10 additions & 9 deletions kernels/quantized/cpu/op_dequantize.cpp
@@ -91,13 +91,14 @@ Tensor& dequantize_per_tensor_out(
 
   // calculate the dequantized output, cast scale to float to match fbgemm
   // behavior
-#define DEQUANTIZE_IMPL(IN_CTYPE, OUT_CTYPE, out_dtype) \
-  case ScalarType::out_dtype: \
-    for (size_t i = 0; i < input.numel(); i++) { \
-      out.data_ptr<OUT_CTYPE>()[i] = static_cast<OUT_CTYPE>( \
-          (input.data_ptr<IN_CTYPE>()[i] - static_cast<int32_t>(zero_point)) * \
-          static_cast<float>(scale)); \
-    } \
+#define DEQUANTIZE_IMPL(IN_CTYPE, OUT_CTYPE, out_dtype) \
+  case ScalarType::out_dtype: \
+    for (size_t i = 0; i < input.numel(); i++) { \
+      out.mutable_data_ptr<OUT_CTYPE>()[i] = static_cast<OUT_CTYPE>( \
+          (input.const_data_ptr<IN_CTYPE>()[i] - \
+           static_cast<int32_t>(zero_point)) * \
+          static_cast<float>(scale)); \
+    } \
     break;
 #define CALCULATE_INT_TYPE(IN_CTYPE, in_dtype) \
   case ScalarType::in_dtype: \
@@ -153,8 +154,8 @@ Tensor& dequantize_per_tensor_tensor_args_out(
 
   dequantize_per_tensor_out(
       input,
-      scale.data_ptr<double>()[0],
-      zero_point.data_ptr<int64_t>()[0],
+      scale.const_data_ptr<double>()[0],
+      zero_point.const_data_ptr<int64_t>()[0],
       quant_min,
       quant_max,
       dtype,
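
For readability, here is roughly what the updated DEQUANTIZE_IMPL macro expands to for one type pair (an illustrative expansion assuming IN_CTYPE = int8_t, OUT_CTYPE = float, out_dtype = Float), showing where the const/mutable split inside the macro ends up:

  case ScalarType::Float:
    for (size_t i = 0; i < input.numel(); i++) {
      // Dequantize: subtract the zero point, then scale back to float.
      out.mutable_data_ptr<float>()[i] = static_cast<float>(
          (input.const_data_ptr<int8_t>()[i] -
           static_cast<int32_t>(zero_point)) *
          static_cast<float>(scale));
    }
    break;
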
2 changes: 1 addition & 1 deletion kernels/quantized/cpu/op_embedding.cpp
@@ -163,7 +163,7 @@ void embedding_byte_per_channel(
   }
 
   const CTYPE_WEIGHT* w_data =
-      weight.data_ptr<CTYPE_WEIGHT>() + embedding_dim * index;
+      weight.const_data_ptr<CTYPE_WEIGHT>() + embedding_dim * index;
 
   for (int j = 0; j < embedding_dim; ++j) {
     int32_t group_id = j / group_size;
3 changes: 2 additions & 1 deletion kernels/quantized/cpu/op_embedding4b.cpp
@@ -170,7 +170,8 @@ void embedding_4bit_per_channel(
     zero_points_ptr = zero_points + qparams_index;
   }
 
-  const uint8_t* w_data = weight.data_ptr<uint8_t>() + weight.size(1) * index;
+  const uint8_t* w_data =
+      weight.const_data_ptr<uint8_t>() + weight.size(1) * index;
 
   for (int j = 0; j < embedding_dim; ++j) {
     int32_t group_id = j / group_size;
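
The 4-bit variant gets the same accessor change; the extra wrinkle is that each byte of w_data packs two 4-bit weights, so the row offset weight.size(1) * index is measured in packed bytes rather than elements. A hedged sketch of unpacking one packed byte (illustrative only; the actual nibble order and dequantization are handled by the surrounding kernel, which is outside this hunk):

  #include <cstdint>

  // Split one packed byte into its two 4-bit unsigned values.
  inline void unpack_4bit_pair(uint8_t packed, uint8_t& lo, uint8_t& hi) {
    lo = packed & 0x0F;         // low nibble
    hi = (packed >> 4) & 0x0F;  // high nibble
  }
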
