update quantize/dequantize op yaml
csy0225 committed Oct 23, 2023
1 parent ea8907e commit 748bb9d
Showing 10 changed files with 52 additions and 108 deletions.
16 changes: 3 additions & 13 deletions paddle/fluid/framework/ir/xpu/xpu_graph_pattern_detector.cc
@@ -57,9 +57,6 @@ PDNode *patterns::DequantQuantXPUAny::operator()() {
auto *dequant_in = pattern->NewNode(dequant_in_repr())
->AsInput()
->assert_is_op_input("dequantize_xpu", "x");
auto *dequant_max_in = pattern->NewNode(dequant_max_in_repr())
->AsInput()
->assert_is_op_input("dequantize_xpu", "max");

auto *dequant_op =
pattern->NewNode(dequant_op_repr())->assert_is_op("dequantize_xpu");
@@ -68,9 +65,6 @@ PDNode *patterns::DequantQuantXPUAny::operator()() {
->AsOutput()
->assert_is_op_output("dequantize_xpu", "y");

auto *quant_max_in = pattern->NewNode(quant_max_in_repr())
->assert_is_op_input("quantize_xpu", "max");

auto *quant_op = pattern->NewNode(quant_op_repr())
->assert_is_op("quantize_xpu")
->AsIntermediate();
@@ -81,8 +75,8 @@ PDNode *patterns::DequantQuantXPUAny::operator()() {

auto *next_op = pattern->NewNode(next_op_repr())->assert_is_op();

dequant_op->LinksFrom({dequant_in, dequant_max_in}).LinksTo({dequant_out});
quant_op->LinksFrom({dequant_out, quant_max_in}).LinksTo({quant_out});
dequant_op->LinksFrom({dequant_in}).LinksTo({dequant_out});
quant_op->LinksFrom({dequant_out}).LinksTo({quant_out});
next_op->LinksFrom({quant_out});

return quant_out;
@@ -92,18 +86,14 @@ PDNode *patterns::OpDequantXPU::operator()() {
auto any_op = pattern->NewNode(any_op_repr())->assert_is_op();
auto *dequant_in = pattern->NewNode(dequant_in_repr())
->assert_is_op_input("dequantize_xpu", "x");

auto *dequant_max_in = pattern->NewNode(dequant_max_in_repr())
->AsInput()
->assert_is_op_input("dequantize_xpu", "max");
auto *dequant_op =
pattern->NewNode(dequant_op_repr())->assert_is_op("dequantize_xpu");
auto dequant_out = pattern->NewNode(dequant_out_repr())
->AsOutput()
->assert_is_op_output("dequantize_xpu", "y");

any_op->LinksTo({dequant_in});
dequant_op->LinksFrom({dequant_in, dequant_max_in}).LinksTo({dequant_out});
dequant_op->LinksFrom({dequant_in}).LinksTo({dequant_out});
return dequant_out;
}

44 changes: 1 addition & 43 deletions paddle/fluid/framework/ir/xpu/xpu_quantize_op_pass.cc
@@ -66,31 +66,11 @@ void XPUQuantizeOpPass::QuantizeInput(Graph* g,
quantize_out_node->Var()->SetDataType(
proto::VarType::Type::VarType_Type_INT8);

// Create quantize max_ptr node
float scale = GetScaleValueForNode(&var_quant_scales_, input);
int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1);
std::string input_max_name = input->Name() + "_quantize_max";
VarDesc input_max_desc(input_max_name);
input_max_desc.SetPersistable(true);
input_max_desc.SetShape({static_cast<int64_t>(max_ptr_size)});
input_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32);
Node* input_max_node = g->CreateVarNode(&input_max_desc);
auto input_max_tensor =
scope->Var(input_max_name)->GetMutable<phi::DenseTensor>();
input_max_tensor->set_type(phi::DataType::FLOAT32);
input_max_tensor->Resize({max_ptr_size});
auto* cpu_ctx = static_cast<phi::CPUContext*>(
platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
std::vector<float> input_scales(max_ptr_size, scale);
memcpy(cpu_ctx->Alloc<float>(input_max_tensor),
input_scales.data(),
max_ptr_size * sizeof(float));

// Create a quantize op node
float scale = GetScaleValueForNode(&var_quant_scales_, input);
OpDesc q_desc;
q_desc.SetType("quantize_xpu");
q_desc.SetInput("x", std::vector<std::string>({input->Name()}));
q_desc.SetInput("max", std::vector<std::string>({input_max_name}));
q_desc.SetOutput("y", std::vector<std::string>({quantize_out_node->Name()}));
q_desc.SetAttr("out_dtype",
static_cast<int>(proto::VarType::Type::VarType_Type_INT8));
@@ -104,7 +84,6 @@ void XPUQuantizeOpPass::QuantizeInput(Graph* g,
// Link quantize op
UnlinkNodes(input, op);
IR_NODE_LINK_TO(input, quantize_op);
IR_NODE_LINK_TO(input_max_node, quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_node);
IR_NODE_LINK_TO(quantize_out_node, op);
}
@@ -131,32 +110,12 @@ void XPUQuantizeOpPass::DequantizeOutput(Graph* g,
dequantize_in_node->Var()->SetDataType(
proto::VarType::Type::VarType_Type_INT8);

// Create dequantize max_ptr node
float scale = GetScaleValueForNode(&var_quant_scales_, output);
int max_ptr_size = phi::backends::xpu::get_xpu_max_ptr_size(-1);
std::string input_max_name = output->Name() + "_dequantize_max";
VarDesc input_max_desc(input_max_name);
input_max_desc.SetPersistable(true);
input_max_desc.SetShape({static_cast<int64_t>(max_ptr_size)});
input_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32);
Node* input_max_node = g->CreateVarNode(&input_max_desc);
auto input_max_tensor =
scope->Var(input_max_name)->GetMutable<phi::DenseTensor>();
input_max_tensor->set_type(phi::DataType::FLOAT32);
input_max_tensor->Resize({max_ptr_size});
auto* cpu_ctx = static_cast<phi::CPUContext*>(
platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
std::vector<float> input_scales(max_ptr_size, scale);
memcpy(cpu_ctx->Alloc<float>(input_max_tensor),
input_scales.data(),
max_ptr_size * sizeof(float));

// Create a dequantize op node
OpDesc deq_desc;
deq_desc.SetType("dequantize_xpu");
deq_desc.SetInput("x",
std::vector<std::string>({dequantize_in_node->Name()}));
deq_desc.SetInput("max", std::vector<std::string>({input_max_name}));
deq_desc.SetOutput("y", std::vector<std::string>({output->Name()}));
deq_desc.SetAttr("out_dtype", static_cast<int>(output->Var()->GetDataType()));
deq_desc.SetAttr("scale", static_cast<float>(scale));
@@ -170,7 +129,6 @@ void XPUQuantizeOpPass::DequantizeOutput(Graph* g,
UnlinkNodes(op, output);
IR_NODE_LINK_TO(op, dequantize_in_node);
IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
IR_NODE_LINK_TO(input_max_node, dequantize_op);
IR_NODE_LINK_TO(dequantize_op, output);
}

2 changes: 0 additions & 2 deletions paddle/fluid/framework/ir/xpu/xpu_quantize_squash_pass.cc
@@ -59,7 +59,6 @@ void XPUQuantizeSquashPass::DequantQuantSquash(
Graph* graph,
std::unordered_map<const Node*, int>* nodes_keep_counter) const {
GraphPatternDetector gpd;
LOG(INFO) << "DequantQuantSquash COME IN";
patterns::DequantQuantXPUAny squash_pattern{gpd.mutable_pattern(),
"dequant_quant_xpu_any"};
squash_pattern();
@@ -90,7 +89,6 @@ void XPUQuantizeSquashPass::DequantQuantSquash(
// check if dequantize op should be kept or removed, decrease the counter
bool keep_dequant = (*nodes_keep_counter)[dequant_out]-- > 1;

int equal = dequant_scale == quant_scale ? 1 : 0;
if (dequant_scale == quant_scale) {
// squash dequantize-quantize to nothing
auto quant_out_var_name = quant_out->Name();
6 changes: 2 additions & 4 deletions paddle/phi/api/yaml/ops.yaml
@@ -658,14 +658,13 @@
backward : depthwise_conv2d_grad

- op : dequantize_xpu
args : (Tensor x, Tensor max, DataType out_dtype, float scale = 1.0f)
args : (Tensor x, DataType out_dtype, float scale = 1.0f)
output : Tensor(y)
infer_meta :
func : DeQuantizeXPUInferMeta
kernel :
func : dequantize_xpu
data_type: x
optional : max

- op : det
args : (Tensor x)
@@ -2050,14 +2049,13 @@
backward : qr_grad

- op : quantize_xpu
args : (Tensor x, Tensor max, DataType out_dtype, float scale = 1.0f)
args : (Tensor x, DataType out_dtype, float scale = 1.0f)
output : Tensor(y)
infer_meta :
func : QuantizeXPUInferMeta
kernel :
func : quantize_xpu
data_type : x
optional : max

- op : real
args : (Tensor x)
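
For readability, here is a sketch of how the two updated ops.yaml entries read after this change, assembled from the hunks above (indentation approximated to the surrounding file's style; the `Tensor max` argument and the `optional : max` line are dropped, while the scalar `scale` attribute is kept):

- op : dequantize_xpu
  args : (Tensor x, DataType out_dtype, float scale = 1.0f)
  output : Tensor(y)
  infer_meta :
    func : DeQuantizeXPUInferMeta
  kernel :
    func : dequantize_xpu
    data_type : x

- op : quantize_xpu
  args : (Tensor x, DataType out_dtype, float scale = 1.0f)
  output : Tensor(y)
  infer_meta :
    func : QuantizeXPUInferMeta
  kernel :
    func : quantize_xpu
    data_type : x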
20 changes: 0 additions & 20 deletions paddle/phi/infermeta/binary.cc
@@ -978,16 +978,6 @@ void DepthwiseConvInferMeta(const MetaTensor& input,
config);
}

void DeQuantizeXPUInferMeta(const MetaTensor& x,
const MetaTensor& max,
DataType out_dtype,
float scale,
MetaTensor* y) {
auto x_dims = x.dims();
y->set_dims(x_dims);
y->set_dtype(out_dtype);
}

void DistInferMeta(const MetaTensor& x,
const MetaTensor& y,
float p,
@@ -2607,16 +2597,6 @@ void PriorBoxInferMeta(const MetaTensor& input,
var->set_dims(phi::make_ddim(dim_vec));
}

void QuantizeXPUInferMeta(const MetaTensor& x,
const MetaTensor& max,
DataType out_dtype,
float scale,
MetaTensor* y) {
auto x_dims = x.dims();
y->set_dims(x_dims);
y->set_dtype(out_dtype);
}

void RepeatInterleaveWithTensorIndexInferMeta(const MetaTensor& x,
const MetaTensor& repeats,
int dim,
12 changes: 0 additions & 12 deletions paddle/phi/infermeta/binary.h
@@ -155,12 +155,6 @@ void DepthwiseConvInferMeta(const MetaTensor& input,
MetaTensor* out,
MetaConfig config = MetaConfig());

void DeQuantizeXPUInferMeta(const MetaTensor& x,
const MetaTensor& max,
DataType out_dtype,
float scale,
MetaTensor* y);

void DistInferMeta(const MetaTensor& x,
const MetaTensor& y,
float p,
@@ -414,12 +408,6 @@ void PriorBoxInferMeta(const MetaTensor& input,
MetaTensor* out,
MetaTensor* var);

void QuantizeXPUInferMeta(const MetaTensor& x,
const MetaTensor& max,
DataType out_dtype,
float scale,
MetaTensor* y);

void SearchsortedInferMeta(const MetaTensor& sorted_sequence,
const MetaTensor& value,
bool out_int32,
18 changes: 18 additions & 0 deletions paddle/phi/infermeta/unary.cc
@@ -672,6 +672,15 @@ void DecodeJpegInferMeta(const MetaTensor& x,
}
}

void DeQuantizeXPUInferMeta(const MetaTensor& x,
DataType out_dtype,
float scale,
MetaTensor* y) {
auto x_dims = x.dims();
y->set_dims(x_dims);
y->set_dtype(out_dtype);
}

void DiagEmbedInferMeta(
const MetaTensor& x, int offset, int dim1, int dim2, MetaTensor* out) {
auto x_dims = x.dims();
@@ -3768,6 +3777,15 @@ void FillSplitOutDims(const MetaTensor& x,
}
}

void QuantizeXPUInferMeta(const MetaTensor& x,
DataType out_dtype,
float scale,
MetaTensor* y) {
auto x_dims = x.dims();
y->set_dims(x_dims);
y->set_dtype(out_dtype);
}

void SplitInferMeta(const MetaTensor& x,
const IntArray& sections,
const Scalar& axis,
10 changes: 10 additions & 0 deletions paddle/phi/infermeta/unary.h
@@ -145,6 +145,11 @@ void DecodeJpegInferMeta(const MetaTensor& x,
const std::string& mode,
MetaTensor* out);

void DeQuantizeXPUInferMeta(const MetaTensor& x,
DataType out_dtype,
float scale,
MetaTensor* y);

void DiagEmbedInferMeta(
const MetaTensor& x, int offset, int dim1, int dim2, MetaTensor* out);

@@ -453,6 +458,11 @@ void QrInferMeta(const MetaTensor& x,
MetaTensor* q,
MetaTensor* r);

void QuantizeXPUInferMeta(const MetaTensor& x,
DataType out_dtype,
float scale,
MetaTensor* y);

void WeightQuantizeInferMeta(const MetaTensor& x,
const std::string& algo,
MetaTensor* out,
16 changes: 9 additions & 7 deletions paddle/phi/kernels/xpu/dequantization_kernel.cc
@@ -19,17 +19,20 @@ namespace phi {
template <typename TX, typename TY, typename Context>
void DeQuantizeKernelImpl(const Context& ctx,
const DenseTensor& x,
const paddle::optional<DenseTensor>& max,
float scale,
DenseTensor* y) {
using XPUInX = typename XPUTypeTrait<TX>::Type;
using XPUOutY = typename XPUTypeTrait<TY>::Type;

auto* y_data = ctx.template Alloc<TY>(y);
const auto* x_data = x.data<TX>();
int64_t len = x.numel();
const float* max_data =
max.get_ptr() == nullptr ? nullptr : max->data<float>();
int r = xpu::dequantization<XPUInX, XPUOutY>(
int max_ptr_size = ctx.x_context()->max_ptr_size();
xpu::ctx_guard RAII_GUARD(ctx.x_context());
auto max_data = RAII_GUARD.alloc_l3_or_gm<float>(max_ptr_size);
int r = xpu::constant<float>(ctx.x_context(), max_data, max_ptr_size, scale);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
r = xpu::dequantization<XPUInX, XPUOutY>(
ctx.x_context(),
reinterpret_cast<const XPUInX*>(x_data),
reinterpret_cast<XPUOutY*>(y_data),
@@ -41,16 +44,15 @@ void DeQuantizeKernelImpl(const Context& ctx,
template <typename T, typename Context>
void DeQuantizeKernel(const Context& ctx,
const DenseTensor& x,
const paddle::optional<DenseTensor>& max,
DataType out_dtype,
float scale,
DenseTensor* y) {
switch (out_dtype) {
case DataType::FLOAT32:
DeQuantizeKernelImpl<T, float, Context>(ctx, x, max, y);
DeQuantizeKernelImpl<T, float, Context>(ctx, x, scale, y);
break;
case DataType::FLOAT16:
DeQuantizeKernelImpl<T, dtype::float16, Context>(ctx, x, max, y);
DeQuantizeKernelImpl<T, dtype::float16, Context>(ctx, x, scale, y);
break;
default:
PADDLE_THROW(phi::errors::Unavailable(
16 changes: 9 additions & 7 deletions paddle/phi/kernels/xpu/quantization_kernel.cc
@@ -19,17 +19,20 @@ namespace phi {
template <typename TX, typename TY, typename Context>
void QuantizeKernelImpl(const Context& ctx,
const DenseTensor& x,
const paddle::optional<DenseTensor>& max,
float scale,
DenseTensor* y) {
using XPUInX = typename XPUTypeTrait<TX>::Type;
using XPUOutY = typename XPUTypeTrait<TY>::Type;

auto* y_data = ctx.template Alloc<TY>(y);
const auto* x_data = x.data<TX>();
int64_t len = x.numel();
const float* max_data =
max.get_ptr() == nullptr ? nullptr : max->data<float>();
int r = xpu::quantization<XPUInX, XPUOutY>(
int max_ptr_size = ctx.x_context()->max_ptr_size();
xpu::ctx_guard RAII_GUARD(ctx.x_context());
auto max_data = RAII_GUARD.alloc_l3_or_gm<float>(max_ptr_size);
int r = xpu::constant<float>(ctx.x_context(), max_data, max_ptr_size, scale);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
r = xpu::quantization<XPUInX, XPUOutY>(
ctx.x_context(),
reinterpret_cast<const XPUInX*>(x_data),
reinterpret_cast<XPUOutY*>(y_data),
@@ -41,16 +44,15 @@ void QuantizeKernelImpl(const Context& ctx,
template <typename T, typename Context>
void QuantizeKernel(const Context& ctx,
const DenseTensor& x,
const paddle::optional<DenseTensor>& max,
DataType out_dtype,
float scale,
DenseTensor* y) {
switch (out_dtype) {
case DataType::INT16:
QuantizeKernelImpl<T, int16_t, Context>(ctx, x, max, y);
QuantizeKernelImpl<T, int16_t, Context>(ctx, x, scale, y);
break;
case DataType::INT8:
QuantizeKernelImpl<T, int8_t, Context>(ctx, x, max, y);
QuantizeKernelImpl<T, int8_t, Context>(ctx, x, scale, y);
break;
default:
PADDLE_THROW(phi::errors::Unavailable(
