Add Post-Training Quantization and export function in dygraph mode #50107

Merged on Feb 16, 2023 (9 commits; changes shown from 8 commits)
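This PR adds post-training quantization (PTQ) for dygraph models together with the machinery to export the result using ONNX-style quantize/dequantize operators. As context for the diff below, here is a minimal usage sketch; QuantConfig, PTQ, and AbsmaxObserver are introduced by this PR, but the method names, constructor arguments, calibration loop, and the final convert/export call are assumptions rather than an API confirmed by this excerpt.

import paddle
from paddle.quantization import PTQ, QuantConfig
from paddle.quantization.observers import AbsmaxObserver

model = paddle.nn.Sequential(paddle.nn.Linear(16, 16), paddle.nn.ReLU())

# Observe activations and weights with an abs-max observer (default arguments assumed).
observer = AbsmaxObserver()
q_config = QuantConfig(activation=observer, weight=observer)

ptq = PTQ(q_config)
quant_model = ptq.quantize(model)  # insert observers into the model

# Calibration: feed representative data so the observers can collect value ranges.
for _ in range(8):
    quant_model(paddle.rand([4, 16]))

# Convert observers into ONNX-style quantize/dequantize layers for inference
# (method name assumed; see LinearQuanterDequanter in python/paddle/nn/quant/format.py).
infer_model = ptq.convert(quant_model)
paddle.jit.save(
    infer_model,
    "./ptq_model",
    input_spec=[paddle.static.InputSpec(shape=[None, 16], dtype='float32')],
)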
234 changes: 234 additions & 0 deletions python/paddle/nn/quant/format.py
@@ -0,0 +1,234 @@
"""Define some layers used to export quantization model with ONNX style."""
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc
from typing import List, Tuple

import paddle
from paddle import _legacy_C_ops as _C_ops
from paddle.fluid.framework import in_dygraph_mode
from paddle.nn import Layer


class LinearQuanterDequanter(Layer):
def __init__(self, quanter, dequanter):
super(LinearQuanterDequanter, self).__init__()
self._quanter = quanter
self._dequanter = dequanter

def forward(self, input):
out = input
if self._quanter is not None:
out = self._quanter(out)
if self._dequanter is not None:
out = self._dequanter(out)
return out

@staticmethod
def from_quanter(quanter):
return LinearQuanterDequanter(
LinearQuanter.from_quanter(quanter),
LinearDequanter.from_quanter(quanter),
)


class LinearQuanter(Layer):
def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
super(LinearQuanter, self).__init__()
self._scales = paddle.to_tensor(scales, dtype="float32")
self._zero_point = (
paddle.zeros([1], dtype="float32")
if zero_point is None
else paddle.to_tensor(zero_point)
)
self._quant_axis = -1 if quant_axis is None else quant_axis
self._bit_length = bit_length

def forward(self, input):
if in_dygraph_mode():
return _C_ops.quantize_linear(
input,
self._scales,
self._zero_point,
"quant_axis",
self._quant_axis,
"bit_length",
self._bit_length,
)
else:
out = self._helper.create_variable_for_type_inference(input.dtype)
self._helper.append_op(
type='quantize_linear',
inputs={
'X': input,
'Scale': self._scales,
'ZeroPoint': self._zero_point,
},
outputs={'Y': out},
attrs={
'quant_axis': self._quant_axis,
'bit_length': self._bit_length,
},
)
return out

@staticmethod
def from_quanter(quanter):

return LinearQuanter(
quanter.scales(),
zero_point=quanter.zero_points(),
quant_axis=quanter.quant_axis(),
bit_length=quanter.bit_length(),
)


class LinearDequanter(Layer):
def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
super(LinearDequanter, self).__init__()
self._scales = paddle.to_tensor(scales, dtype="float32")
self._zero_point = (
paddle.zeros([1], dtype="float32")
if zero_point is None
else paddle.to_tensor(zero_point)
)
self._quant_axis = -1 if quant_axis is None else quant_axis
self._bit_length = bit_length

def forward(self, input):
if in_dygraph_mode():
return _C_ops.dequantize_linear(
input,
self._scales,
self._zero_point,
"quant_axis",
self._quant_axis,
"bit_length",
self._bit_length,
)
else:
out = self._helper.create_variable_for_type_inference(input.dtype)
self._helper.append_op(
type='dequantize_linear',
inputs={
'X': input,
'Scale': self._scales,
'ZeroPoint': self._zero_point,
},
outputs={'Y': out},
attrs={
'quant_axis': self._quant_axis,
'bit_length': self._bit_length,
},
)
return out

@staticmethod
def from_quanter(quanter):
return LinearDequanter(
quanter.scales(),
zero_point=quanter.zero_points(),
quant_axis=quanter.quant_axis(),
bit_length=quanter.bit_length(),
)


class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
r"""Abstract class to help convert quantized layer to inference model.
It Defined some function convert quanters and observers to quantize or
dequantize operators who maitains quantization parameters used during
inference.
Examples:
.. code-block:: python

# Given codes in ./customized_quanter.py
class CustomizedQuantedLayer(ConvertibleQuantedLayer):
def __init__(self):
super(CustomizedQuantedLayer, self).__init__()
self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
self.quanter_for_weight_a = None
self.activation_weight = None
def forward(self, input):
qweight_a = self.quanter_for_weight_a(self.weight_a)
weight_b = self.weight_b
qinput = self.activation_weight(input)
# compute with qweight_a, weight_b and qinput.
return qweight_a * qinput + weight_b

def weights_to_quanters(self):
return [('weight_a', 'quanter_for_weight_a')]

def activation_quanters(self):
return ['activation_weight']
"""

def __init__(self):
super(ConvertibleQuantedLayer, self).__init__()
self.converted = False

@abc.abstractmethod
def weights_to_quanters(self) -> List[Tuple[str, str]]:
r"""Get the name pairs of weights to be quantized and corresponding quanters.
In convert function of this abstract class, it will call the 'weights_to_quanters' function.
And do something as bellow:
For each pair, the quanter will be converted to quantize operator and
dequantize operator. And then quantize the weight by quantize operator.
Finally, remove the quantize operator and store the weights in integer data type.

Returns: A list of name pairs. Each pair contains two names. The first is name of weight
to be quantized and the second is name of corresponding quanter.
"""
pass

@abc.abstractmethod
def activation_quanters(self) -> List[str]:
r"""Get the names of quanters used to quantize activations.
All the quanters or observers returned by this function will be converted to quantize
and dequantize operators for deployment.
Returns: A list of quanter names.
"""
pass

def _convert_quanter_to_qdq(self, quanter_name) -> LinearQuanterDequanter:
r"""Convert quanter to an instance of LinearQuanterDequanter."""
assert hasattr(
self, quanter_name
), f"{quanter_name} is not attribute of current layer."
quanter = getattr(self, quanter_name)
quanter = LinearQuanterDequanter.from_quanter(quanter)
setattr(self, quanter_name, quanter)
self._sub_layers[quanter_name] = quanter
return quanter

def _quant_weights(self, weight_name, quanter):
r"""Quantize the weight by given quanter."""
weight = getattr(self, weight_name)
qweight = quanter(weight)
weight.set_value(qweight)

def _convert(self):
r"""Convert current layer to onnx style for inference."""
assert not self.converted, "The model should be converted only once."
for weight_name, quanter_name in self.weights_to_quanters():
qdq = self._convert_quanter_to_qdq(quanter_name)
self._quant_weights(weight_name, qdq._quanter)
qdq._quanter = None
qdq._sub_layers['_quanter'] = None

for quanter_name in self.activation_quanters():
self._convert_quanter_to_qdq(quanter_name)

self.converted = True
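To make the ONNX-style building blocks above concrete, here is a small hand-composed sketch of a quantize/dequantize pair. It assumes the convention that the stored scale is the per-tensor abs-max threshold; that convention is not spelled out in this file, so treat it as an assumption.

import paddle
from paddle.nn.quant.format import LinearQuanter, LinearDequanter, LinearQuanterDequanter

x = paddle.uniform([4, 8], min=-1.0, max=1.0)
abs_max = float(paddle.max(paddle.abs(x)))  # per-tensor threshold (scale convention assumed)

qdq = LinearQuanterDequanter(
    LinearQuanter(scales=[abs_max], bit_length=8),
    LinearDequanter(scales=[abs_max], bit_length=8),
)
x_qdq = qdq(x)  # quantize then dequantize: still float32, but snapped onto the int8 grid
print(float(paddle.abs(x - x_qdq).max()))  # the remaining difference is the quantization error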
10 changes: 9 additions & 1 deletion python/paddle/nn/quant/qat/conv.py
@@ -17,8 +17,10 @@
from paddle.nn import Layer
from paddle.nn import functional as F

from ..format import ConvertibleQuantedLayer

class QuantedConv2D(Layer):

class QuantedConv2D(ConvertibleQuantedLayer):
"""
The computational logic of QuantedConv2D is the same as that of Conv2D.
The only difference is that its inputs are all fake-quantized.
@@ -77,3 +79,9 @@ def _conv_forward(self, inputs, weights):
groups=self._groups,
data_format=self._data_format,
)

def weights_to_quanters(self):
return [('weight', 'weight_quanter')]

def activation_quanters(self):
return ['activation_quanter']
11 changes: 10 additions & 1 deletion python/paddle/nn/quant/qat/linear.py
@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.


from paddle.nn import Layer
from paddle.nn import functional as F

from ..format import ConvertibleQuantedLayer


class QuantedLinear(Layer):
class QuantedLinear(ConvertibleQuantedLayer):
"""
The computational logic of QuantedLinear is the same as that of Linear.
The only difference is that its inputs are all fake-quantized.
@@ -49,3 +52,9 @@ def forward(self, input):
def _linear_forward(self, input, weight):
out = F.linear(x=input, weight=weight, bias=self.bias, name=self.name)
return out

def weights_to_quanters(self):
return [('weight', 'weight_quanter')]

def activation_quanters(self):
return ['activation_quanter']
7 changes: 6 additions & 1 deletion python/paddle/quantization/__init__.py
@@ -1,4 +1,5 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
"""Quantization Module"""
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -49,12 +50,16 @@

from .config import QuantConfig
from .base_quanter import BaseQuanter
from .base_observer import BaseObserver
from .factory import quanter
from .qat import QAT
from .ptq import PTQ

__all__ = [
"QuantConfig",
"BaseQuanter",
"BaseObserver",
"quanter",
"QAT",
"PTQ",
]
32 changes: 32 additions & 0 deletions python/paddle/quantization/base_observer.py
@@ -0,0 +1,32 @@
"""Abstract observer class."""
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import abc

from .base_quanter import BaseQuanter


class BaseObserver(BaseQuanter, metaclass=abc.ABCMeta):
r"""
Built-in observers and customized observers should extend this base observer
and implement its abstract methods.
"""

def __init__(self):
super(BaseObserver, self).__init__()

@abc.abstractmethod
def cal_thresholds(self):
pass
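For reference, a hedged sketch of what a user-defined observer built on this base class might look like. The forward/cal_thresholds split and the scales/zero_points/quant_axis/bit_length accessors mirror what LinearQuanter.from_quanter reads in format.py; the exact abstract interface of BaseQuanter is not shown in this diff, so treat the signatures as assumptions.

import paddle
from paddle.quantization.base_observer import BaseObserver


class AbsMaxLikeObserver(BaseObserver):
    """Illustrative observer that tracks the running abs-max of the observed tensor."""

    def __init__(self, bit_length=8):
        super().__init__()
        self._bit_length = bit_length
        self._abs_max = 0.0

    def forward(self, x):
        # Observers pass the tensor through unchanged and only record statistics.
        self._abs_max = max(self._abs_max, float(paddle.max(paddle.abs(x))))
        return x

    def cal_thresholds(self):
        # Statistics are updated eagerly in forward, so there is nothing to finalize here.
        pass

    # Accessors consumed by LinearQuanter/LinearDequanter.from_quanter (names taken from format.py).
    def scales(self):
        return paddle.to_tensor([self._abs_max or 1.0], dtype='float32')

    def zero_points(self):
        return None

    def quant_axis(self):
        return None

    def bit_length(self):
        return self._bit_length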
3 changes: 3 additions & 0 deletions python/paddle/quantization/factory.py
@@ -70,6 +70,9 @@ def _instance(self, layer: Layer) -> BaseQuanter:
return self.partial_class(layer)


ObserverFactory = QuanterFactory


def quanter(class_name):
r"""
Annotation to declare a factory class for quanter.
18 changes: 18 additions & 0 deletions python/paddle/quantization/observers/__init__.py
@@ -0,0 +1,18 @@
"""Observers"""
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .abs_max import AbsmaxObserver

__all__ = ["AbsmaxObserver"]