[LPT][TESTS] Functional tests: created TestParams for LPT
             LayerTransformationParams: removed unused parameters
v-Golubev authored and eshoguli committed Jun 5, 2021
1 parent 0bf1ec6 commit dc943d4
Showing 135 changed files with 453 additions and 516 deletions.
18 changes: 8 additions & 10 deletions inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -394,11 +394,6 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
}

// TODO: LPT: not implemented:
// - supportAsymmetricQuantization
// - support3DTensorOnActivations
// - deconvolutionSpecificChannelsRatio

auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8, ngraph::element::i8}},
@@ -431,16 +426,19 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
return false;
});
lptPassConfig->set_callback<ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool {
return WeightableLayerTransformation::isAsymmetricOnWeights(node);
size_t inputChannels = node->get_input_shape(0)[1];
size_t outputChannels = node->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return true;
}

return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
});
lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {
return MatMulTransformation::is3DTensorOnActivations(node);
});

auto params = LayerTransformation::Params();
params.setDeconvolutionSpecificChannelsRatio(true);

lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);
lptManager.run_passes(nGraphFunc);
}
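For context, a recap of what this hunk changes: the policy knobs that previously lived on LayerTransformation::Params are now expressed as per-operation pass-config callbacks, where returning true skips the transformation for the matched node. A before/after sketch assembled from the lines above (no new API beyond what the diff shows):

// Before (sketch): policy carried by Params flags.
auto params = LayerTransformation::Params();
params.setDeconvolutionSpecificChannelsRatio(true);
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);

// After (sketch): the same policy as a callback; returning true disables
// the transformation for this node.
lptPassConfig->set_callback<ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool {
    const size_t inputChannels = node->get_input_shape(0)[1];   // deconvolution-specific
    const size_t outputChannels = node->get_output_shape(0)[1]; // channels-ratio check
    if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
        return true;
    }
    return LayerTransformation::isAsymmetricQuantization(node) ||
           WeightableLayerTransformation::isAsymmetricOnWeights(node);
});
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);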

@@ -18,7 +18,7 @@ class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public Weig
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer, const bool deconvolutionSpecificChannelsRatio = false) noexcept;
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
};

} // namespace low_precision
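Since the instance method now forwards to the static overload, callers can query quantization status without constructing the transformation. A usage sketch, assuming a std::shared_ptr<const ngraph::Node> node is in scope (the call site is illustrative, not part of this hunk):

// Sketch: querying the static predicate directly.
const bool quantized =
    ngraph::pass::low_precision::ConvolutionBackpropDataTransformation::isQuantizedStatic(node);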
@@ -159,97 +159,26 @@ inline std::ostream &operator << (std::ostream &os, const DataPrecision& value)
// Base class for all LP transformations, holds some common data structures
class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherPass {
public:
enum QuantizedTensorAlignment {
None,
UpdateLevel
};

// TODO: LPT: not implemented: clean up ngraph::pass::low_precision::LayerTransformation::Params,
// use LayerTestsUtils::LayerTransformation::Params type instead:
// - quantizedTensorAlignmentOnActivations
// - quantizedTensorAlignmentOnWeights
// - supportAsymmetricQuantization
// - precisionsOnActivations
// - precisionsOnWeights
class Params {
public:
Params(
const bool updatePrecisions = true,
const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations = QuantizedTensorAlignment::UpdateLevel,
const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights = QuantizedTensorAlignment::None,
bool supportAsymmetricQuantization = true,
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
std::vector<element::Type> precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32,
bool support3DTensorOnActivations = true,
bool deconvolutionSpecificChannelsRatio = false) :
updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
supportAsymmetricQuantization(supportAsymmetricQuantization),
precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision),
support3DTensorOnActivations(support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
}

if (precisionsOnWeights.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on weights are not specisifed";
}
}
const bool updatePrecisions = true,
element::Type deqPrecision = element::f32) :
updatePrecisions(updatePrecisions),
deqPrecision(deqPrecision) {}

Params& setUpdatePrecisions(const bool updatePrecisions) {
this->updatePrecisions = updatePrecisions;
return *this;
}

Params& setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) {
this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations;
return *this;
}

Params& setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) {
this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights;
return *this;
}

Params& setSupportAsymmetricQuantization(const bool supportAsymmetricQuantization) {
this->supportAsymmetricQuantization = supportAsymmetricQuantization;
return *this;
}

Params& setPrecisionsOnActivations(const std::vector<element::Type>& precisionsOnActivations) {
this->precisionsOnActivations = precisionsOnActivations;
return *this;
}

Params& setPrecisionsOnWeights(const std::vector<element::Type>& precisionsOnWeights) {
this->precisionsOnWeights = precisionsOnWeights;
return *this;
}

Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) {
this->support3DTensorOnActivations = support3DTensorOnActivations;
return *this;
}

Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
Params& setDeqPrecision(const element::Type& deqPrecision) {
this->deqPrecision = deqPrecision;
return *this;
}

bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
bool supportAsymmetricQuantization;
std::vector<element::Type> precisionsOnActivations;
std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
};

class PrecisionDetails {
@@ -282,6 +211,8 @@ class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherP
// TODO: LPT: INT8 specific here, where is INT16/INT32?
static PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails);

static bool isAsymmetricQuantization(const std::shared_ptr<const Node>& node);

// return true if operation can be quantized and false otherwise
// for example: if convolution operation weights are not quantized, then isQuantized returns false, and true otherwise
// note: dequantization operations on activations are absent during method execution
@@ -307,12 +238,7 @@ class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherP
#endif

bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
bool supportAsymmetricQuantization;
element::Type deqPrecision;
bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;

static const char originalLayerPostfix[];
TransformationContext* context;
@@ -353,38 +279,6 @@ class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherP
}
};

inline std::ostream &operator << (std::ostream &os, const LayerTransformation::QuantizedTensorAlignment& value) {
switch (value) {
case LayerTransformation::QuantizedTensorAlignment::None: {
os << "None";
break;
}
case LayerTransformation::QuantizedTensorAlignment::UpdateLevel: {
os << "UpdateLevel";
break;
}
default: {
os << static_cast<int>(value);
break;
}
}
return os;
}

inline std::ostream &operator << (std::ostream &os, const std::vector<element::Type>& values) {
os << "{";
for (size_t i = 0; i < values.size(); ++i) {
const element::Type& value = values[i];
if (i > 0) {
os << ", " << value;
} else {
os << value;
}
}
os << "}";
return os;
}

typedef std::shared_ptr<LayerTransformation> LayerTransformationPtr;

} // namespace low_precision
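With the cleanup applied, Params keeps only updatePrecisions and deqPrecision, and the surviving fluent setters still chain. A minimal usage sketch built from the members shown above:

using namespace ngraph::pass::low_precision;

// Sketch: configure the two remaining knobs.
const auto params = LayerTransformation::Params()
    .setUpdatePrecisions(true)                 // rewrite tensor precisions during transformation
    .setDeqPrecision(ngraph::element::f32);    // element type used for dequantization operations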
@@ -53,41 +53,11 @@ ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(con
this->register_matcher(m, callback);
}

//void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
// addPattern(
// pass,
// context,
// make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
// addPattern(
// pass,
// context,
// make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
// addPattern(
// pass,
// context,
// make_op_pattern<opset1::ConvolutionBackpropData>(
// { make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
// addPattern(
// pass,
// context,
// make_op_pattern<opset1::ConvolutionBackpropData>(
// { make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>(), make_op_label<opset1::Constant>() }));
//}

bool ConvolutionBackpropDataTransformation::isQuantized(const std::shared_ptr<const Node>& layer) const noexcept {
return ConvolutionBackpropDataTransformation::isQuantizedStatic(layer, deconvolutionSpecificChannelsRatio);
return ConvolutionBackpropDataTransformation::isQuantizedStatic(layer);
}

bool ConvolutionBackpropDataTransformation::isQuantizedStatic(
const std::shared_ptr<const Node>& layer,
const bool deconvolutionSpecificChannelsRatio) noexcept {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = layer->get_input_shape(0)[1];
size_t outputChannels = layer->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
bool ConvolutionBackpropDataTransformation::isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept {
return WeightableLayerTransformation::isQuantizedStatic(layer, false);
}

@@ -242,14 +212,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
}

bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = op->get_input_shape(0)[1];
size_t outputChannels = op->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}

return canConvolutionBeTransformed(context, op);
}

@@ -25,12 +25,7 @@ const char LayerTransformation::originalLayerPostfix[] = "_original";

LayerTransformation::LayerTransformation(const Params& params) :
updatePrecisions(params.updatePrecisions),
quantizedTensorAlignmentOnActivations(params.quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(params.quantizedTensorAlignmentOnWeights),
supportAsymmetricQuantization(params.supportAsymmetricQuantization),
deqPrecision(params.deqPrecision),
support3DTensorOnActivations(params.support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio) {}
deqPrecision(params.deqPrecision) {}

void LayerTransformation::setContext(TransformationContext* context) noexcept {
this->context = context;
@@ -110,10 +105,6 @@ bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr<Node>& op,
return true;
}

if (!supportAsymmetricQuantization) {
return false;
}

if (!updatePrecisions) {
return true;
}
@@ -250,6 +241,12 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
return LayerTransformation::PrecisionDetails(element::undefined, hasNegative, hasZeroPoint);
}

bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
const auto nonConstNode = const_cast<ngraph::Node*>(layer.get())->shared_from_this();
const auto dequantization = NetworkHelper::getDequantization(nonConstNode);
return dequantization.subtract != nullptr;
}
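A note on the implementation above: the const_cast/shared_from_this round trip exists only because NetworkHelper::getDequantization takes a non-const node; the check itself reduces to probing the dequantization subgraph for a zero-point Subtract. An equivalent inline sketch, assuming a non-const std::shared_ptr<ngraph::Node> node:

// Sketch: asymmetric quantization leaves its zero point behind as a Subtract
// in the dequantization subgraph; symmetric ranges fold the Subtract away.
const auto dequantization = NetworkHelper::getDequantization(node);
const bool asymmetric = (dequantization.subtract != nullptr);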

bool LayerTransformation::isQuantized(const std::shared_ptr<const Node>& layer) const noexcept {
return true;
}
Expand Down Expand Up @@ -301,18 +298,6 @@ void LayerTransformation::updateOutput(
TransformationContext &context,
std::shared_ptr<ngraph::Node> lastNode,
std::shared_ptr<ngraph::Node> originalNode) const {
//const size_t outputSize = context.function->get_output_size();
//for (size_t i = 0; i < outputSize; ++i) {
// std::shared_ptr<ngraph::Node> result = context.function->get_output_op(i);
// std::shared_ptr<ngraph::Node> outputNode = result->get_input_node_shared_ptr(0);
// if (outputNode.get() == lastNode.get()) {
// const std::string originalName = originalNode->get_friendly_name();
// originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
// lastNode->set_friendly_name(originalName);
// break;
// }
//}

// TODO: not tested!!!
for (auto output : lastNode->outputs()) {
for (auto input : output.get_target_inputs()) {
@@ -42,9 +42,6 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(layer);
const auto dataPrecision = getDataPrecisionOnWeights(layer);
if ((!supportAsymmetricQuantization) && dataPrecision.hasZeroPoint) {
return false;
}
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) {
return false;
}
5 changes: 0 additions & 5 deletions inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -321,11 +321,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
if (useLpt) {
OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations");

// TODO: LPT: not implemented:
// - supportAsymmetricQuantization
// - support3DTensorOnActivations
// - deconvolutionSpecificChannelsRatio

auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
@@ -65,7 +65,7 @@ class AddTransformationTestValues {
ngraph::Shape inputShape;
bool broadcast;
int constInput;
ngraph::pass::low_precision::LayerTransformation::Params params;
TestTransformationParams params;
Actual actual;
Expected expected;
std::string additionalLayer;
@@ -99,7 +99,7 @@ class AddTransformation : public LayerTransformation, public testing::WithParamI
precision,
testValues.inputShape,
testValues.broadcast,
testValues.params,
TestTransformationParams::toParams(testValues.params),
testValues.actual.precision1,
testValues.actual.dequantization1,
testValues.actual.precision2,
@@ -109,15 +109,14 @@ class AddTransformation : public LayerTransformation, public testing::WithParamI
testValues.additionalLayer);

SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::AddTransformation, ngraph::opset1::Add>(
low_precision::LayerTransformation::Params(testValues.params));
transform.add<ngraph::pass::low_precision::AddTransformation, ngraph::opset1::Add>(testValues.params);
transform.transform(actualFunction);

referenceFunction = AddFunction::getReference(
precision,
testValues.inputShape,
testValues.broadcast,
testValues.params,
TestTransformationParams::toParams(testValues.params),
testValues.expected.precision1,
testValues.expected.dequantization1,
testValues.expected.precision2,
@@ -48,7 +48,7 @@ class AlignConcatQuantizationParametersTransformationTestValues {
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
};

ngraph::pass::low_precision::LayerTransformation::Params params;
TestTransformationParams params;
Actual actual;
Expected expected;
};
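The TestTransformationParams type used by these fixtures is defined in the shared test utilities elsewhere in this commit; only its construction from test values and the static toParams projection are visible in these hunks. A hypothetical sketch of its shape, inferred from the call sites above (the field list is an assumption):

// Hypothetical sketch, not the actual definition from the commit: a test-only
// parameter bag that keeps knobs removed from the production Params, plus a
// projection onto the slimmed-down production type.
class TestTransformationParams {
public:
    TestTransformationParams(
        bool updatePrecisions = true,
        std::vector<ngraph::element::Type> precisionsOnActivations = { ngraph::element::u8, ngraph::element::i8 },
        std::vector<ngraph::element::Type> precisionsOnWeights = { ngraph::element::i8 });

    // Drops test-only fields and returns the production type.
    static ngraph::pass::low_precision::LayerTransformation::Params toParams(const TestTransformationParams& params);

    bool updatePrecisions;
    std::vector<ngraph::element::Type> precisionsOnActivations;
    std::vector<ngraph::element::Type> precisionsOnWeights;
};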