Revert "[GPU] Fixes for hybrid quantization (openvinotoolkit#27127)" (o…
Browse files Browse the repository at this point in the history
…penvinotoolkit#27308)

This reverts commit c21f572.

Signed-off-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Authored by vladimir-paramuzov on Oct 30, 2024
1 parent 874bf8a · commit 95a6f18
Showing 2 changed files with 14 additions and 47 deletions.
@@ -50,7 +50,7 @@ struct FullyConnectedImplementationManager : public ImplementationManager {
         bool compressed_case = fc_prim->compressed_weights &&
                                one_of(in0_dt, {data_types::f16, data_types::f32, data_types::i8}) &&
                                one_of(wei_dt, {data_types::u8, data_types::i8, data_types::u4, data_types::i4}) &&
-                               one_of(out_dt, {data_types::f16, data_types::f32, data_types::u8, data_types::i8});
+                               one_of(out_dt, {data_types::f16, data_types::f32});
         if (!f16f16_case && !f32f32_case && !u8s8_case && !compressed_case)
             return false;

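For context, the gating above is a plain membership test: the compressed-weights path is taken only when each tensor's data type appears in an allowed list, and this revert drops u8/i8 from the allowed output types. A minimal sketch of the pattern follows; `data_types` and `one_of` here are simplified stand-ins for the intel_gpu helpers, not the actual OpenVINO definitions.

    #include <algorithm>
    #include <initializer_list>

    // Simplified stand-ins (assumption, not the real intel_gpu API).
    enum class data_types { f16, f32, u8, i8, u4, i4 };

    template <typename T>
    bool one_of(T value, std::initializer_list<T> candidates) {
        // True when `value` matches any entry in the candidate list.
        return std::any_of(candidates.begin(), candidates.end(),
                           [&](const T& c) { return c == value; });
    }

    int main() {
        data_types out_dt = data_types::u8;
        // After this revert, a u8 output no longer qualifies for the compressed case:
        return one_of(out_dt, {data_types::f16, data_types::f32}) ? 0 : 1;  // returns 1 here
    }
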
59 changes: 13 additions & 46 deletions src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -15,11 +15,8 @@
 #include "intel_gpu/plugin/transformations_pipeline.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 #include "intel_gpu/runtime/itt.hpp"
-#include "low_precision/add.hpp"
 #include "low_precision/convolution.hpp"
 #include "low_precision/convolution_backprop_data.hpp"
-#include "low_precision/fold_convert.hpp"
-#include "low_precision/fuse_convert.hpp"
 #include "low_precision/group_convolution.hpp"
 #include "low_precision/low_precision.hpp"
 #include "low_precision/mat_mul.hpp"
@@ -28,9 +25,7 @@
 #include "low_precision/pull_reshape_through_dequantization.hpp"
 #include "low_precision/pull_transpose_through_dequantization.hpp"
 #include "low_precision/recurrent_cell.hpp"
-#include "low_precision/rt_info/bias_attribute.hpp"
 #include "low_precision/strided_slice.hpp"
-#include "low_precision/transpose.hpp"
 #include "openvino/core/deprecated.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/core/validation_util.hpp"
@@ -51,7 +46,6 @@
 #include "openvino/op/reshape.hpp"
 #include "openvino/op/rnn_cell.hpp"
 #include "openvino/op/rnn_sequence.hpp"
-#include "openvino/op/scaled_dot_product_attention.hpp"
 #include "openvino/op/squeeze.hpp"
 #include "openvino/op/unsqueeze.hpp"
 #include "openvino/op/util/sub_graph_base.hpp"
@@ -319,9 +313,13 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression
         // types are not supported by oneDNN)
         manager.register_pass<ov::pass::MarkDequantizationSubgraph>(supported_woq_types, !device_info.supports_immad);
-        pass_config->set_callback<ov::pass::MarkDequantizationSubgraph>([&](const std::shared_ptr<const ov::Node> node) {
-            return !is_decompression_multiply(node);
-        });
+
+        // Need to check if transformations work correctly for mixed models with both compression and quantization at the same time.
+        if (!is_model_quantized) {
+            pass_config->set_callback<ov::pass::MarkDequantizationSubgraph>([&](const std::shared_ptr<const ov::Node> node) {
+                return !is_decompression_multiply(node);
+            });
+        }
 
         const bool keep_precision_sensitive_in_fp32_1 = true;
         const bool convert_input_output_precision = false;
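The restored guard is the substance of this hunk: the dequantization-marking callback is installed only when the model is not quantized, since mixed compression-plus-quantization models are unverified. Below is a self-contained sketch of that guard pattern; `PassConfig` and `Node` are simplified stand-ins for the ov::pass types, and returning true from the callback is conventionally treated as "skip this node".

    #include <functional>
    #include <memory>

    // Simplified stand-ins (assumption): the real code uses ov::pass::PassConfig and ov::Node.
    struct Node {
        bool is_decompression_multiply = false;
    };

    struct PassConfig {
        using Callback = std::function<bool(const std::shared_ptr<const Node>&)>;
        Callback callback;
        void set_callback(Callback cb) { callback = std::move(cb); }
    };

    void configure(PassConfig& pass_config, bool is_model_quantized) {
        // Restored behavior: only non-quantized (weight-compressed) models get the
        // callback that keeps everything except decompression Multiply subgraphs.
        if (!is_model_quantized) {
            pass_config.set_callback([](const std::shared_ptr<const Node>& node) {
                return !node->is_decompression_multiply;  // true => node is skipped by the pass
            });
        }
    }
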
@@ -690,6 +688,12 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         auto lptPassConfig = lptManager.get_pass_config();
         // quantized LSTMSequence / GPUSequence are not supported yet. Avoid extra transformation
         lptPassConfig->disable<ov::pass::low_precision::RecurrentCellTransformation>();
+        lptPassConfig->set_callback<ov::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
+            if (const auto multiply = std::dynamic_pointer_cast<const ov::op::v1::Multiply>(node)) {
+                return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
+            }
+            return false;
+        });
         lptPassConfig->set_callback<ConvolutionBackpropDataTransformation>([func, defaultPrecisions](const_node_ptr& node) -> bool {
             auto fillStaticChannel = [func](const ov::PartialShape& shape, size_t& channel) -> bool {
                 const auto rank = shape.rank();
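The callback restored above is a dynamic_pointer_cast type filter: only Multiply nodes are inspected, and precision markup is skipped only when the Multiply cannot be rewritten as a GroupConvolution; every other node falls through to false. A sketch of that dispatch shape, with stand-in types in place of ov::op::v1::Multiply and the LPT transformation class:

    #include <memory>

    // Stand-in node hierarchy (assumption): the real code casts to ov::op::v1::Multiply.
    struct NodeBase {
        virtual ~NodeBase() = default;
    };

    struct Multiply : NodeBase {
        bool convertible_to_group_convolution = false;
    };

    // Mirrors the callback's control flow: non-Multiply nodes fall through to false.
    bool skip_precision_markup(const std::shared_ptr<const NodeBase>& node) {
        if (const auto multiply = std::dynamic_pointer_cast<const Multiply>(node)) {
            return !multiply->convertible_to_group_convolution;
        }
        return false;
    }
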
@@ -726,43 +730,6 @@
             || WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
         });
 
-        lptPassConfig->set_callback<TransposeTransformation>([&](const_node_ptr& node) -> bool {
-            for (auto& user : node->get_users()) {
-                if (ov::is_type<ov::op::v13::ScaledDotProductAttention>(user))
-                    return true;
-            }
-
-            return false;
-        });
-
-        lptPassConfig->set_callback<MarkupPrecisions>([](const_node_ptr& node) -> bool {
-            return ov::is_type<ov::opset1::Multiply>(node) && !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(node);
-        });
-
-        lptPassConfig->set_callback<AddTransformation>([](const_node_ptr& node) -> bool {
-            return ov::marked_as_bias(node);
-        });
-        lptPassConfig->set_callback<FoldConvertTransformation>([](const_node_ptr& node) -> bool {
-            const auto& consumers = node->get_output_target_inputs(0);
-            if (consumers.size() == 1) {
-                const auto consumer = consumers.begin()->get_node()->shared_from_this();
-                return ov::is_type<ov::opset1::Multiply>(consumer) && is_decompression_multiply(consumer);
-            }
-            return false;
-        });
-        lptPassConfig->set_callback<FuseConvertTransformation>([](const_node_ptr& node) -> bool {
-            if (ov::is_type<ov::opset1::Multiply>(node)) {
-                return ov::is_type<ov::opset1::Multiply>(node) && is_decompression_multiply(node);
-            } else if (ov::is_type<ov::opset1::Subtract>(node)) {
-                const auto& consumers = node->get_output_target_inputs(0);
-                if (consumers.size() == 1) {
-                    const auto consumer = consumers.begin()->get_node()->shared_from_this();
-                    return ov::is_type<ov::opset1::Multiply>(consumer) && is_decompression_multiply(consumer);
-                }
-            }
-            return false;
-        });
-
         lptPassConfig->set_callback<MultiplyToGroupConvolutionTransformation>([&](const_node_ptr& node) -> bool {
             // disable MultiplyToGroupConvolution if Multiply with Constant can be fused
 
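The removed FoldConvert/FuseConvert callbacks shared one idiom: inspect a node's output consumers and bail out of the transformation only when the sole consumer is a decompression Multiply, so any fan-out keeps the regular LPT handling. A sketch of that single-consumer test, with simplified stand-ins for ov::Node and get_output_target_inputs:

    #include <memory>
    #include <vector>

    // Simplified stand-in (assumption): the real code walks ov::Node::get_output_target_inputs(0).
    struct Node {
        bool is_decompression_multiply = false;
        std::vector<std::shared_ptr<Node>> consumers;
    };

    bool skip_for_decompression(const std::shared_ptr<const Node>& node) {
        // Skip the transformation only in the exact single-consumer case.
        if (node->consumers.size() != 1)
            return false;
        const auto& consumer = node->consumers.front();
        return consumer && consumer->is_decompression_multiply;
    }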