Fix unexpected runtime skip permute
yeonbok committed Oct 31, 2024
1 parent c158480 commit 9e67731
Showing 2 changed files with 117 additions and 27 deletions.
65 changes: 38 additions & 27 deletions src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -1426,36 +1426,47 @@ void primitive_inst::do_runtime_skip_permute() {
auto desc = _node->as<permute>().get_primitive();
auto input_shape = _impl_params->get_input_layout(0).get_shape();
const auto& permute_order = desc->permute_order;
// Check the runtime shape
// Optimize only when, among the dims whose permute order differs from their index, the largest
// dim value equals the product of those dims (i.e. at most one of them is larger than 1)
// size starts at zero so that an identity order such as [0, 1, 2, 3] also passes the check
int32_t size = 0;
int32_t max_value = 0;
for (int32_t i = 0; i < static_cast<int32_t>(permute_order.size()); ++i) {
int32_t order = static_cast<int32_t>(permute_order[i]);
int32_t dim = static_cast<int32_t>(input_shape[order]);
if (i != order) {
if (dim > max_value)
max_value = dim;
size = (size == 0) ? dim : (size * dim);
}
}

// If the largest value and total size are different, can_be_optimized needs to be reset
if (size != max_value) {
set_can_be_optimized(false);
GPU_DEBUG_TRACE_DETAIL << "--- Cannot optimize because size(" << size << ") and max_value(" << max_value
<< ") are different" << std::endl;

GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_permute] " << id() << " : reset can_be_optimized to false "
<< std::endl;
return;
// Skippability conditions
// 1. Split the permute order into contiguous transpose ranges, e.g.:
// [2, 1, 0] => [2, 1, 0]
// [1, 0, 2, 3] => [1, 0, 2]
// [0, 2, 1, 3] => [2, 1]
// [0, 3, 1, 2] => [3, 1, 2]
// [2, 0, 1, 3] => [2, 0, 1]
// [3, 2, 1, 0] => [3, 2, 1, 0]
// [3, 2, 1, 0, 4] => [3, 2, 1, 0]
// [0, 2, 1, 3, 5, 4] => [2, 1], [5, 4]
// [4, 5, 2, 3, 0, 1] => [4, 5, 2, 3, 0, 1]
// 2. Within each transpose range, at most one dimension may have an extent greater than 1.
size_t range_max_dim = 0;
size_t count_not_one = 0;
bool can_skip = true;
for (size_t dim = 0; dim < permute_order.size(); ++dim) {
auto target_dim = static_cast<size_t>(permute_order[dim]);
if (dim == target_dim && range_max_dim <= dim) {
// End of a transpose range: it may hold at most one non-1 dim
if (count_not_one > 1) {
can_skip = false;
break;
}
count_not_one = 0;
range_max_dim = 0;
continue;
}
if (input_shape[dim] > 1)
count_not_one++;
range_max_dim = std::max(range_max_dim, target_dim);
}
GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_permute] " << id() << " : can_be_optimized" << std::endl;
// Check the trailing transpose range in case it did not close inside the loop
can_skip = (count_not_one <= 1);
GPU_DEBUG_TRACE_DETAIL << "[do_runtime_skip_permute] " << id() << " : can_be_optimized ? " << can_skip << std::endl;
GPU_DEBUG_TRACE_DETAIL << " - Input layout : " << _impl_params->get_input_layout(0).to_short_string() << std::endl;
GPU_DEBUG_TRACE_DETAIL << " - Output layout : " << _impl_params->get_output_layout().to_short_string() << std::endl;
set_can_be_optimized(true);
GPU_DEBUG_TRACE_DETAIL << " - permute order : ";
for (auto order : permute_order) {
GPU_DEBUG_TRACE_DETAIL << order << ",";
}
GPU_DEBUG_TRACE_DETAIL << std::endl;
set_can_be_optimized(can_skip);
}

void primitive_inst::do_runtime_skip_strided_slice() {
@@ -0,0 +1,79 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "test_utils.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/permute.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>

#include "permute_inst.h"
#include "program_wrapper.h"

#include <cmath>
#include <algorithm>

using namespace cldnn;
using namespace ::tests;

namespace skip_permute_tests {

struct skip_permute_params {
layout input_layout_static;
std::vector<uint16_t> permute_order;
bool expected_result1;  // can_be_optimized right after network build, while the shape is still dynamic
bool expected_result2;  // can_be_optimized after execution with the static input shape
};

class skip_permute_at_runtime_test : public testing::TestWithParam<skip_permute_params> {};

TEST_P(skip_permute_at_runtime_test, runtime_skip) {
auto p = GetParam();
auto& engine = get_test_engine();
auto rank = p.input_layout_static.get_partial_shape().size();
auto input_layout_dynamic = layout {ov::PartialShape::dynamic(rank), data_types::f16, format::get_default_format(rank)};
topology topology(input_layout("input", input_layout_dynamic),
permute("permute", input_info("input"), p.permute_order),
reorder("reorder", input_info("permute"), format::get_default_format(rank), data_types::f32));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

network network(engine, topology, config);
auto permute_inst = network.get_primitive("permute");
ASSERT_EQ(permute_inst->can_be_optimized(), p.expected_result1);

auto input_mem = engine.allocate_memory(p.input_layout_static);
network.set_input_data("input", input_mem);
auto outputs = network.execute();
outputs.begin()->second.get_memory();

ASSERT_EQ(permute_inst->can_be_optimized(), p.expected_result2);
}

INSTANTIATE_TEST_SUITE_P(smoke, skip_permute_at_runtime_test,
testing::ValuesIn(std::vector<skip_permute_params> {
{ layout{ov::PartialShape{8, 2, 8}, data_types::f16, format::bfyx}, {2, 1, 0}, true, false },
{ layout{ov::PartialShape{8, 2, 1}, data_types::f16, format::bfyx}, {2, 1, 0}, true, false },
{ layout{ov::PartialShape{1, 12, 1}, data_types::f16, format::bfyx}, {2, 1, 0}, true, true },
{ layout{ov::PartialShape{2, 3, 1, 14}, data_types::f16, format::bfyx}, {1, 0, 2, 3}, true, false },
{ layout{ov::PartialShape{1, 3, 1, 14}, data_types::f16, format::bfyx}, {1, 0, 2, 3}, true, true },
{ layout{ov::PartialShape{12, 3, 1, 14}, data_types::f16, format::bfyx}, {0, 2, 1, 3}, true, true },
{ layout{ov::PartialShape{12, 3, 2, 14}, data_types::f16, format::bfyx}, {0, 2, 1, 3}, true, false },
{ layout{ov::PartialShape{12, 1, 1, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, true },
{ layout{ov::PartialShape{12, 1, 1, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, true },
{ layout{ov::PartialShape{1, 1, 1, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, true },
{ layout{ov::PartialShape{1, 3, 2, 14}, data_types::f16, format::bfyx}, {0, 3, 1, 2}, true, false },
{ layout{ov::PartialShape{1, 1, 4, 14}, data_types::f16, format::bfyx}, {2, 0, 1, 3}, true, true },
{ layout{ov::PartialShape{1, 4, 4, 1}, data_types::f16, format::bfyx}, {2, 0, 1, 3}, true, false },
{ layout{ov::PartialShape{1, 10, 1, 1, 11}, data_types::f16, format::bfzyx}, {3, 2, 1, 0, 4}, true, true },
{ layout{ov::PartialShape{1, 10, 2, 1, 10}, data_types::f16, format::bfzyx}, {3, 2, 1, 0, 4}, true, false },
{ layout{ov::PartialShape{1, 4, 1, 3, 4, 1}, data_types::f16, format::bfwzyx}, {0, 2, 1, 3, 5, 4}, true, true },
{ layout{ov::PartialShape{1, 4, 2, 3, 4, 1}, data_types::f16, format::bfwzyx}, {0, 2, 1, 3, 5, 4}, true, false },
{ layout{ov::PartialShape{1, 1, 1, 1, 4, 1}, data_types::f16, format::bfwzyx}, {4, 5, 2, 3, 0, 1}, true, true },
{ layout{ov::PartialShape{1, 1, 1, 1, 4, 2}, data_types::f16, format::bfwzyx}, {4, 5, 2, 3, 0, 1}, true, false },
}));
}  // namespace skip_permute_tests
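The skippability rule added above can also be exercised in isolation. The following is a minimal standalone sketch, not part of the intel_gpu plugin; the helper name is_permute_skippable and the plain std::vector interface are assumptions made here for illustration. It mirrors the loop in do_runtime_skip_permute(): a permute is treated as a no-op only if every contiguous transpose range touches at most one dimension with extent greater than 1.

// Standalone sketch (assumed helper, not the plugin API) of the skippability rule
// implemented in do_runtime_skip_permute() above.
#include <cstddef>
#include <cstdint>
#include <algorithm>
#include <iostream>
#include <vector>

static bool is_permute_skippable(const std::vector<uint16_t>& order,
                                 const std::vector<size_t>& shape) {
    size_t range_max_dim = 0;   // largest target index seen in the current transpose range
    size_t count_not_one = 0;   // dims with extent > 1 in the current transpose range
    for (size_t dim = 0; dim < order.size(); ++dim) {
        auto target_dim = static_cast<size_t>(order[dim]);
        if (dim == target_dim && range_max_dim <= dim) {
            // A transpose range closes here: it may contain at most one non-1 dim
            if (count_not_one > 1)
                return false;
            count_not_one = 0;
            range_max_dim = 0;
            continue;
        }
        if (shape[dim] > 1)
            count_not_one++;
        range_max_dim = std::max(range_max_dim, target_dim);
    }
    // Check the trailing range in case it did not close inside the loop
    return count_not_one <= 1;
}

int main() {
    // Mirrors two of the parameterized cases above: {1, 12, 1} permuted by {2, 1, 0}
    // moves only size-1 dims and is skippable, while {8, 2, 8} is not.
    std::cout << std::boolalpha
              << is_permute_skippable({2, 1, 0}, {1, 12, 1}) << "\n"   // true
              << is_permute_skippable({2, 1, 0}, {8, 2, 8}) << "\n";   // false
    return 0;
}

Running this sketch against the parameter list above should reproduce the expected_result2 column, since it applies the same range check to the static input shapes.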
