Skip to content

Commit

Permalink
[GPU][Loop] Change condition to reinterpret buffer (#25086)
Browse files Browse the repository at this point in the history
### Details:
- In primitive_inst.cpp, when a skippable node that has been optimized out
returns early from realloc_if_needed(), its output memory layout still has to be updated.
- So this needs to be handled in the respective primitive_inst (in this case,
Reorder).

### Tickets:
 - 143848
  • Loading branch information
davidsnam-intel authored Jun 29, 2024
1 parent 943a94a commit 64eb742
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/graph/reorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ void reorder_inst::update_output_memory() {
if (!can_be_optimized())
return;

if (static_cast<bool>(_outputs[0]) && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()))
if (static_cast<bool>(_outputs[0])
&& _network.get_engine().is_the_same_buffer(output_memory(), input_memory())
&& output_memory().get_layout().identical(get_output_layout()))
return;

if (_node != nullptr)
Expand Down
118 changes: 118 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,115 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout,
}
}

// Builds a loop network with a dynamic-shape body (eltwise sum -> reorder) and executes the
// SAME network instance once per entry of `whole_layouts`, checking after every run that the
// feature and the first two spatial dimensions of the first returned output match those of
// the "input" (concatenation) primitive.
//
// Parameters:
//   body_input_layout    - partial shape used for the body input "b_parameter", for the
//                          "b_result" reorder and for the outer "input_origin" input.
//   whole_layouts        - concrete input shapes; the network is executed once per entry.
//   input_data_list      - values written to the input memory of each run; must contain
//                          one entry per element of `whole_layouts`.
//   expected_output_data - not used by this helper (only shapes are checked).
//   axis                 - axis passed to the "input" -> "b_parameter" input map and to the
//                          "b_result" -> "loop" output map.
//   exit_value           - value stored in "b_exit_value"; the body execution condition is
//                          `b_index < b_exit_value`.
//   is_caching_test      - not used by this helper.
static void test_loop_gpu_multiple_shapes_single_shared(ov::PartialShape body_input_layout,
                                                        std::vector<ov::PartialShape> whole_layouts,
                                                        std::vector<std::vector<float>> input_data_list,
                                                        std::vector<float> expected_output_data,
                                                        int32_t axis,
                                                        size_t exit_value,
                                                        bool is_caching_test = false) {
    // Every shape needs its own input data; fail early instead of indexing out of range below.
    ASSERT_EQ(whole_layouts.size(), input_data_list.size());

    auto& engine = get_test_engine();

    auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
    auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };

    auto e_initial_condition_mem = engine.allocate_memory(const_layout);
    auto e_num_iteration_mem = engine.allocate_memory(const_layout);
    auto b_exit_value_mem = engine.allocate_memory(const_layout);
    auto b_index_inc_mem = engine.allocate_memory(const_layout);

    // initialize input buffers
    set_values(e_initial_condition_mem, {1});
    set_values(b_exit_value_mem, {exit_value});
    set_values(b_index_inc_mem, {1});
    set_values(e_num_iteration_mem, {10});

    primitive_id body_current_iteration_id = "b_index";
    primitive_id body_execution_condition_id = "b_cond_exit_value";

    // Loop body: increments the iteration index, adds the updated index to the body input,
    // reorders the sum into "b_result" and evaluates the continue condition.
    cldnn::topology body(
        input_layout(body_current_iteration_id, const_layout),
        input_layout("b_parameter", b_input_layout),
        data("b_exit_value", b_exit_value_mem),
        data("b_index_inc", b_index_inc_mem),
        eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum),
        eltwise("b_permute", input_info("b_parameter"), input_info("b_index_update"), eltwise_mode::sum),
        reorder("b_result", input_info("b_permute"), b_input_layout),
        eltwise(body_execution_condition_id, input_info(body_current_iteration_id), input_info("b_exit_value"), eltwise_mode::lt)
    );

    primitive_id trip_count_id = "";
    primitive_id actual_iteration_count_id = "actual_iteration_count";
    primitive_id initial_condition_id = "initial_condition";
    int64_t num_iterations = -1;

    std::vector<loop::io_primitive_map> input_primitive_maps {
        loop::io_primitive_map("input", "b_parameter", axis),
        loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) };
    std::vector<loop::io_primitive_map> output_primitive_maps {
        loop::io_primitive_map(cldnn::input_info("loop"), cldnn::input_info("b_result"), axis) };
    // "b_result" is fed back into "b_parameter", and the updated index into the iteration counter.
    std::vector<loop::backedge_mapping> back_edges {
        loop::backedge_mapping("b_result", "b_parameter"),
        loop::backedge_mapping("b_index_update", body_current_iteration_id) };

    auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true);

    // Constant holding the body input shape, with -1 standing in for dynamic dimensions.
    auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx});
    std::vector<int32_t> body_input_layouts;
    for (size_t i = 0; i < body_input_layout.size(); i++) {
        if (body_input_layout[i].is_dynamic())
            body_input_layouts.push_back(-1);
        else
            body_input_layouts.push_back(static_cast<int32_t>(body_input_layout[i].get_length()));
    }
    set_values<int32_t>(const_shape, body_input_layouts);

    cldnn::topology topology(
        input_layout("input_origin", b_input_layout),
        input_layout(initial_condition_id, e_initial_condition_mem->get_layout()),
        mutable_data(actual_iteration_count_id, e_num_iteration_mem),
        permute("input2", input_info("input_origin"), {0, 1, 2, 3}),
        data("const", const_shape),
        permute("permute1", input_info("input_origin"), {0, 1, 2, 3}),
        concatenation("input", {input_info("permute1"), input_info("input_origin")}, 0),
        loop("loop",
             {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input")},
             body_program, trip_count_id, initial_condition_id, actual_iteration_count_id,
             input_primitive_maps, output_primitive_maps, back_edges,
             num_iterations, body_current_iteration_id, body_execution_condition_id, 1),
        permute("result", input_info("loop"), {0, 1, 2, 3}));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    // One network instance is reused for all shapes.
    network network(engine, topology, config);
    for (size_t i = 0; i < whole_layouts.size(); i++) {
        const auto& whole_layout = whole_layouts[i];
        const auto& input_data = input_data_list[i];

        // Rewrite the control buffers with their initial values before every run.
        set_values(e_initial_condition_mem, {1});
        set_values(b_exit_value_mem, {exit_value});
        set_values(b_index_inc_mem, {1});
        set_values(e_num_iteration_mem, {10});

        auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx };
        auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y
        set_values(e_input_mem, input_data);

        network.set_input_data("input_origin", e_input_mem);
        network.set_input_data(initial_condition_id, e_initial_condition_mem);

        auto outputs = network.execute();
        // NOTE(review): this inspects whichever output comes first in the returned container.
        // The topology also contains "input2", which has no consumer - confirm that the
        // primitive checked here is the intended one (presumably "result").
        auto output_layout = outputs.begin()->second.get_layout();
        auto input_layout = network.get_primitive("input")->get_output_layout();

        ASSERT_EQ(output_layout.feature(), input_layout.feature());
        ASSERT_EQ(output_layout.spatial(0), input_layout.spatial(0));
        ASSERT_EQ(output_layout.spatial(1), input_layout.spatial(1));
    }
}

std::vector<float> input_data_2_4{
1.0f, 2.0f,
4.0f, -15.f,
Expand Down Expand Up @@ -919,6 +1028,15 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes2) {
-1, 10);
}

// Runs the shared-network loop helper over five inputs that differ only in the (dynamic)
// second dimension, so the same network instance sees a sequence of changing shapes.
TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes3) {
    const ov::PartialShape body_shape{ 1, -1, 560 };
    const std::vector<ov::PartialShape> run_shapes{
        { 1, 58, 560 }, { 1, 87, 560 }, { 1, 72, 560 }, { 1, 88, 560 }, { 1, 89, 560 }};
    // The same data set is supplied for every run.
    const std::vector<std::vector<float>> run_inputs{
        input_data_2_4_4, input_data_2_4_4, input_data_2_4_4, input_data_2_4_4, input_data_2_4_4};
    const int32_t loop_axis = -1;
    const size_t loop_exit_value = 20;

    test_loop_gpu_multiple_shapes_single_shared(body_shape,
                                                run_shapes,
                                                run_inputs,
                                                std::vector<float>(),
                                                loop_axis,
                                                loop_exit_value);
}

static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape body_input_layout,
std::vector<ov::PartialShape> whole_layouts,
std::vector<std::vector<float>> input_data_list,
Expand Down

0 comments on commit 64eb742

Please sign in to comment.