From 64eb742eafd13e82d1eac1bc2e8de35dc0d4232e Mon Sep 17 00:00:00 2001 From: David Nam Date: Sat, 29 Jun 2024 15:02:58 +0800 Subject: [PATCH] [GPU][Loop] Change condition to reinterpret buffer (#25086) ### Details: - In primitive_inst.cpp, when realloc_if_needed() returns early for a skippable node that is optimized out, the output memory layout should be changed. - So the layout needs to be adjusted in the respective primitive_inst (in this case, Reorder). ### Tickets: - 143848 --- src/plugins/intel_gpu/src/graph/reorder.cpp | 4 +- .../tests/unit/test_cases/loop_gpu_test.cpp | 118 ++++++++++++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 18c00d949b857c..e322baeee95e52 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -270,7 +270,9 @@ void reorder_inst::update_output_memory() { if (!can_be_optimized()) return; - if (static_cast(_outputs[0]) && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) + if (static_cast(_outputs[0]) + && _network.get_engine().is_the_same_buffer(output_memory(), input_memory()) + && output_memory().get_layout().identical(get_output_layout())) return; if (_node != nullptr) diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index c5211e39b69f08..d071c0f3416581 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -875,6 +875,115 @@ static void test_loop_gpu_multiple_shapes(ov::PartialShape body_input_layout, } } +static void test_loop_gpu_multiple_shapes_single_shared(ov::PartialShape body_input_layout, + std::vector whole_layouts, + std::vector> input_data_list, + std::vector expected_output_data, + int32_t axis, + size_t exit_value, + bool is_caching_test = false) { + auto& engine = 
get_test_engine(); + + auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx }; + auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx }; + + auto e_initial_condition_mem = engine.allocate_memory(const_layout); + auto e_num_iteration_mem = engine.allocate_memory(const_layout); + auto b_exit_value_mem = engine.allocate_memory(const_layout); + auto b_index_inc_mem = engine.allocate_memory(const_layout); + + // initialize input buffers + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + set_values(e_num_iteration_mem, {10}); + + primitive_id body_current_iteration_id = "b_index"; + primitive_id body_execution_condition_id = "b_cond_exit_value"; + + cldnn::topology body( + input_layout(body_current_iteration_id, const_layout), + input_layout("b_parameter", b_input_layout), + data("b_exit_value", b_exit_value_mem), + data("b_index_inc", b_index_inc_mem), + eltwise("b_index_update", input_info(body_current_iteration_id), input_info("b_index_inc"), eltwise_mode::sum), + eltwise("b_permute", input_info("b_parameter"), input_info("b_index_update"), eltwise_mode::sum), + reorder("b_result", input_info("b_permute"), b_input_layout), + eltwise(body_execution_condition_id, input_info(body_current_iteration_id), input_info("b_exit_value"), eltwise_mode::lt) + ); + + primitive_id trip_count_id = ""; + primitive_id actual_iteration_count_id = "actual_iteration_count"; + primitive_id initial_condition_id = "initial_condition"; + int64_t num_iterations = -1; + + std::vector input_primitive_maps { + loop::io_primitive_map("input", "b_parameter", axis), + loop::io_primitive_map(actual_iteration_count_id, body_current_iteration_id) }; + std::vector output_primitive_maps { + loop::io_primitive_map(cldnn::input_info("loop"), cldnn::input_info("b_result"), axis) }; + std::vector back_edges { + loop::backedge_mapping("b_result", "b_parameter"), + 
loop::backedge_mapping("b_index_update", body_current_iteration_id) }; + + auto body_program = build_program(engine, body, body_execution_condition_id, output_primitive_maps, back_edges, true); + + auto const_shape = engine.allocate_memory({ov::PartialShape{4}, data_types::i32, format::bfyx}); + std::vector body_input_layouts; + for (size_t i = 0; i < body_input_layout.size(); i++) { + if (body_input_layout[i].is_dynamic()) + body_input_layouts.push_back(-1); + else + body_input_layouts.push_back(body_input_layout[i].get_length()); + } + set_values(const_shape, body_input_layouts); + + cldnn::topology topology( + input_layout("input_origin", b_input_layout), + input_layout(initial_condition_id, e_initial_condition_mem->get_layout()), + mutable_data(actual_iteration_count_id, e_num_iteration_mem), + permute("input2", input_info("input_origin"), {0, 1, 2, 3}), + data("const", const_shape), + permute("permute1", input_info("input_origin"), {0, 1, 2, 3}), + concatenation("input", {input_info("permute1"), input_info("input_origin")}, 0), + loop("loop", + {input_info(actual_iteration_count_id), input_info(initial_condition_id), input_info("input")}, + body_program, trip_count_id, initial_condition_id, actual_iteration_count_id, + input_primitive_maps, output_primitive_maps, back_edges, + num_iterations, body_current_iteration_id, body_execution_condition_id, 1), + permute("result", input_info("loop"), {0, 1, 2, 3})); + + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + network network(engine, topology, config); + for (size_t i = 0 ; i < whole_layouts.size(); i++) { + auto whole_layout = whole_layouts[i]; + auto input_data = input_data_list[i]; + + set_values(e_initial_condition_mem, {1}); + set_values(b_exit_value_mem, {exit_value}); + set_values(b_index_inc_mem, {1}); + set_values(e_num_iteration_mem, {10}); + + auto e_input_layout = cldnn::layout{ whole_layout, data_types::f32, format::bfyx 
}; + auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y + auto expected_output_layout = whole_layout; + set_values(e_input_mem, input_data); + + network.set_input_data("input_origin", e_input_mem); + network.set_input_data(initial_condition_id, e_initial_condition_mem); + + auto outputs = network.execute(); + auto output_layout = outputs.begin()->second.get_layout(); + auto input_layout = network.get_primitive("input")->get_output_layout(); + + ASSERT_EQ(output_layout.feature(), input_layout.feature()); + ASSERT_EQ(output_layout.spatial(0), input_layout.spatial(0)); + ASSERT_EQ(output_layout.spatial(1), input_layout.spatial(1)); + } +} + std::vector input_data_2_4{ 1.0f, 2.0f, 4.0f, -15.f, @@ -919,6 +1028,15 @@ TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes2) { -1, 10); } +TEST(loop_gpu, support_loop_w_dynamic_input_w_various_shapes3) { + test_loop_gpu_multiple_shapes_single_shared( + { 1, -1, 560 }, + {{ 1, 58, 560 }, { 1, 87, 560 }, { 1, 72, 560 }, { 1, 88, 560 }, { 1, 89, 560 }}, + {input_data_2_4_4, input_data_2_4_4, input_data_2_4_4, input_data_2_4_4, input_data_2_4_4}, + std::vector(), + -1, 20); +} + static void test_loop_gpu_wo_trip_count_update_primitive_id(ov::PartialShape body_input_layout, std::vector whole_layouts, std::vector> input_data_list,