From f79b8911b7e2d9fde943564fcf2b1c7c04e056e2 Mon Sep 17 00:00:00 2001 From: andrei-cv Date: Mon, 25 Mar 2024 18:12:14 +0400 Subject: [PATCH 1/4] add unsqueeze --- .../cl_kernels/strided_slice_ref.cl | 65 ++++++++++++++----- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl index eff062e449f097..5e6a06c7a52dda 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl @@ -289,33 +289,68 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG #if NEW_AXIS_MODE // If NEW_AXIS_MODE that just copy input to output -#ifdef OUTPUT_LAYOUT_BFYX +#ifdef INPUT0_LAYOUT_BFYX + const uint index_in_batch = (feature * get_global_size(2) + (uint)get_global_id(2))%(OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint input_feature_id = (feature * get_global_size(2) +(uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint w_input = 0; const uint z_input = 0; - const uint y_input = (uint)get_global_id(2) / INPUT0_SIZE_X; - const uint x_input = (uint)get_global_id(2) % INPUT0_SIZE_X; -#elif OUTPUT_LAYOUT_BFZYX + const uint y_input = index_in_batch / OUTPUT_SIZE_X; + const uint x_input = index_in_batch % OUTPUT_SIZE_X; +#elif INPUT0_LAYOUT_BFZYX + const uint index_in_batch = (feature * get_global_size(2) +(uint)get_global_id(2))%(OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + uint input_feature_id = (feature * get_global_size(2) +(uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); const uint w_input = 0; - const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y); - const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y); + const uint yx_input = index_in_batch % (INPUT0_SIZE_X * INPUT0_SIZE_Y); + const uint z_input = index_in_batch / (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint y_input = yx_input / INPUT0_SIZE_X; const uint x_input = yx_input % INPUT0_SIZE_X; -#elif OUTPUT_LAYOUT_BFWZYX - const uint zyx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); - const uint w_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); +#elif INPUT0_LAYOUT_BFWZYX + const uint index_in_batch = (feature * get_global_size(2) +(uint)get_global_id(2))%(OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); + uint input_feature_id = (feature * get_global_size(2) +(uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); + const uint zyx_input = index_in_batch % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); + const uint w_input = index_in_batch / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); const uint z_input = zyx_input / (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint yx_input = zyx_input % (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint y_input = yx_input / INPUT0_SIZE_X; const uint x_input = yx_input % INPUT0_SIZE_X; #endif + const uint input_index = INPUT0_OFFSET + batch * INPUT0_BATCH_PITCH + - feature * INPUT0_FEATURE_PITCH + - w_input * INPUT0_W_PITCH + - z_input * INPUT0_Z_PITCH + - y_input * INPUT0_Y_PITCH + - x_input * INPUT0_X_PITCH; - output[input_index] = input[input_index]; + input_feature_id * INPUT0_FEATURE_PITCH + + w_input * OUTPUT_W_PITCH + + z_input * OUTPUT_Z_PITCH + + y_input * OUTPUT_Y_PITCH + + x_input * OUTPUT_X_PITCH; + +#ifdef OUTPUT_LAYOUT_BFYX + const uint w = 0; + const uint z = 0; + const uint y = get_global_id(2) / OUTPUT_SIZE_X; + const uint x = get_global_id(2) % OUTPUT_SIZE_X; +#elif OUTPUT_LAYOUT_BFZYX + const uint w = 0; + const uint yx = get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint z = get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint y = yx / OUTPUT_SIZE_X; + const uint x = yx % OUTPUT_SIZE_X; +#elif OUTPUT_LAYOUT_BFWZYX + const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint y = yx / OUTPUT_SIZE_X; + const uint x = yx % OUTPUT_SIZE_X; +#endif + const uint output_index = OUTPUT_OFFSET + + batch * OUTPUT_BATCH_PITCH + + feature * OUTPUT_FEATURE_PITCH + + w * OUTPUT_W_PITCH + + z * OUTPUT_Z_PITCH + + y * OUTPUT_Y_PITCH + + x * OUTPUT_X_PITCH; + output[output_index] = input[input_index]; + #else // NEW_AXIS_MODE #ifdef OUTPUT_LAYOUT_BFYX const uint w = 0; From 3cb53f710c4aa1644e7cf25f5bafdc9e66d3ac6c Mon Sep 17 00:00:00 2001 From: andrei-cv Date: Mon, 25 Mar 2024 19:35:36 +0400 Subject: [PATCH 2/4] rm skip StridedSliceLayerTest*TS=*2.2.4.1 --- .../functional/shared_tests_instances/skip_tests_config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 1da24cf25f3a2a..73a2c62a44cc81 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -82,7 +82,7 @@ std::vector disabledTestPatterns() { // Doesn't match reference results as v6 ref impl behavior is misaligned with expected R"(smoke_MemoryTestV3.*)", // Issue: 129991 - R"(.*StridedSliceLayerTest.*TS=.*2.2.4.1*.*)", + //R"(.*StridedSliceLayerTest.*TS=.*2.2.4.1*.*)", // Issue: CVS-133173 R"(.*smoke_CTCLoss_Set2/CTCLossLayerTest.Inference/IS=\(\[\]\)_TS=\{\(3.6.8\)\}_LL=\(6.5.6\)_A=\(4.1.2.3.4.5\)\(5.4.3.0.1.0\)\(2.1.3.1.3.0\)_AL=\(3.3.5\)_BI=7_PCR=1_CMR=1_U=0_PF=f32_PI=i64.*)", R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/f16_GPU_\[4,3,50,86\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)", From 7f8bc7d4873c11ff65d3042a897680a6d81aaa29 Mon Sep 17 00:00:00 2001 From: andrei-cv Date: Mon, 25 Mar 2024 19:46:19 +0400 Subject: [PATCH 3/4] rm skip StridedSliceLayerTest*TS=*2.2.4.1 --- .../functional/shared_tests_instances/skip_tests_config.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 73a2c62a44cc81..cc45b925903f1f 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -81,8 +81,6 @@ std::vector disabledTestPatterns() { R"(smoke_Nms9LayerTest.*)", // Doesn't match reference results as v6 ref impl behavior is misaligned with expected R"(smoke_MemoryTestV3.*)", - // Issue: 129991 - //R"(.*StridedSliceLayerTest.*TS=.*2.2.4.1*.*)", // Issue: CVS-133173 R"(.*smoke_CTCLoss_Set2/CTCLossLayerTest.Inference/IS=\(\[\]\)_TS=\{\(3.6.8\)\}_LL=\(6.5.6\)_A=\(4.1.2.3.4.5\)\(5.4.3.0.1.0\)\(2.1.3.1.3.0\)_AL=\(3.3.5\)_BI=7_PCR=1_CMR=1_U=0_PF=f32_PI=i64.*)", R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/f16_GPU_\[4,3,50,86\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)", From 76a1ae2a975253956becf1325d25513d903efc2c Mon Sep 17 00:00:00 2001 From: andrei-cv Date: Mon, 25 Mar 2024 20:55:36 +0400 Subject: [PATCH 4/4] code style --- .../cl_kernels/strided_slice_ref.cl | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl index 5e6a06c7a52dda..a768aa426eb918 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl @@ -290,23 +290,23 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG #if NEW_AXIS_MODE // If NEW_AXIS_MODE that just copy input to output #ifdef INPUT0_LAYOUT_BFYX - const uint index_in_batch = (feature * get_global_size(2) + (uint)get_global_id(2))%(OUTPUT_SIZE_X * OUTPUT_SIZE_Y); - uint input_feature_id = (feature * get_global_size(2) +(uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint index_in_batch = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint input_feature_id = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint w_input = 0; const uint z_input = 0; const uint y_input = index_in_batch / OUTPUT_SIZE_X; const uint x_input = index_in_batch % OUTPUT_SIZE_X; #elif INPUT0_LAYOUT_BFZYX - const uint index_in_batch = (feature * get_global_size(2) +(uint)get_global_id(2))%(OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); - uint input_feature_id = (feature * get_global_size(2) +(uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint index_in_batch = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint input_feature_id = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); const uint w_input = 0; const uint yx_input = index_in_batch % (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint z_input = index_in_batch / (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint y_input = yx_input / INPUT0_SIZE_X; const uint x_input = yx_input % INPUT0_SIZE_X; #elif INPUT0_LAYOUT_BFWZYX - const uint index_in_batch = (feature * get_global_size(2) +(uint)get_global_id(2))%(OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); - uint input_feature_id = (feature * get_global_size(2) +(uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); + const uint index_in_batch = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); + const uint input_feature_id = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); const uint zyx_input = index_in_batch % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); const uint w_input = index_in_batch / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); const uint z_input = zyx_input / (INPUT0_SIZE_X * INPUT0_SIZE_Y); @@ -324,16 +324,15 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG x_input * OUTPUT_X_PITCH; #ifdef OUTPUT_LAYOUT_BFYX - const uint w = 0; - const uint z = 0; - const uint y = get_global_id(2) / OUTPUT_SIZE_X; - const uint x = get_global_id(2) % OUTPUT_SIZE_X; + const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X; + const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X; + const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x); #elif OUTPUT_LAYOUT_BFZYX - const uint w = 0; - const uint yx = get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); - const uint z = get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint yx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint z = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint y = yx / OUTPUT_SIZE_X; const uint x = yx % OUTPUT_SIZE_X; + const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x); #elif OUTPUT_LAYOUT_BFWZYX const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); @@ -341,14 +340,9 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint y = yx / OUTPUT_SIZE_X; const uint x = yx % OUTPUT_SIZE_X; + const uint output_index = OUTPUT_GET_INDEX(batch, feature, w, z, y, x); #endif - const uint output_index = OUTPUT_OFFSET + - batch * OUTPUT_BATCH_PITCH + - feature * OUTPUT_FEATURE_PITCH + - w * OUTPUT_W_PITCH + - z * OUTPUT_Z_PITCH + - y * OUTPUT_Y_PITCH + - x * OUTPUT_X_PITCH; + output[output_index] = input[input_index]; #else // NEW_AXIS_MODE