diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl index eff062e449f097..a768aa426eb918 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl @@ -289,33 +289,62 @@ KERNEL(strided_slice_ref)(OPTIONAL_SHAPE_INFO_ARG #if NEW_AXIS_MODE // If NEW_AXIS_MODE that just copy input to output -#ifdef OUTPUT_LAYOUT_BFYX +#ifdef INPUT0_LAYOUT_BFYX + const uint index_in_batch = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint input_feature_id = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); const uint w_input = 0; const uint z_input = 0; - const uint y_input = (uint)get_global_id(2) / INPUT0_SIZE_X; - const uint x_input = (uint)get_global_id(2) % INPUT0_SIZE_X; -#elif OUTPUT_LAYOUT_BFZYX + const uint y_input = index_in_batch / OUTPUT_SIZE_X; + const uint x_input = index_in_batch % OUTPUT_SIZE_X; +#elif INPUT0_LAYOUT_BFZYX + const uint index_in_batch = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint input_feature_id = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); const uint w_input = 0; - const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y); - const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y); + const uint yx_input = index_in_batch % (INPUT0_SIZE_X * INPUT0_SIZE_Y); + const uint z_input = index_in_batch / (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint y_input = yx_input / INPUT0_SIZE_X; const uint x_input = yx_input % INPUT0_SIZE_X; -#elif OUTPUT_LAYOUT_BFWZYX - const uint zyx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); - const uint w_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); +#elif INPUT0_LAYOUT_BFWZYX + const uint index_in_batch = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); + const uint input_feature_id = (feature * (uint)get_global_size(2) + (uint)get_global_id(2)) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W); + const uint zyx_input = index_in_batch % (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); + const uint w_input = index_in_batch / (INPUT0_SIZE_X * INPUT0_SIZE_Y * INPUT0_SIZE_Z); const uint z_input = zyx_input / (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint yx_input = zyx_input % (INPUT0_SIZE_X * INPUT0_SIZE_Y); const uint y_input = yx_input / INPUT0_SIZE_X; const uint x_input = yx_input % INPUT0_SIZE_X; #endif + const uint input_index = INPUT0_OFFSET + batch * INPUT0_BATCH_PITCH + - feature * INPUT0_FEATURE_PITCH + - w_input * INPUT0_W_PITCH + - z_input * INPUT0_Z_PITCH + - y_input * INPUT0_Y_PITCH + - x_input * INPUT0_X_PITCH; - output[input_index] = input[input_index]; + input_feature_id * INPUT0_FEATURE_PITCH + + w_input * OUTPUT_W_PITCH + + z_input * OUTPUT_Z_PITCH + + y_input * OUTPUT_Y_PITCH + + x_input * OUTPUT_X_PITCH; + +#ifdef OUTPUT_LAYOUT_BFYX + const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X; + const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X; + const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x); +#elif OUTPUT_LAYOUT_BFZYX + const uint yx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint z = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint y = yx / OUTPUT_SIZE_X; + const uint x = yx % OUTPUT_SIZE_X; + const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x); +#elif OUTPUT_LAYOUT_BFWZYX + const uint zyx = (uint)get_global_id(2) % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint w = (uint)get_global_id(2) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z); + const uint z = zyx / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint yx = zyx % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + const uint y = yx / OUTPUT_SIZE_X; + const uint x = yx % OUTPUT_SIZE_X; + const uint output_index = OUTPUT_GET_INDEX(batch, feature, w, z, y, x); +#endif + + output[output_index] = input[input_index]; + #else // NEW_AXIS_MODE #ifdef OUTPUT_LAYOUT_BFYX const uint w = 0; diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 1da24cf25f3a2a..cc45b925903f1f 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -81,8 +81,6 @@ std::vector disabledTestPatterns() { R"(smoke_Nms9LayerTest.*)", // Doesn't match reference results as v6 ref impl behavior is misaligned with expected R"(smoke_MemoryTestV3.*)", - // Issue: 129991 - R"(.*StridedSliceLayerTest.*TS=.*2.2.4.1*.*)", // Issue: CVS-133173 R"(.*smoke_CTCLoss_Set2/CTCLossLayerTest.Inference/IS=\(\[\]\)_TS=\{\(3.6.8\)\}_LL=\(6.5.6\)_A=\(4.1.2.3.4.5\)\(5.4.3.0.1.0\)\(2.1.3.1.3.0\)_AL=\(3.3.5\)_BI=7_PCR=1_CMR=1_U=0_PF=f32_PI=i64.*)", R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/f16_GPU_\[4,3,50,86\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)",