diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp index 8917ecffd7ec5a..aaa51eeccf15b8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp @@ -123,7 +123,7 @@ DetectionOutputKernelRef::DispatchData SetDefault(const detection_output_params& dispatchData.lws = {1, 1, 1}; } else { dispatchData.gws = {input.Batch().v, 1, 1}; - dispatchData.lws = {input.Batch().v, 1, 1}; + dispatchData.lws = {1, 1, 1}; } } else { dispatchData.gws = {1, 1, 1}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl index 6f19536ed68a48..5d9ae37def8d9d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl @@ -658,7 +658,7 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location __global int *buffer1) { const int batchId = get_global_id(0); - __local int class_offset[LOCAL_BATCHES_NUM * NUM_CLASSES_ACC]; + __local int class_offset[NUM_CLASSES_ACC]; const int total_det = FUNC_CALL(get_accumulated_detections)(buffer1, batchId); buffer1[batchId * NUM_CLASSES_ACC + NUM_CLASSES] = total_det; @@ -689,9 +689,9 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location } // calculate starting point of each class - class_offset[scores_size_offset] = 0; + class_offset[0] = 0; for (int i = 1; i < NUM_CLASSES_ACC; ++i) { - class_offset[scores_size_offset + i] = class_offset[scores_size_offset + i - 1] + buffer1[scores_size_offset + i - 1]; + class_offset[i] = class_offset[i - 1] + buffer1[scores_size_offset + i - 1]; } barrier(CLK_LOCAL_MEM_FENCE); @@ -700,7 +700,7 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location for (uint idx_num_det = 0; idx_num_det < KEEP_TOP_K; idx_num_det++) { SCORES_INFO score_info; score_info = scoresList[scores_offset + idx_num_det]; - const int idx = startIdx + class_offset[batchId * NUM_CLASSES_ACC + score_info.classId]; + const int idx = startIdx + class_offset[score_info.classId]; output[idx * OUTPUT_ROW_SIZE] = TO_OUTPUT_TYPE(batchId); output[idx * OUTPUT_ROW_SIZE + 1] = TO_OUTPUT_TYPE((DECREASE_LABEL_ID) ? score_info.classId - 1 : score_info.classId); output[idx * OUTPUT_ROW_SIZE + 2] = TO_OUTPUT_TYPE(score_info.score); @@ -719,7 +719,7 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location ymax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymax)); } vstore4((OUTPUT_TYPE4)(xmin, ymin, xmax, ymax), 0, output + idx * OUTPUT_ROW_SIZE + 3); - class_offset[batchId * NUM_CLASSES_ACC + score_info.classId]++; + class_offset[score_info.classId]++; } } else { const int startIdx = FUNC_CALL(get_start_idx)(buffer1, batchId); @@ -753,7 +753,6 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location } } - barrier(CLK_GLOBAL_MEM_FENCE); if(batchId == 0) { const int final_detections = FUNC_CALL(get_final_detections)(buffer1); unroll_for (uint i = final_detections; i < NUM_OF_IMAGES * KEEP_TOP_K; i++) {