Skip to content

Commit

Permalink
[GPU] Fix clBuildProgram failure with ssd_mobilnet_v1_coco and batch=…
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-k-park authored and alexander-shchepetov committed Sep 5, 2021
1 parent 555c8b8 commit 38ac9a2
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ DetectionOutputKernelRef::DispatchData SetDefault(const detection_output_params&
dispatchData.lws = {1, 1, 1};
} else {
dispatchData.gws = {input.Batch().v, 1, 1};
dispatchData.lws = {input.Batch().v, 1, 1};
dispatchData.lws = {1, 1, 1};
}
} else {
dispatchData.gws = {1, 1, 1};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location
__global int *buffer1) {
const int batchId = get_global_id(0);

__local int class_offset[LOCAL_BATCHES_NUM * NUM_CLASSES_ACC];
__local int class_offset[NUM_CLASSES_ACC];

const int total_det = FUNC_CALL(get_accumulated_detections)(buffer1, batchId);
buffer1[batchId * NUM_CLASSES_ACC + NUM_CLASSES] = total_det;
Expand Down Expand Up @@ -689,9 +689,9 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location
}

// calculate starting point of each class
class_offset[scores_size_offset] = 0;
class_offset[0] = 0;
for (int i = 1; i < NUM_CLASSES_ACC; ++i) {
class_offset[scores_size_offset + i] = class_offset[scores_size_offset + i - 1] + buffer1[scores_size_offset + i - 1];
class_offset[i] = class_offset[i - 1] + buffer1[scores_size_offset + i - 1];
}

barrier(CLK_LOCAL_MEM_FENCE);
Expand All @@ -700,7 +700,7 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location
for (uint idx_num_det = 0; idx_num_det < KEEP_TOP_K; idx_num_det++) {
SCORES_INFO score_info;
score_info = scoresList[scores_offset + idx_num_det];
const int idx = startIdx + class_offset[batchId * NUM_CLASSES_ACC + score_info.classId];
const int idx = startIdx + class_offset[score_info.classId];
output[idx * OUTPUT_ROW_SIZE] = TO_OUTPUT_TYPE(batchId);
output[idx * OUTPUT_ROW_SIZE + 1] = TO_OUTPUT_TYPE((DECREASE_LABEL_ID) ? score_info.classId - 1 : score_info.classId);
output[idx * OUTPUT_ROW_SIZE + 2] = TO_OUTPUT_TYPE(score_info.score);
Expand All @@ -719,7 +719,7 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location
ymax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymax));
}
vstore4((OUTPUT_TYPE4)(xmin, ymin, xmax, ymax), 0, output + idx * OUTPUT_ROW_SIZE + 3);
class_offset[batchId * NUM_CLASSES_ACC + score_info.classId]++;
class_offset[score_info.classId]++;
}
} else {
const int startIdx = FUNC_CALL(get_start_idx)(buffer1, batchId);
Expand Down Expand Up @@ -753,7 +753,6 @@ KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location
}
}

barrier(CLK_GLOBAL_MEM_FENCE);
if(batchId == 0) {
const int final_detections = FUNC_CALL(get_final_detections)(buffer1);
unroll_for (uint i = final_detections; i < NUM_OF_IMAGES * KEEP_TOP_K; i++) {
Expand Down

0 comments on commit 38ac9a2

Please sign in to comment.