Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

camerad: fast debayer on c2 cameras #23795

Merged
merged 4 commits into from
Feb 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions selfdrive/camerad/cameras/camera_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Debayer {
Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s) {
char args[4096];
const CameraInfo *ci = &s->ci;
hdr_ = ci->hdr;
snprintf(args, sizeof(args),
"-cl-fast-relaxed-math -cl-denorms-are-zero "
"-DFRAME_WIDTH=%d -DFRAME_HEIGHT=%d -DFRAME_STRIDE=%d "
Expand All @@ -63,9 +64,20 @@ class Debayer {
CL_CHECK(clSetKernelArg(krnl_, 2, localMemSize, 0));
CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 2, NULL, globalWorkSize, localWorkSize, 0, 0, debayer_event));
} else {
const size_t debayer_work_size = height; // doesn't divide evenly, is this okay?
CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(float), &gain));
CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 1, NULL, &debayer_work_size, NULL, 0, 0, debayer_event));
if (hdr_) {
// HDR requires a 1-D kernel due to the DPCM compression
const size_t debayer_local_worksize = 128;
const size_t debayer_work_size = height; // doesn't divide evenly, is this okay?
CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(float), &gain));
CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 1, NULL, &debayer_work_size, &debayer_local_worksize, 0, 0, debayer_event));
} else {
const int debayer_local_worksize = 32;
assert(width % 2 == 0);
const size_t globalWorkSize[] = {size_t(height), size_t(width / 2)};
const size_t localWorkSize[] = {debayer_local_worksize, debayer_local_worksize};
CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(float), &gain));
CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 2, NULL, globalWorkSize, localWorkSize, 0, 0, debayer_event));
}
}
}

Expand All @@ -75,6 +87,7 @@ class Debayer {

private:
cl_kernel krnl_;
bool hdr_;
};

void CameraBuf::init(cl_device_id device_id, cl_context context, CameraState *s, VisionIpcServer * v, int frame_cnt, VisionStreamType init_rgb_type, VisionStreamType init_yuv_type, release_cb init_release_callback) {
Expand Down
9 changes: 9 additions & 0 deletions selfdrive/camerad/cameras/debayer.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ float3 srgb_gamma(float3 p) {
return select(ph, pl, islessequal(p, 0.0031308f));
}

#if HDR

__constant int dpcm_lookup[512] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, 935, 951, 967, 983, 999, 1015, 1031, 1047, 1063, 1079, 1095, 1111, 1127, 1143, 1159, 1175, 1191, 1207, 1223, 1239, 1255, 1271, 1287, 1303, 1319, 1335, 1351, 1367, 1383, 1399, 1415, 1431, -935, -951, -967, -983, -999, -1015, -1031, -1047, -1063, -1079, -1095, -1111, -1127, -1143, -1159, -1175, -1191, -1207, -1223, -1239, -1255, -1271, -1287, -1303, -1319, -1335, -1351, -1367, -1383, -1399, -1415, -1431, 419, 427, 435, 443, 451, 459, 467, 475, 483, 491, 499, 507, 515, 523, 531, 539, 547, 555, 563, 571, 579, 587, 595, 603, 611, 619, 627, 635, 643, 651, 659, 667, 675, 683, 691, 699, 707, 715, 723, 731, 739, 747, 755, 763, 771, 779, 787, 795, 803, 811, 819, 827, 835, 843, 851, 859, 867, 875, 883, 891, 899, 907, 915, 923, -419, -427, -435, -443, -451, -459, -467, -475, -483, -491, -499, -507, -515, -523, -531, -539, -547, -555, -563, -571, -579, -587, -595, -603, -611, -619, -627, -635, -643, -651, -659, -667, -675, -683, -691, -699, -707, -715, -723, -731, -739, -747, -755, -763, -771, -779, -787, -795, -803, -811, -819, -827, -835, -843, -851, -859, -867, -875, -883, -891, -899, -907, -915, -923, 161, 165, 169, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 233, 237, 241, 245, 249, 253, 257, 261, 265, 269, 273, 277, 281, 285, 289, 293, 297, 301, 305, 309, 313, 317, 321, 325, 329, 333, 337, 341, 345, 349, 353, 357, 361, 365, 369, 373, 377, 381, 385, 389, 393, 397, 401, 405, 409, 413, -161, -165, -169, -173, -177, -181, -185, -189, -193, -197, -201, -205, -209, -213, -217, -221, -225, -229, -233, -237, -241, -245, -249, -253, -257, -261, -265, -269, -273, -277, -281, -285, -289, -293, -297, -301, -305, -309, -313, -317, -321, -325, -329, -333, -337, -341, -345, -349, -353, -357, -361, -365, -369, -373, -377, -381, -385, -389, -393, -397, -401, -405, -409, -413, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, -32, -34, -36, -38, -40, -42, -44, -46, -48, -50, -52, -54, -56, -58, -60, -62, -64, -66, -68, -70, -72, -74, -76, -78, -80, -82, -84, -86, -88, -90, -92, -94, -96, -98, -100, -102, -104, -106, -108, -110, -112, -114, -116, -118, -120, -122, -124, -126, -128, -130, -132, -134, -136, -138, -140, -142, -144, -146, -148, -150, -152, -154, -156, -158};

inline uint4 decompress(uint4 p, uint4 pl) {
Expand All @@ -35,15 +37,22 @@ inline uint4 decompress(uint4 p, uint4 pl) {
return select(r2, r1, p < 0x200);
}

#endif

__kernel void debayer10(__global uchar const * const in,
__global uchar * out, float digital_gain)
{
const int oy = get_global_id(0);
if (oy >= RGB_HEIGHT) return;
const int iy = oy * 2;

#if HDR
uint4 pint_last;
for (int ox = 0; ox < RGB_WIDTH; ox += 2) {
#else
int ox = get_global_id(1) * 2;
{
#endif
const int ix = (ox/2) * 5;

// TODO: why doesn't this work for the frontview
Expand Down