 
 #include "selfdrive/modeld/models/dmonitoring.h"
 
-constexpr int MODEL_WIDTH = 1440;
-constexpr int MODEL_HEIGHT = 960;
+constexpr int MODEL_WIDTH = 320;
+constexpr int MODEL_HEIGHT = 640;
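+// 320x640 (WxH) is the YUV420 frame fed to the packing loop below, which
+// rearranges it into a 6-channel half-resolution tensor for the network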
 
 template <class T>
 static inline T *get_buffer(std::vector<T> &buf, const size_t size) {
   if (buf.size() < size) buf.resize(size);
   return buf.data();
 }
 
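+// lay out one contiguous I420 frame in buf: a full-res Y plane followed by
+// quarter-res U and V planes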
+static inline void init_yuv_buf(std::vector<uint8_t> &buf, const int width, int height) {
+  uint8_t *y = get_buffer(buf, width * height * 3 / 2);
+  uint8_t *u = y + width * height;
+  uint8_t *v = u + (width / 2) * (height / 2);
+
+  // needed on comma two to make the padded border black
+  // equivalent to RGB(0,0,0) in YUV space
+  memset(y, 16, width * height);
+  memset(u, 128, (width / 2) * (height / 2));
+  memset(v, 128, (width / 2) * (height / 2));
+}
+
 void dmonitoring_init(DMonitoringModelState* s) {
   s->is_rhd = Params().getBool("IsRHD");
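+  // 256-entry LUT: maps a uint8 pixel p to (p - 128) / 128, so the packing
+  // loop can normalize to roughly [-1, 1) with a single table lookup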
+  for (int x = 0; x < std::size(s->tensor); ++x) {
+    s->tensor[x] = (x - 128.f) * 0.0078125f;
+  }
+  init_yuv_buf(s->resized_buf, MODEL_WIDTH, MODEL_HEIGHT);
 
 #ifdef USE_ONNX_MODEL
-  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME);
 #else
-  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME);
 #endif
 
   s->m->addCalib(s->calib, CALIB_LEN);
 }
 
-void parse_driver_data(DriverStateResult &ds_res, const DMonitoringModelState* s, int out_idx_offset) {
-  for (int i = 0; i < 3; ++i) {
-    ds_res.face_orientation[i] = s->output[out_idx_offset+i] * REG_SCALE;
-    ds_res.face_orientation_std[i] = exp(s->output[out_idx_offset+6+i]);
-  }
-  for (int i = 0; i < 2; ++i) {
-    ds_res.face_position[i] = s->output[out_idx_offset+3+i] * REG_SCALE;
-    ds_res.face_position_std[i] = exp(s->output[out_idx_offset+9+i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    ds_res.ready_prob[i] = sigmoid(s->output[out_idx_offset+35+i]);
-  }
-  for (int i = 0; i < 2; ++i) {
-    ds_res.not_ready_prob[i] = sigmoid(s->output[out_idx_offset+39+i]);
-  }
-  ds_res.face_prob = sigmoid(s->output[out_idx_offset+12]);
-  ds_res.left_eye_prob = sigmoid(s->output[out_idx_offset+21]);
-  ds_res.right_eye_prob = sigmoid(s->output[out_idx_offset+30]);
-  ds_res.left_blink_prob = sigmoid(s->output[out_idx_offset+31]);
-  ds_res.right_blink_prob = sigmoid(s->output[out_idx_offset+32]);
-  ds_res.sunglasses_prob = sigmoid(s->output[out_idx_offset+33]);
-  ds_res.occluded_prob = sigmoid(s->output[out_idx_offset+34]);
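+// hand out (y, u, v) plane pointers into a single I420 allocation,
+// same layout as init_yuv_buf above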
+static inline auto get_yuv_buf(std::vector<uint8_t> &buf, const int width, int height) {
+  uint8_t *y = get_buffer(buf, width * height * 3 / 2);
+  uint8_t *u = y + width * height;
+  uint8_t *v = u + (width / 2) * (height / 2);
+  return std::make_tuple(y, u, v);
 }
 
-void fill_driver_data(cereal::DriverStateV2::DriverData::Builder ddata, const DriverStateResult &ds_res) {
-  ddata.setFaceOrientation(ds_res.face_orientation);
-  ddata.setFaceOrientationStd(ds_res.face_orientation_std);
-  ddata.setFacePosition(ds_res.face_position);
-  ddata.setFacePositionStd(ds_res.face_position_std);
-  ddata.setFaceProb(ds_res.face_prob);
-  ddata.setLeftEyeProb(ds_res.left_eye_prob);
-  ddata.setRightEyeProb(ds_res.right_eye_prob);
-  ddata.setLeftBlinkProb(ds_res.left_blink_prob);
-  ddata.setRightBlinkProb(ds_res.right_blink_prob);
-  ddata.setSunglassesProb(ds_res.sunglasses_prob);
-  ddata.setOccludedProb(ds_res.occluded_prob);
-  ddata.setReadyProb(ds_res.ready_prob);
-  ddata.setNotReadyProb(ds_res.not_ready_prob);
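+// copy a cropped window out of an NV12 frame (semi-planar: Y plane, then
+// interleaved CbCr) into planar I420: two Y rows per iteration, then
+// deinterleave one UV row into separate U and V planes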
+struct Rect {int x, y, w, h;};
+void crop_nv12_to_yuv(uint8_t *raw, int stride, int uv_offset, uint8_t *y, uint8_t *u, uint8_t *v, const Rect &rect) {
+  uint8_t *raw_y = raw;
+  uint8_t *raw_uv = raw_y + uv_offset;
+  for (int r = 0; r < rect.h / 2; r++) {
+    memcpy(y + 2 * r * rect.w, raw_y + (2 * r + rect.y) * stride + rect.x, rect.w);
+    memcpy(y + (2 * r + 1) * rect.w, raw_y + (2 * r + rect.y + 1) * stride + rect.x, rect.w);
+    for (int h = 0; h < rect.w / 2; h++) {
+      u[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2];
+      v[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2 + 1];
+    }
+  }
 }
 
-DMonitoringModelResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib) {
-  int v_off = height - MODEL_HEIGHT;
-  int h_off = (width - MODEL_WIDTH) / 2;
-  int yuv_buf_len = MODEL_WIDTH * MODEL_HEIGHT;
-
-  uint8_t *raw_buf = (uint8_t *) stream_buf;
-  // vertical crop free
-  uint8_t *raw_y_start = raw_buf + stride * v_off;
+DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib) {
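+  // center a tici_dm_crop::width x (tici_dm_crop::width / 1.33) window (4:3)
+  // in the camera frame, then keep a portrait 1:2 slice of it: its right edge
+  // for LHD, its left edge (mirrored below) for RHD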
+  const int cropped_height = tici_dm_crop::width / 1.33;
+  Rect crop_rect = {width / 2 - tici_dm_crop::width / 2 + tici_dm_crop::x_offset,
+                    height / 2 - cropped_height / 2 + tici_dm_crop::y_offset,
+                    cropped_height / 2,
+                    cropped_height};
+  if (!s->is_rhd) {
+    crop_rect.x += tici_dm_crop::width - crop_rect.w;
+  }
 
-  uint8_t *net_input_buf = get_buffer(s->net_input_buf, yuv_buf_len);
+  int resized_width = MODEL_WIDTH;
+  int resized_height = MODEL_HEIGHT;
+
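+  // crop into cropped_buf; RHD frames are cropped first and then mirrored
+  // horizontally with libyuv, so the model always sees a left-hand-drive view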
+  auto [cropped_y, cropped_u, cropped_v] = get_yuv_buf(s->cropped_buf, crop_rect.w, crop_rect.h);
+  if (!s->is_rhd) {
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, cropped_y, cropped_u, cropped_v, crop_rect);
+  } else {
+    auto [mirror_y, mirror_u, mirror_v] = get_yuv_buf(s->premirror_cropped_buf, crop_rect.w, crop_rect.h);
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, mirror_y, mirror_u, mirror_v, crop_rect);
+    libyuv::I420Mirror(mirror_y, crop_rect.w,
+                       mirror_u, crop_rect.w / 2,
+                       mirror_v, crop_rect.w / 2,
+                       cropped_y, crop_rect.w,
+                       cropped_u, crop_rect.w / 2,
+                       cropped_v, crop_rect.w / 2,
+                       crop_rect.w, crop_rect.h);
+  }
 
-  // here makes a uint8 copy
-  for (int r = 0; r < MODEL_HEIGHT; ++r) {
-    memcpy(net_input_buf + r * MODEL_WIDTH, raw_y_start + r * stride + h_off, MODEL_WIDTH);
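+  // bilinear-scale the cropped I420 frame to the MODEL_WIDTH x MODEL_HEIGHT
+  // resolution the packing loop expects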
+  auto [resized_buf, resized_u, resized_v] = get_yuv_buf(s->resized_buf, resized_width, resized_height);
+  uint8_t *resized_y = resized_buf;
+  libyuv::FilterMode mode = libyuv::FilterModeEnum::kFilterBilinear;
+  libyuv::I420Scale(cropped_y, crop_rect.w,
+                    cropped_u, crop_rect.w / 2,
+                    cropped_v, crop_rect.w / 2,
+                    crop_rect.w, crop_rect.h,
+                    resized_y, resized_width,
+                    resized_u, resized_width / 2,
+                    resized_v, resized_width / 2,
+                    resized_width, resized_height,
+                    mode);
+
+  int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
+  float *net_input_buf = get_buffer(s->net_input_buf, yuv_buf_len);
+  // one shot conversion, O(n) anyway
+  // yuvframe2tensor, normalize
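+  // space-to-depth packing: each 2x2 block of Y becomes 4 channels, so all 6
+  // planes end up at the same (MODEL_WIDTH/2) x (MODEL_HEIGHT/2) resolution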
+  for (int r = 0; r < MODEL_HEIGHT/2; r++) {
+    for (int c = 0; c < MODEL_WIDTH/2; c++) {
+      // Y_ul
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (0*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r)*resized_width + 2*c]];
+      // Y_dl
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (1*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r+1)*resized_width + 2*c]];
+      // Y_ur
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r)*resized_width + 2*c+1]];
+      // Y_dr
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r+1)*resized_width + 2*c+1]];
+      // U
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_u[r*resized_width/2 + c]];
+      // V
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_v[r*resized_width/2 + c]];
+    }
   }
 
-  // printf("preprocess completed. %d \n", yuv_buf_len);
-  // FILE *dump_yuv_file = fopen("/tmp/rawdump.yuv", "wb");
-  // fwrite(net_input_buf, yuv_buf_len, sizeof(uint8_t), dump_yuv_file);
-  // fclose(dump_yuv_file);
+  //printf("preprocess completed. %d \n", yuv_buf_len);
+  //FILE *dump_yuv_file = fopen("/tmp/rawdump.yuv", "wb");
+  //fwrite(resized_buf, yuv_buf_len, sizeof(uint8_t), dump_yuv_file);
+  //fclose(dump_yuv_file);
+
+  // *** testing ***
+  // idat = np.frombuffer(open("/tmp/inputdump.yuv", "rb").read(), np.float32).reshape(6, 160, 320)
+  // imshow(cv2.cvtColor(tensor_to_frames(idat[None]/0.0078125+128)[0], cv2.COLOR_YUV2RGB_I420))
+
+  //FILE *dump_yuv_file2 = fopen("/tmp/inputdump.yuv", "wb");
+  //fwrite(net_input_buf, MODEL_HEIGHT*MODEL_WIDTH*3/2, sizeof(float), dump_yuv_file2);
+  //fclose(dump_yuv_file2);
 
   double t1 = millis_since_boot();
-  s->m->addImage((float*)net_input_buf, yuv_buf_len / 4);
+  s->m->addImage(net_input_buf, yuv_buf_len);
   for (int i = 0; i < CALIB_LEN; i++) {
     s->calib[i] = calib[i];
   }
   s->m->execute();
   double t2 = millis_since_boot();
 
-  DMonitoringModelResult model_res = {0};
-  parse_driver_data(model_res.driver_state_lhd, s, 0);
-  parse_driver_data(model_res.driver_state_rhd, s, 41);
-  model_res.poor_vision_prob = sigmoid(s->output[82]);
-  model_res.wheel_on_right_prob = sigmoid(s->output[83]);
-  model_res.dsp_execution_time = (t2 - t1) / 1000.;
-
-  return model_res;
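+  // decode the raw network output: REG_SCALE rescales the regressed means,
+  // exp() turns log-std outputs into stds, sigmoid() turns logits into probs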
+  DMonitoringResult ret = {0};
+  for (int i = 0; i < 3; ++i) {
+    ret.face_orientation[i] = s->output[i] * REG_SCALE;
+    ret.face_orientation_meta[i] = exp(s->output[6 + i]);
+  }
+  for (int i = 0; i < 2; ++i) {
+    ret.face_position[i] = s->output[3 + i] * REG_SCALE;
+    ret.face_position_meta[i] = exp(s->output[9 + i]);
+  }
+  for (int i = 0; i < 4; ++i) {
+    ret.ready_prob[i] = sigmoid(s->output[39 + i]);
+  }
+  for (int i = 0; i < 2; ++i) {
+    ret.not_ready_prob[i] = sigmoid(s->output[43 + i]);
+  }
+  ret.face_prob = sigmoid(s->output[12]);
+  ret.left_eye_prob = sigmoid(s->output[21]);
+  ret.right_eye_prob = sigmoid(s->output[30]);
+  ret.left_blink_prob = sigmoid(s->output[31]);
+  ret.right_blink_prob = sigmoid(s->output[32]);
+  ret.sg_prob = sigmoid(s->output[33]);
+  ret.poor_vision = sigmoid(s->output[34]);
+  ret.partial_face = sigmoid(s->output[35]);
+  ret.distracted_pose = sigmoid(s->output[36]);
+  ret.distracted_eyes = sigmoid(s->output[37]);
+  ret.occluded_prob = sigmoid(s->output[38]);
+  ret.dsp_execution_time = (t2 - t1) / 1000.;
+  return ret;
 }
 
-void dmonitoring_publish(PubMaster &pm, uint32_t frame_id, const DMonitoringModelResult &model_res, float execution_time, kj::ArrayPtr<const float> raw_pred) {
+void dmonitoring_publish(PubMaster &pm, uint32_t frame_id, const DMonitoringResult &res, float execution_time, kj::ArrayPtr<const float> raw_pred) {
   // make msg
   MessageBuilder msg;
-  auto framed = msg.initEvent().initDriverStateV2();
+  auto framed = msg.initEvent().initDriverState();
   framed.setFrameId(frame_id);
   framed.setModelExecutionTime(execution_time);
-  framed.setDspExecutionTime(model_res.dsp_execution_time);
-
-  framed.setPoorVisionProb(model_res.poor_vision_prob);
-  framed.setWheelOnRightProb(model_res.wheel_on_right_prob);
-  fill_driver_data(framed.initLeftDriverData(), model_res.driver_state_lhd);
-  fill_driver_data(framed.initRightDriverData(), model_res.driver_state_rhd);
-
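+  // mirror the DMonitoringResult fields into the flat driverState message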
+  framed.setDspExecutionTime(res.dsp_execution_time);
+
+  framed.setFaceOrientation(res.face_orientation);
+  framed.setFaceOrientationStd(res.face_orientation_meta);
+  framed.setFacePosition(res.face_position);
+  framed.setFacePositionStd(res.face_position_meta);
+  framed.setFaceProb(res.face_prob);
+  framed.setLeftEyeProb(res.left_eye_prob);
+  framed.setRightEyeProb(res.right_eye_prob);
+  framed.setLeftBlinkProb(res.left_blink_prob);
+  framed.setRightBlinkProb(res.right_blink_prob);
+  framed.setSunglassesProb(res.sg_prob);
+  framed.setPoorVision(res.poor_vision);
+  framed.setPartialFace(res.partial_face);
+  framed.setDistractedPose(res.distracted_pose);
+  framed.setDistractedEyes(res.distracted_eyes);
+  framed.setOccludedProb(res.occluded_prob);
+  framed.setReadyProb(res.ready_prob);
+  framed.setNotReadyProb(res.not_ready_prob);
   if (send_raw_pred) {
     framed.setRawPredictions(raw_pred.asBytes());
   }
 
-  pm.send("driverStateV2", msg);
+  pm.send("driverState", msg);
 }
 
 void dmonitoring_free(DMonitoringModelState* s) {