 
 #include "selfdrive/modeld/models/dmonitoring.h"
 
-constexpr int MODEL_WIDTH = 1440;
-constexpr int MODEL_HEIGHT = 960;
+constexpr int MODEL_WIDTH = 320;
+constexpr int MODEL_HEIGHT = 640;
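+// 320x640 (WxH) is the YUV420 frame fed to the packing loop below, which
+// rearranges it into a 6-channel half-resolution tensor for the network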
 
 template <class T>
 static inline T *get_buffer(std::vector<T> &buf, const size_t size) {
   if (buf.size() < size) buf.resize(size);
   return buf.data();
 }
 
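+// lay out one contiguous I420 frame in buf: a full-res Y plane followed by
+// quarter-res U and V planes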
+static inline void init_yuv_buf(std::vector<uint8_t> &buf, const int width, int height) {
+  uint8_t *y = get_buffer(buf, width * height * 3 / 2);
+  uint8_t *u = y + width * height;
+  uint8_t *v = u + (width / 2) * (height / 2);
+
+  // needed on comma two to make the padded border black
+  // equivalent to RGB(0,0,0) in YUV space
+  memset(y, 16, width * height);
+  memset(u, 128, (width / 2) * (height / 2));
+  memset(v, 128, (width / 2) * (height / 2));
+}
+
 void dmonitoring_init(DMonitoringModelState* s) {
   s->is_rhd = Params().getBool("IsRHD");
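+  // 256-entry LUT: maps a uint8 pixel p to (p - 128) / 128, so the packing
+  // loop can normalize to roughly [-1, 1) with a single table lookup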
+  for (int x = 0; x < std::size(s->tensor); ++x) {
+    s->tensor[x] = (x - 128.f) * 0.0078125f;
+  }
+  init_yuv_buf(s->resized_buf, MODEL_WIDTH, MODEL_HEIGHT);
 
 #ifdef USE_ONNX_MODEL
-  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new ONNXModel("models/dmonitoring_model.onnx", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME);
 #else
-  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME, false, true);
+  s->m = new SNPEModel("models/dmonitoring_model_q.dlc", &s->output[0], OUTPUT_SIZE, USE_DSP_RUNTIME);
 #endif
 
   s->m->addCalib(s->calib, CALIB_LEN);
 }
 
-void parse_driver_data(DriverStateResult &ds_res, const DMonitoringModelState* s, int out_idx_offset) {
-  for (int i = 0; i < 3; ++i) {
-    ds_res.face_orientation[i] = s->output[out_idx_offset+i] * REG_SCALE;
-    ds_res.face_orientation_std[i] = exp(s->output[out_idx_offset+6+i]);
-  }
-  for (int i = 0; i < 2; ++i) {
-    ds_res.face_position[i] = s->output[out_idx_offset+3+i] * REG_SCALE;
-    ds_res.face_position_std[i] = exp(s->output[out_idx_offset+9+i]);
-  }
-  for (int i = 0; i < 4; ++i) {
-    ds_res.ready_prob[i] = sigmoid(s->output[out_idx_offset+35+i]);
-  }
-  for (int i = 0; i < 2; ++i) {
-    ds_res.not_ready_prob[i] = sigmoid(s->output[out_idx_offset+39+i]);
-  }
-  ds_res.face_prob = sigmoid(s->output[out_idx_offset+12]);
-  ds_res.left_eye_prob = sigmoid(s->output[out_idx_offset+21]);
-  ds_res.right_eye_prob = sigmoid(s->output[out_idx_offset+30]);
-  ds_res.left_blink_prob = sigmoid(s->output[out_idx_offset+31]);
-  ds_res.right_blink_prob = sigmoid(s->output[out_idx_offset+32]);
-  ds_res.sunglasses_prob = sigmoid(s->output[out_idx_offset+33]);
-  ds_res.occluded_prob = sigmoid(s->output[out_idx_offset+34]);
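+// hand out (y, u, v) plane pointers into a single I420 allocation,
+// same layout as init_yuv_buf above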
+static inline auto get_yuv_buf(std::vector<uint8_t> &buf, const int width, int height) {
+  uint8_t *y = get_buffer(buf, width * height * 3 / 2);
+  uint8_t *u = y + width * height;
+  uint8_t *v = u + (width / 2) * (height / 2);
+  return std::make_tuple(y, u, v);
 }
 
-void fill_driver_data(cereal::DriverStateV2::DriverData::Builder ddata, const DriverStateResult &ds_res) {
-  ddata.setFaceOrientation(ds_res.face_orientation);
-  ddata.setFaceOrientationStd(ds_res.face_orientation_std);
-  ddata.setFacePosition(ds_res.face_position);
-  ddata.setFacePositionStd(ds_res.face_position_std);
-  ddata.setFaceProb(ds_res.face_prob);
-  ddata.setLeftEyeProb(ds_res.left_eye_prob);
-  ddata.setRightEyeProb(ds_res.right_eye_prob);
-  ddata.setLeftBlinkProb(ds_res.left_blink_prob);
-  ddata.setRightBlinkProb(ds_res.right_blink_prob);
-  ddata.setSunglassesProb(ds_res.sunglasses_prob);
-  ddata.setOccludedProb(ds_res.occluded_prob);
-  ddata.setReadyProb(ds_res.ready_prob);
-  ddata.setNotReadyProb(ds_res.not_ready_prob);
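+// copy a cropped window out of an NV12 frame (semi-planar: Y plane, then
+// interleaved CbCr) into planar I420: two Y rows per iteration, then
+// deinterleave one UV row into separate U and V planes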
+struct Rect {int x, y, w, h;};
+void crop_nv12_to_yuv(uint8_t *raw, int stride, int uv_offset, uint8_t *y, uint8_t *u, uint8_t *v, const Rect &rect) {
+  uint8_t *raw_y = raw;
+  uint8_t *raw_uv = raw_y + uv_offset;
+  for (int r = 0; r < rect.h / 2; r++) {
+    memcpy(y + 2 * r * rect.w, raw_y + (2 * r + rect.y) * stride + rect.x, rect.w);
+    memcpy(y + (2 * r + 1) * rect.w, raw_y + (2 * r + rect.y + 1) * stride + rect.x, rect.w);
+    for (int h = 0; h < rect.w / 2; h++) {
+      u[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2];
+      v[r * rect.w/2 + h] = raw_uv[(r + (rect.y/2)) * stride + (rect.x/2 + h)*2 + 1];
+    }
+  }
 }
 
-DMonitoringModelResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib) {
-  int v_off = height - MODEL_HEIGHT;
-  int h_off = (width - MODEL_WIDTH) / 2;
-  int yuv_buf_len = MODEL_WIDTH * MODEL_HEIGHT;
-
-  uint8_t *raw_buf = (uint8_t *) stream_buf;
-  // vertical crop free
-  uint8_t *raw_y_start = raw_buf + stride * v_off;
+DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_buf, int width, int height, int stride, int uv_offset, float *calib) {
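+  // center a tici_dm_crop::width x (tici_dm_crop::width / 1.33) window (4:3)
+  // in the camera frame, then keep a portrait 1:2 slice of it: its right edge
+  // for LHD, its left edge (mirrored below) for RHD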
+  const int cropped_height = tici_dm_crop::width / 1.33;
+  Rect crop_rect = {width / 2 - tici_dm_crop::width / 2 + tici_dm_crop::x_offset,
+                    height / 2 - cropped_height / 2 + tici_dm_crop::y_offset,
+                    cropped_height / 2,
+                    cropped_height};
+  if (!s->is_rhd) {
+    crop_rect.x += tici_dm_crop::width - crop_rect.w;
+  }
 
-  uint8_t *net_input_buf = get_buffer(s->net_input_buf, yuv_buf_len);
+  int resized_width = MODEL_WIDTH;
+  int resized_height = MODEL_HEIGHT;
+
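+  // crop into cropped_buf; RHD frames are cropped first and then mirrored
+  // horizontally with libyuv, so the model always sees a left-hand-drive view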
+  auto [cropped_y, cropped_u, cropped_v] = get_yuv_buf(s->cropped_buf, crop_rect.w, crop_rect.h);
+  if (!s->is_rhd) {
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, cropped_y, cropped_u, cropped_v, crop_rect);
+  } else {
+    auto [mirror_y, mirror_u, mirror_v] = get_yuv_buf(s->premirror_cropped_buf, crop_rect.w, crop_rect.h);
+    crop_nv12_to_yuv((uint8_t *)stream_buf, stride, uv_offset, mirror_y, mirror_u, mirror_v, crop_rect);
+    libyuv::I420Mirror(mirror_y, crop_rect.w,
+                       mirror_u, crop_rect.w / 2,
+                       mirror_v, crop_rect.w / 2,
+                       cropped_y, crop_rect.w,
+                       cropped_u, crop_rect.w / 2,
+                       cropped_v, crop_rect.w / 2,
+                       crop_rect.w, crop_rect.h);
+  }
 
-  // here makes a uint8 copy
-  for (int r = 0; r < MODEL_HEIGHT; ++r) {
-    memcpy(net_input_buf + r * MODEL_WIDTH, raw_y_start + r * stride + h_off, MODEL_WIDTH);
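+  // bilinear-scale the cropped I420 frame to the MODEL_WIDTH x MODEL_HEIGHT
+  // resolution the packing loop expects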
+  auto [resized_buf, resized_u, resized_v] = get_yuv_buf(s->resized_buf, resized_width, resized_height);
+  uint8_t *resized_y = resized_buf;
+  libyuv::FilterMode mode = libyuv::FilterModeEnum::kFilterBilinear;
+  libyuv::I420Scale(cropped_y, crop_rect.w,
+                    cropped_u, crop_rect.w / 2,
+                    cropped_v, crop_rect.w / 2,
+                    crop_rect.w, crop_rect.h,
+                    resized_y, resized_width,
+                    resized_u, resized_width / 2,
+                    resized_v, resized_width / 2,
+                    resized_width, resized_height,
+                    mode);
+
+  int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
+  float *net_input_buf = get_buffer(s->net_input_buf, yuv_buf_len);
+  // one shot conversion, O(n) anyway
+  // yuvframe2tensor, normalize
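+  // space-to-depth packing: each 2x2 block of Y becomes 4 channels, so all 6
+  // planes end up at the same (MODEL_WIDTH/2) x (MODEL_HEIGHT/2) resolution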
+  for (int r = 0; r < MODEL_HEIGHT/2; r++) {
+    for (int c = 0; c < MODEL_WIDTH/2; c++) {
+      // Y_ul
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (0*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r)*resized_width + 2*c]];
+      // Y_dl
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (1*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r+1)*resized_width + 2*c]];
+      // Y_ur
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r)*resized_width + 2*c+1]];
+      // Y_dr
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_y[(2*r+1)*resized_width + 2*c+1]];
+      // U
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_u[r*resized_width/2 + c]];
+      // V
+      net_input_buf[(r*MODEL_WIDTH/2) + c + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = s->tensor[resized_v[r*resized_width/2 + c]];
+    }
   }
 
-  // printf("preprocess completed. %d \n", yuv_buf_len);
-  // FILE *dump_yuv_file = fopen("/tmp/rawdump.yuv", "wb");
-  // fwrite(net_input_buf, yuv_buf_len, sizeof(uint8_t), dump_yuv_file);
-  // fclose(dump_yuv_file);
+  //printf("preprocess completed. %d \n", yuv_buf_len);
+  //FILE *dump_yuv_file = fopen("/tmp/rawdump.yuv", "wb");
+  //fwrite(resized_buf, yuv_buf_len, sizeof(uint8_t), dump_yuv_file);
+  //fclose(dump_yuv_file);
+
+  // *** testing ***
+  // idat = np.frombuffer(open("/tmp/inputdump.yuv", "rb").read(), np.float32).reshape(6, 160, 320)
+  // imshow(cv2.cvtColor(tensor_to_frames(idat[None]/0.0078125+128)[0], cv2.COLOR_YUV2RGB_I420))
+
+  //FILE *dump_yuv_file2 = fopen("/tmp/inputdump.yuv", "wb");
+  //fwrite(net_input_buf, MODEL_HEIGHT*MODEL_WIDTH*3/2, sizeof(float), dump_yuv_file2);
+  //fclose(dump_yuv_file2);
 
   double t1 = millis_since_boot();
-  s->m->addImage((float*)net_input_buf, yuv_buf_len / 4);
+  s->m->addImage(net_input_buf, yuv_buf_len);
   for (int i = 0; i < CALIB_LEN; i++) {
     s->calib[i] = calib[i];
   }
   s->m->execute();
   double t2 = millis_since_boot();
 
-  DMonitoringModelResult model_res = {0};
-  parse_driver_data(model_res.driver_state_lhd, s, 0);
-  parse_driver_data(model_res.driver_state_rhd, s, 41);
-  model_res.poor_vision_prob = sigmoid(s->output[82]);
-  model_res.wheel_on_right_prob = sigmoid(s->output[83]);
-  model_res.dsp_execution_time = (t2 - t1) / 1000.;
-
-  return model_res;
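+  // decode the raw network output: REG_SCALE rescales the regressed means,
+  // exp() turns log-std outputs into stds, sigmoid() turns logits into probs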
+  DMonitoringResult ret = {0};
+  for (int i = 0; i < 3; ++i) {
+    ret.face_orientation[i] = s->output[i] * REG_SCALE;
+    ret.face_orientation_meta[i] = exp(s->output[6 + i]);
+  }
+  for (int i = 0; i < 2; ++i) {
+    ret.face_position[i] = s->output[3 + i] * REG_SCALE;
+    ret.face_position_meta[i] = exp(s->output[9 + i]);
+  }
+  for (int i = 0; i < 4; ++i) {
+    ret.ready_prob[i] = sigmoid(s->output[39 + i]);
+  }
+  for (int i = 0; i < 2; ++i) {
+    ret.not_ready_prob[i] = sigmoid(s->output[43 + i]);
+  }
+  ret.face_prob = sigmoid(s->output[12]);
+  ret.left_eye_prob = sigmoid(s->output[21]);
+  ret.right_eye_prob = sigmoid(s->output[30]);
+  ret.left_blink_prob = sigmoid(s->output[31]);
+  ret.right_blink_prob = sigmoid(s->output[32]);
+  ret.sg_prob = sigmoid(s->output[33]);
+  ret.poor_vision = sigmoid(s->output[34]);
+  ret.partial_face = sigmoid(s->output[35]);
+  ret.distracted_pose = sigmoid(s->output[36]);
+  ret.distracted_eyes = sigmoid(s->output[37]);
+  ret.occluded_prob = sigmoid(s->output[38]);
+  ret.dsp_execution_time = (t2 - t1) / 1000.;
+  return ret;
 }
 
-void dmonitoring_publish(PubMaster &pm, uint32_t frame_id, const DMonitoringModelResult &model_res, float execution_time, kj::ArrayPtr<const float> raw_pred) {
+void dmonitoring_publish(PubMaster &pm, uint32_t frame_id, const DMonitoringResult &res, float execution_time, kj::ArrayPtr<const float> raw_pred) {
   // make msg
   MessageBuilder msg;
-  auto framed = msg.initEvent().initDriverStateV2();
+  auto framed = msg.initEvent().initDriverState();
   framed.setFrameId(frame_id);
   framed.setModelExecutionTime(execution_time);
-  framed.setDspExecutionTime(model_res.dsp_execution_time);
-
-  framed.setPoorVisionProb(model_res.poor_vision_prob);
-  framed.setWheelOnRightProb(model_res.wheel_on_right_prob);
-  fill_driver_data(framed.initLeftDriverData(), model_res.driver_state_lhd);
-  fill_driver_data(framed.initRightDriverData(), model_res.driver_state_rhd);
-
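+  // mirror the DMonitoringResult fields into the flat driverState message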
+  framed.setDspExecutionTime(res.dsp_execution_time);
+
+  framed.setFaceOrientation(res.face_orientation);
+  framed.setFaceOrientationStd(res.face_orientation_meta);
+  framed.setFacePosition(res.face_position);
+  framed.setFacePositionStd(res.face_position_meta);
+  framed.setFaceProb(res.face_prob);
+  framed.setLeftEyeProb(res.left_eye_prob);
+  framed.setRightEyeProb(res.right_eye_prob);
+  framed.setLeftBlinkProb(res.left_blink_prob);
+  framed.setRightBlinkProb(res.right_blink_prob);
+  framed.setSunglassesProb(res.sg_prob);
+  framed.setPoorVision(res.poor_vision);
+  framed.setPartialFace(res.partial_face);
+  framed.setDistractedPose(res.distracted_pose);
+  framed.setDistractedEyes(res.distracted_eyes);
+  framed.setOccludedProb(res.occluded_prob);
+  framed.setReadyProb(res.ready_prob);
+  framed.setNotReadyProb(res.not_ready_prob);
   if (send_raw_pred) {
     framed.setRawPredictions(raw_pred.asBytes());
   }
 
-  pm.send("driverStateV2", msg);
+  pm.send("driverState", msg);
 }
 
 void dmonitoring_free(DMonitoringModelState* s) {