diff --git a/README.md b/README.md
index 85071d5..3208887 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
diff --git a/assets/dancer1.gif b/assets/dancer1.gif
deleted file mode 100644
index fddf720..0000000
Binary files a/assets/dancer1.gif and /dev/null differ
diff --git a/assets/dancer2.gif b/assets/dancer2.gif
deleted file mode 100644
index 6b67a5c..0000000
Binary files a/assets/dancer2.gif and /dev/null differ
diff --git a/assets/dancer3.gif b/assets/dancer3.gif
deleted file mode 100644
index 124e4ff..0000000
Binary files a/assets/dancer3.gif and /dev/null differ
diff --git a/assets/hybrik_dance1.gif b/assets/hybrik_dance1.gif
new file mode 100644
index 0000000..a6f889c
Binary files /dev/null and b/assets/hybrik_dance1.gif differ
diff --git a/assets/hybrik_dance2.gif b/assets/hybrik_dance2.gif
new file mode 100644
index 0000000..884b19e
Binary files /dev/null and b/assets/hybrik_dance2.gif differ
diff --git a/assets/hybrik_dance3.gif b/assets/hybrik_dance3.gif
new file mode 100644
index 0000000..65dc4de
Binary files /dev/null and b/assets/hybrik_dance3.gif differ
diff --git a/configs/256x192_adam_lr1e-3-hrw48_cam_2x_w_pw3d_3dhp.yaml b/configs/256x192_adam_lr1e-3-hrw48_cam_2x_w_pw3d_3dhp.yaml
index 0d82d33..51946a0 100644
--- a/configs/256x192_adam_lr1e-3-hrw48_cam_2x_w_pw3d_3dhp.yaml
+++ b/configs/256x192_adam_lr1e-3-hrw48_cam_2x_w_pw3d_3dhp.yaml
@@ -48,13 +48,13 @@ MODEL:
- 2200
- 2200
LOSS:
- TYPE: 'L1LossDimSMPLCam'
+ TYPE: 'LaplaceLossDimSMPLCam'
ELEMENTS:
BETA_WEIGHT: 1
BETA_REG_WEIGHT: 0
PHI_REG_WEIGHT: 0.0001
LEAF_REG_WEIGHT: 0
- TWIST_WEIGHT: 0.01
+ TWIST_WEIGHT: 1
THETA_WEIGHT: 0.01
UVD24_WEIGHT: 1
XYZ24_WEIGHT: 0
diff --git a/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix.yaml b/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix.yaml
index c97669b..aeba1f7 100644
--- a/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix.yaml
+++ b/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix.yaml
@@ -53,7 +53,7 @@ LOSS:
BETA_REG_WEIGHT: 0
PHI_REG_WEIGHT: 0.0001
LEAF_REG_WEIGHT: 0
- TWIST_WEIGHT: 0.01
+ TWIST_WEIGHT: 1
THETA_WEIGHT: 0.01
UVD24_WEIGHT: 1
XYZ24_WEIGHT: 1
diff --git a/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix_w_pw3d.yaml b/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix_w_pw3d.yaml
index 17b4919..6f13647 100644
--- a/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix_w_pw3d.yaml
+++ b/configs/256x192_adam_lr1e-3-res34_smpl_3d_cam_2x_mix_w_pw3d.yaml
@@ -47,13 +47,13 @@ MODEL:
- 2200
- 2200
LOSS:
- TYPE: 'L1LossDimSMPLCam'
+ TYPE: 'LaplaceLossDimSMPLCam'
ELEMENTS:
BETA_WEIGHT: 1
BETA_REG_WEIGHT: 0
PHI_REG_WEIGHT: 0.0001
LEAF_REG_WEIGHT: 0
- TWIST_WEIGHT: 0.01
+ TWIST_WEIGHT: 1
THETA_WEIGHT: 0.01
UVD24_WEIGHT: 1
XYZ24_WEIGHT: 0
diff --git a/configs/256x192_adam_lr1e-3-res50_reg_smpl_3d_cam_2x_mix_w_pw3d.yaml b/configs/256x192_adam_lr1e-3-res50_reg_smpl_3d_cam_2x_mix_w_pw3d.yaml
index 8d388c2..696e987 100644
--- a/configs/256x192_adam_lr1e-3-res50_reg_smpl_3d_cam_2x_mix_w_pw3d.yaml
+++ b/configs/256x192_adam_lr1e-3-res50_reg_smpl_3d_cam_2x_mix_w_pw3d.yaml
@@ -53,7 +53,7 @@ LOSS:
BETA_REG_WEIGHT: 0
PHI_REG_WEIGHT: 0.0001
LEAF_REG_WEIGHT: 0
- TWIST_WEIGHT: 0.01
+ TWIST_WEIGHT: 1
THETA_WEIGHT: 0.01
UVD24_WEIGHT: 1
XYZ24_WEIGHT: 0
diff --git a/hybrik/datasets/cocoeft.py b/hybrik/datasets/cocoeft.py
new file mode 100644
index 0000000..1ae10c5
--- /dev/null
+++ b/hybrik/datasets/cocoeft.py
@@ -0,0 +1,235 @@
+"""MS COCO Human keypoint dataset."""
+import os
+
+# import scipy.misc
+import cv2
+import joblib
+import numpy as np
+import torch
+import torch.utils.data as data
+from hybrik.utils.presets.simple_transform_3d_cam_eft import SimpleTransform3DCamEFT
+from pytorch3d.transforms.rotation_conversions import matrix_to_axis_angle
+
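+# Maps each of the 24 SMPL joints to its COCO keypoint index; -1 marks SMPL
+# joints (pelvis, spine, feet, hands) that have no COCO counterpart.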
+s_coco_2_smpl_jt = [
+ -1, 11, 12,
+ -1, 13, 14,
+ -1, 15, 16,
+ -1, -1, -1,
+ -1, -1, -1,
+ -1,
+ 5, 6,
+ 7, 8,
+ 9, 10,
+ -1, -1
+]
+
+
+class COCO_EFT_3D(data.Dataset):
+ """ COCO Person dataset.
+ Parameters
+ ----------
+ ann_file: str,
+ Path to the annotation json file.
+ root: str, default './data/coco'
+ Path to the ms coco dataset.
+ train: bool, default is True
+ If true, the dataset is set to training mode.
+ skip_empty: bool, default is False
+ Whether to skip an entire image if no valid label is found. Use `False` if this
+ dataset is for validation, to avoid COCO metric errors.
+ """
+ CLASSES = ['person']
+ num_joints = 17
+ EVAL_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+ joints_name = ('nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', # 4
+ 'left_shoulder', 'right_shoulder', # 6
+ 'left_elbow', 'right_elbow', # 8
+ 'left_wrist', 'right_wrist', # 10
+ 'left_hip', 'right_hip', # 12
+ 'left_knee', 'right_knee', # 14
+ 'left_ankle', 'right_ankle') # 16
+
+ def __init__(self,
+ cfg,
+ ann_file,
+ root='./data/coco',
+ train=True,
+ skip_empty=True,
+ dpg=False,
+ lazy_import=False):
+
+ self._cfg = cfg
+ self._ann_file = os.path.join(root, 'annotations', ann_file)
+ self._lazy_import = lazy_import
+ self._root = root
+ self._skip_empty = skip_empty
+ self._train = train
+ self._dpg = dpg
+
+ self._scale_factor = cfg.DATASET.SCALE_FACTOR
+ self._color_factor = cfg.DATASET.COLOR_FACTOR
+ self._rot = cfg.DATASET.ROT_FACTOR
+ self._input_size = cfg.MODEL.IMAGE_SIZE
+ self._output_size = cfg.MODEL.HEATMAP_SIZE
+
+ self._occlusion = cfg.DATASET.OCCLUSION
+
+ self._crop = cfg.MODEL.EXTRA.CROP
+ self._sigma = cfg.MODEL.EXTRA.SIGMA
+
+ self._check_centers = False
+
+ self.num_class = len(self.CLASSES)
+
+ self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY
+ self.prob_half_body = cfg.DATASET.PROB_HALF_BODY
+
+ self.augment = cfg.MODEL.EXTRA.AUGMENT
+
+ self._loss_type = cfg.LOSS['TYPE']
+
+ self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+ self.lower_body_ids = (11, 12, 13, 14, 15, 16)
+
+ bbox_3d_shape = getattr(cfg.MODEL, 'BBOX_3D_SHAPE', [2200, 2200, 2200])
+ # millimeter -> meter
+ self.bbox_3d_shape = [item * 1e-3 for item in bbox_3d_shape]
+
+ self.transformation = SimpleTransform3DCamEFT(
+ self, scale_factor=self._scale_factor,
+ color_factor=self._color_factor,
+ occlusion=self._occlusion,
+ input_size=self._input_size,
+ output_size=self._output_size,
+ depth_dim=64,
+ bbox_3d_shape=self.bbox_3d_shape,
+ rot=self._rot, sigma=self._sigma,
+ train=self._train, add_dpg=self._dpg,
+ loss_type=self._loss_type, scale_mult=1.25)
+
+ self.db = self.load_pt()
+
+ def __getitem__(self, idx):
+ # get image id
+ img_path = self.db['img_path'][idx]
+ img_id = int(os.path.splitext(os.path.basename(img_path))[0])
+
+ # load ground truth, including bbox, keypoints, image size
+ label = {}
+ for k in self.db.keys():
+ try:
+ label[k] = self.db[k][idx].copy()
+ except AttributeError:
+ label[k] = self.db[k][idx]
+
+ label_new = self.preprocess_pt_item(label, idx)
+ # img = scipy.misc.imread(img_path, mode='RGB')
+ src = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
+ # transform ground truth into training label and apply data augmentation
+ target = self.transformation(src, label_new)
+
+ img = target.pop('image')
+ bbox = target.pop('bbox')
+
+ return img, target, img_id, bbox
+
+ def __len__(self):
+ return len(self.db['img_path'])
+
+ def load_pt(self):
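+ # the annotation file is a joblib-pickled dict of per-sample arrays keyed
+ # by field name, opened read-only via mmap_mode='r'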
+ db = joblib.load(self._ann_file + '_smpl_annot.pt', 'r')
+ return db
+
+ @property
+ def joint_pairs(self):
+ """Joint pairs which defines the pairs of joint to be swapped
+ when the image is flipped horizontally."""
+ return [[1, 2], [3, 4], [5, 6], [7, 8],
+ [9, 10], [11, 12], [13, 14], [15, 16]]
+
+ def _get_box_center_area(self, bbox):
+ """Get bbox center"""
+ c = np.array([(bbox[0] + bbox[2]) / 2.0, (bbox[1] + bbox[3]) / 2.0])
+ area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
+ return c, area
+
+ def _get_keypoints_center_count(self, keypoints):
+ """Get geometric center of all keypoints"""
+ keypoint_x = np.sum(keypoints[:, 0, 0] * (keypoints[:, 0, 1] > 0))
+ keypoint_y = np.sum(keypoints[:, 1, 0] * (keypoints[:, 1, 1] > 0))
+ num = float(np.sum(keypoints[:, 0, 1]))
+ return np.array([keypoint_x / num, keypoint_y / num]), num
+
+ def preprocess_pt_item(self, label, idx):
+
+ # for k, v in label.items():
+ # print(k)
+ beta = label['shape'].copy()
+ theta = label['pose'].copy().reshape(24, 3, 3)
+ theta = matrix_to_axis_angle(torch.from_numpy(theta)).numpy()
+ # scalar
+ smpl_weight = label['smpl_weight'].copy().reshape(-1)
+
+ joint_cam_17 = label['xyz_17'].reshape((17, 3))
+ joint_cam_17 = joint_cam_17 - joint_cam_17[0]
+ joint_cam_29 = label['xyz_29'].reshape((29, 3))
+ joint_cam_29 = joint_cam_29 - joint_cam_29[0]
+
+ joint_img_17 = np.zeros((17, 3))
+ joints_vis_17 = np.zeros((17, 3)) * smpl_weight
+ joint_img_29 = np.zeros((29, 3))
+ joints_vis_29 = np.ones((29, 3)) * smpl_weight
+ joints_vis_xyz_29 = np.ones((29, 3)) * smpl_weight
+ gt_joints = label['joints_3d']
+
+ # if smpl_weight[0] < 0.5:
+ if float(smpl_weight) < 0.5:
+ for i in range(24):
+ id1 = i
+ id2 = s_coco_2_smpl_jt[i]
+ if id2 >= 0:
+ joint_img_29[id1, :2] = gt_joints[id2, :2, 0].copy()
+ joints_vis_29[id1, :2] = gt_joints[id2, :2, 1].copy()
+ else:
+ uv_29 = label['uv_29']
+ joint_img_29[:, :2] = uv_29
+ joint_img_29[:, 2] = joint_cam_29[:, 2]
+
+ twist_angle = label['twist_angle'].reshape(23)
+ cos = np.cos(twist_angle)
+ sin = np.sin(twist_angle)
+ phi = np.stack((cos, sin), axis=1)
+ phi_weight = np.ones_like(phi) * smpl_weight[0]
+
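+ # angles far below the valid range (a negative sentinel such as -999) mark
+ # joints without twist supervision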
+ flag = (twist_angle < -10)
+ phi_weight[flag, :] = 0
+
+ root_cam = joint_cam_29[0]
+
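+ # a fixed virtual camera is assumed: 1000 px focal length, principal point
+ # at the center of a 256x256 crop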
+ f = np.array([1000.0, 1000.0])
+ c = np.array([128.0, 128.0])
+
+ return_label = {
+ 'bbox': label['bbox'],
+ 'img_id': idx,
+ 'img_path': label['img_path'],
+ 'img_name': label['img_path'],
+ 'joint_img_17': joint_img_17,
+ 'joint_vis_17': joints_vis_17,
+ 'joint_cam_17': joint_cam_17,
+ 'joint_relative_17': joint_cam_17,
+ 'joint_img_29': joint_img_29,
+ 'joint_vis_29': joints_vis_29,
+ 'joint_vis_xyz_29': joints_vis_xyz_29,
+ 'joint_cam_29': joint_cam_29,
+ 'twist_phi': phi,
+ 'twist_weight': phi_weight,
+ 'beta': beta,
+ 'theta': theta,
+ 'root_cam': root_cam,
+ 'f': f,
+ 'c': c,
+ 'smpl_weight': smpl_weight
+ }
+
+ return return_label
diff --git a/hybrik/datasets/mix_dataset2_cam.py b/hybrik/datasets/mix_dataset2_cam.py
index a3ebe95..68a0cd1 100644
--- a/hybrik/datasets/mix_dataset2_cam.py
+++ b/hybrik/datasets/mix_dataset2_cam.py
@@ -32,7 +32,7 @@
9, 14,
10, 15,
11, 16,
- -1, -1, # 23
+ -1, -1, # 23
# 7,
# -1, -1,
# 21, 26
@@ -253,10 +253,12 @@ def __getitem__(self, idx):
target['target_weight_29'] = label_uvd_29_mask
target['target_xyz_17'] = label_xyz_17
target['target_weight_17'] = label_xyz_17_mask
- target['target_theta'] = torch.zeros(24 * 4)
+ # target['target_theta'] = torch.zeros(24 * 4)
+ target['target_theta'] = torch.zeros(24 * 9)
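+ # theta is now stored as flattened 3x3 rotation matrices (24 x 9) rather than quaternions (24 x 4)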
target['target_beta'] = torch.zeros(10)
target['target_smpl_weight'] = torch.zeros(1)
- target['target_theta_weight'] = torch.zeros(24 * 4)
+ # target['target_theta_weight'] = torch.zeros(24 * 4)
+ target['target_theta_weight'] = torch.zeros(24 * 9)
target['target_twist'] = torch.zeros(23, 2)
target['target_twist_weight'] = torch.zeros(23, 2)
target['target_xyz_weight_24'] = label_xyz_24_mask
diff --git a/hybrik/datasets/mix_dataset_cam.py b/hybrik/datasets/mix_dataset_cam.py
index 0bff3a6..75af4c4 100644
--- a/hybrik/datasets/mix_dataset_cam.py
+++ b/hybrik/datasets/mix_dataset_cam.py
@@ -260,10 +260,12 @@ def __getitem__(self, idx):
target['target_weight_29'] = label_uvd_29_mask
target['target_xyz_17'] = label_xyz_17
target['target_weight_17'] = label_xyz_17_mask
- target['target_theta'] = torch.zeros(24 * 4)
+ # target['target_theta'] = torch.zeros(24 * 4)
+ target['target_theta'] = torch.zeros(24 * 9)
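+ # theta is now stored as flattened 3x3 rotation matrices (24 x 9) rather than quaternions (24 x 4)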
target['target_beta'] = torch.zeros(10)
target['target_smpl_weight'] = torch.zeros(1)
- target['target_theta_weight'] = torch.zeros(24 * 4)
+ # target['target_theta_weight'] = torch.zeros(24 * 4)
+ target['target_theta_weight'] = torch.zeros(24 * 9)
target['target_twist'] = torch.zeros(23, 2)
target['target_twist_weight'] = torch.zeros(23, 2)
target['target_xyz_weight_24'] = label_xyz_24_mask
diff --git a/hybrik/models/HRNetWithCam.py b/hybrik/models/HRNetWithCam.py
index 709c028..6158850 100644
--- a/hybrik/models/HRNetWithCam.py
+++ b/hybrik/models/HRNetWithCam.py
@@ -332,7 +332,7 @@ def forward(self, x, flip_test=False, **kwargs):
pred_xyz_jts_24_struct = output.joints.float() / self.depth_factor
# -0.5 ~ 0.5
pred_xyz_jts_17 = output.joints_from_verts.float() / self.depth_factor
- pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 4)
+ pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 9)
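+ # rot_mats stay as 3x3 rotation matrices (24 x 9) now that the quaternion
+ # conversion in SMPL.hybrik is disabled (see the SMPL.py hunk below)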
pred_xyz_jts_24 = pred_xyz_jts_29[:, :24, :].reshape(batch_size, 72)
pred_xyz_jts_24_struct = pred_xyz_jts_24_struct.reshape(batch_size, 72)
pred_xyz_jts_17_flat = pred_xyz_jts_17.reshape(batch_size, 17 * 3)
diff --git a/hybrik/models/HRNetWithCamReg.py b/hybrik/models/HRNetWithCamReg.py
index 1aea466..de3db7c 100644
--- a/hybrik/models/HRNetWithCamReg.py
+++ b/hybrik/models/HRNetWithCamReg.py
@@ -351,7 +351,7 @@ def forward(self, x, flip_test=False, **kwargs):
pred_xyz_jts_24_struct = output.joints.float() / self.depth_factor
# -0.5 ~ 0.5
pred_xyz_jts_17 = output.joints_from_verts.float() / self.depth_factor
- pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 4)
+ pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 9)
pred_xyz_jts_24 = pred_xyz_jts_29[:, :24, :].reshape(batch_size, 72)
pred_xyz_jts_24_struct = pred_xyz_jts_24_struct.reshape(batch_size, 72)
pred_xyz_jts_17_flat = pred_xyz_jts_17.reshape(batch_size, 17 * 3)
diff --git a/hybrik/models/criterion.py b/hybrik/models/criterion.py
index 4ee6844..327e2c4 100644
--- a/hybrik/models/criterion.py
+++ b/hybrik/models/criterion.py
@@ -22,7 +22,7 @@ def weighted_l1_loss(input, target, weights, size_average):
def weighted_laplace_loss(input, sigma, target, weights, size_average):
input = input
target = target
- out = torch.log(sigma / amp) + torch.abs(input - target) / (math.sqrt(2) * sigma + 1e-9)
+ out = torch.log(sigma / amp) + torch.abs(input - target) / (math.sqrt(2) * sigma + 1e-5)
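+ # the larger epsilon keeps the loss and its gradient bounded when the predicted sigma is near zero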
out = out * weights
if size_average and weights.sum() > 0:
return out.sum() / weights.sum()
diff --git a/hybrik/models/layers/smpl/SMPL.py b/hybrik/models/layers/smpl/SMPL.py
index 6fc8864..6095945 100644
--- a/hybrik/models/layers/smpl/SMPL.py
+++ b/hybrik/models/layers/smpl/SMPL.py
@@ -261,7 +261,7 @@ def hybrik(self,
leaf_thetas=leaf_thetas)
rot_mats = rot_mats.reshape(batch_size * 24, 3, 3)
- rot_mats = rotmat_to_quat(rot_mats).reshape(batch_size, 24 * 4)
+ # rot_mats = rotmat_to_quat(rot_mats).reshape(batch_size, 24 * 4)
if transl is not None:
new_joints += transl.unsqueeze(dim=1)
diff --git a/hybrik/models/layers/smpl/lbs.py b/hybrik/models/layers/smpl/lbs.py
index bf45101..9c13fa5 100644
--- a/hybrik/models/layers/smpl/lbs.py
+++ b/hybrik/models/layers/smpl/lbs.py
@@ -991,6 +991,12 @@ def batch_get_pelvis_orient(rel_pose_skeleton, rel_rest_pose, parents, children,
spine_norm = torch.norm(spine_final_loc, dim=1, keepdim=True)
spine_norm = spine_final_loc / (spine_norm + 1e-8)
+ assert torch.sum(torch.isnan(spine_rest_loc)
+ ) == 0, ('spine_rest_loc', spine_rest_loc)
+
+ assert torch.sum(torch.isnan(spine_final_loc)
+ ) == 0, ('spine_final_loc', spine_final_loc)
+
rot_mat_spine = vectors2rotmat(spine_rest_loc, spine_final_loc, dtype)
assert torch.sum(torch.isnan(rot_mat_spine)
diff --git a/hybrik/models/simple3dposeSMPLWithCam.py b/hybrik/models/simple3dposeSMPLWithCam.py
index d4aa633..1839f98 100644
--- a/hybrik/models/simple3dposeSMPLWithCam.py
+++ b/hybrik/models/simple3dposeSMPLWithCam.py
@@ -98,8 +98,8 @@ def __init__(self, norm_layer=nn.BatchNorm2d, **kwargs):
init_cam = torch.tensor([0.9, 0, 0])
self.register_buffer(
'init_cam',
- torch.Tensor(init_cam).float())
-
+ torch.Tensor(init_cam).float())
+
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc1 = nn.Linear(self.feature_channel, 1024)
self.drop1 = nn.Dropout(p=0.5)
@@ -108,6 +108,7 @@ def __init__(self, norm_layer=nn.BatchNorm2d, **kwargs):
self.decshape = nn.Linear(1024, 10)
self.decphi = nn.Linear(1024, 23 * 2) # [cos(phi), sin(phi)]
self.deccam = nn.Linear(1024, 3)
+ self.decsigma = nn.Linear(1024, 29)
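+ # per-joint uncertainty head: one sigma for each of the 29 joints, used by the Laplace loss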
self.focal_length = kwargs['FOCAL_LENGTH']
self.bbox_3d_shape = kwargs['BBOX_3D_SHAPE'] if 'BBOX_3D_SHAPE' in kwargs else (2000, 2000, 2000)
@@ -238,7 +239,7 @@ def forward(self, x, flip_test=False, **kwargs):
x0 = self.avg_pool(x0)
x0 = x0.view(x0.size(0), -1)
init_shape = self.init_shape.expand(batch_size, -1) # (B, 10,)
- init_cam = self.init_cam.expand(batch_size, -1) # (B, 3,)
+ init_cam = self.init_cam.expand(batch_size, -1) # (B, 3,)
xc = x0
@@ -251,6 +252,7 @@ def forward(self, x, flip_test=False, **kwargs):
pred_shape = delta_shape + init_shape
pred_phi = self.decphi(xc)
pred_camera = self.deccam(xc).reshape(batch_size, -1) + init_cam
+ sigma = self.decsigma(xc).reshape(batch_size, 29, 1).sigmoid()
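+ # sigmoid bounds the predicted uncertainty to (0, 1)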
pred_phi = pred_phi.reshape(batch_size, 23, 2)
@@ -267,6 +269,8 @@ def forward(self, x, flip_test=False, **kwargs):
flip_pred_shape = flip_delta_shape + init_shape
flip_pred_phi = self.decphi(flip_xc)
flip_pred_camera = self.deccam(flip_xc).reshape(batch_size, -1) + init_cam
+ flip_sigma = self.decsigma(flip_x0).reshape(
+ batch_size, 29, 1).sigmoid()
pred_shape = (pred_shape + flip_pred_shape) / 2
@@ -277,6 +281,9 @@ def forward(self, x, flip_test=False, **kwargs):
flip_pred_camera[:, 1] = -flip_pred_camera[:, 1]
pred_camera = (pred_camera + flip_pred_camera) / 2
+ flip_sigma = self.flip_sigma(flip_sigma)
+ sigma = (sigma + flip_sigma) / 2
+
camScale = pred_camera[:, :1].unsqueeze(1)
camTrans = pred_camera[:, 1:].unsqueeze(1)
@@ -309,8 +316,7 @@ def forward(self, x, flip_test=False, **kwargs):
camera_root[:, 2] += camDepth[:, 0, 0]
else:
pred_xyz_jts_29[:, :, 2:] = pred_uvd_jts_29[:, :, 2:].clone() # unit: (self.depth_factor m)
- pred_xyz_jts_29_meter = (pred_uvd_jts_29[:, :, :2] * self.input_size / self.focal_length) \
- * (pred_xyz_jts_29[:, :, 2:]*self.depth_factor + camDepth) - camTrans # unit: m
+ pred_xyz_jts_29_meter = (pred_uvd_jts_29[:, :, :2] * self.input_size / self.focal_length) * (pred_xyz_jts_29[:, :, 2:] * self.depth_factor + camDepth) - camTrans # unit: m
pred_xyz_jts_29[:, :, :2] = pred_xyz_jts_29_meter / self.depth_factor # unit: (self.depth_factor m)
@@ -322,7 +328,7 @@ def forward(self, x, flip_test=False, **kwargs):
pred_xyz_jts_29_flat = pred_xyz_jts_29.reshape(batch_size, -1)
output = self.smpl.hybrik(
- pose_skeleton=pred_xyz_jts_29.type(self.smpl_dtype) * self.depth_factor, # unit: meter
+ pose_skeleton=pred_xyz_jts_29.type(self.smpl_dtype) * self.depth_factor, # unit: meter
betas=pred_shape.type(self.smpl_dtype),
phis=pred_phi.type(self.smpl_dtype),
global_orient=None,
@@ -333,7 +339,7 @@ def forward(self, x, flip_test=False, **kwargs):
pred_xyz_jts_24_struct = output.joints.float() / self.depth_factor
# -0.5 ~ 0.5
pred_xyz_jts_17 = output.joints_from_verts.float() / self.depth_factor
- pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 4)
+ pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 9)
pred_xyz_jts_24 = pred_xyz_jts_29[:, :24, :].reshape(batch_size, 72)
pred_xyz_jts_24_struct = pred_xyz_jts_24_struct.reshape(batch_size, 72)
pred_xyz_jts_17_flat = pred_xyz_jts_17.reshape(batch_size, 17 * 3)
@@ -356,6 +362,8 @@ def forward(self, x, flip_test=False, **kwargs):
cam_trans=camTrans[:, 0],
cam_root=camera_root,
transl=transl,
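+ # expose the per-joint uncertainty and a derived confidence score in the output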
+ pred_sigma=sigma,
+ scores=1 - sigma,
# uvd_heatmap=torch.stack([hm_x0, hm_y0, hm_z0], dim=2),
# uvd_heatmap=heatmaps,
# img_feat=x0
diff --git a/hybrik/models/simple3dposeSMPLWithCamReg.py b/hybrik/models/simple3dposeSMPLWithCamReg.py
index 9ed4543..f59c0aa 100644
--- a/hybrik/models/simple3dposeSMPLWithCamReg.py
+++ b/hybrik/models/simple3dposeSMPLWithCamReg.py
@@ -256,7 +256,7 @@ def forward(self, x, flip_test=False, **kwargs):
pred_xyz_jts_24_struct = output.joints.float() / self.depth_factor
# -0.5 ~ 0.5
pred_xyz_jts_17 = output.joints_from_verts.float() / self.depth_factor
- pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 4)
+ pred_theta_mats = output.rot_mats.float().reshape(batch_size, 24 * 9)
pred_xyz_jts_24 = pred_xyz_jts_29[:, :24, :].reshape(batch_size, 72)
pred_xyz_jts_24_struct = pred_xyz_jts_24_struct.reshape(batch_size, 72)
pred_xyz_jts_17_flat = pred_xyz_jts_17.reshape(batch_size, 17 * 3)
diff --git a/hybrik/utils/presets/simple_transform_3d_cam_eft.py b/hybrik/utils/presets/simple_transform_3d_cam_eft.py
new file mode 100644
index 0000000..7053b0b
--- /dev/null
+++ b/hybrik/utils/presets/simple_transform_3d_cam_eft.py
@@ -0,0 +1,592 @@
+import math
+import random
+
+import cv2
+import numpy as np
+import torch
+
+from ..bbox import _box_to_center_scale, _center_scale_to_box
+from ..transforms import (addDPG, affine_transform, flip_joints_3d, flip_thetas, flip_xyz_joints_3d,
+ get_affine_transform, im_to_torch, batch_rodrigues_numpy, flip_twist,
+ rotmat_to_quat_numpy, rotate_xyz_jts, rot_aa, flip_cam_xyz_joints_3d)
+from ..pose_utils import get_intrinsic_metrix
+
+s_coco_2_smpl_jt = [
+ -1, 11, 12,
+ -1, 13, 14,
+ -1, 15, 16,
+ -1, -1, -1,
+ -1, -1, -1,
+ -1,
+ 5, 6,
+ 7, 8,
+ 9, 10,
+ -1, -1
+]
+
+smpl_parents = [-1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14,
+ 16, 17, 18, 19, 20, 21]
+
+
+left_bones_idx = [
+ (0, 1), (1, 4), (4, 7), (12, 13),
+ (13, 16), (16, 18), (18, 20)
+]
+
+right_bones_idx = [
+ (0, 2), (2, 5), (5, 8), (12, 14),
+ (14, 17), (17, 19), (19, 21)
+]
+
+skeleton_29 = [
+ (0, 1), (0, 2), (0, 3), (1, 4), (2, 5), (3, 6), # 5
+ (4, 7), (5, 8), (6, 9), (7, 10), (8, 11), (9, 12), # 11
+ (9, 13), (9, 14), (12, 15), (13, 16), (14, 17), (16, 18), # 17
+ (17, 19), (18, 20), (19, 21), (20, 22), (21, 23), (15, 24), # 23
+ (22, 25), (23, 26), (10, 27), (11, 28) # 27
+]
+
+skeleton_3dhp = np.array([(-1, -1)] * 28).astype(int)
+skeleton_3dhp[[6, 7, 17, 18, 19, 20]] = np.array([
+ (19, 20), (24, 25), (9, 10), (14, 15), (10, 11), (15, 16)
+]).astype(int)
+
+
+class SimpleTransform3DCamEFT(object):
+ """Generation of cropped input person, pose coords, smpl parameters.
+ Parameters
+ ----------
+ img: torch.Tensor
+ A tensor with shape: `(3, h, w)`.
+ label: dict
+ A dictionary with 4 keys:
+ `bbox`: [xmin, ymin, xmax, ymax]
+ `joints_3d`: numpy.ndarray with shape: (n_joints, 2),
+ including position and visible flag
+ `width`: image width
+ `height`: image height
+ dataset:
+ The dataset to be transformed, must include `joint_pairs` property for flipping.
+ scale_factor: int
+ Scale augmentation.
+ input_size: tuple
+ Input image size, as (height, width).
+ output_size: tuple
+ Heatmap size, as (height, width).
+ rot: int
+ Rotation augmentation.
+ train: bool
+ True for the training transformation.
+ """
+
+ def __init__(self, dataset, scale_factor, color_factor, occlusion, add_dpg,
+ input_size, output_size, depth_dim, bbox_3d_shape,
+ rot, sigma, train, loss_type='MSELoss', scale_mult=1.25, focal_length=1000, two_d=False,
+ root_idx=0):
+
+ self._joint_pairs_17 = ((1, 4), (2, 5), (3, 6), (11, 14), (12, 15), (13, 16))
+ self._joint_pairs_24 = ((1, 2), (4, 5), (7, 8), (10, 11), (13, 14), (16, 17), (18, 19), (20, 21), (22, 23))
+ self._joint_pairs_29 = ((1, 2), (4, 5), (7, 8), (10, 11), (13, 14), (16, 17), (18, 19), (20, 21), (22, 23), (25, 26), (27, 28))
+
+ self._scale_factor = scale_factor
+ self._color_factor = color_factor
+ self._occlusion = occlusion
+ self._rot = rot
+ self._add_dpg = add_dpg
+
+ self._input_size = input_size
+ self._heatmap_size = output_size
+
+ self._sigma = sigma
+ self._train = train
+ self._loss_type = loss_type
+ self._aspect_ratio = float(input_size[1]) / input_size[0] # w / h
+ self._feat_stride = np.array(input_size) / np.array(output_size)
+
+ self.pixel_std = 1
+
+ self.bbox_3d_shape = dataset.bbox_3d_shape
+ self._scale_mult = scale_mult
+ self.two_d = two_d
+
+ # convert the depth dimension to meters; values >= 50 are assumed to be in millimeters
+ self.depth_factor2meter = self.bbox_3d_shape[2] if self.bbox_3d_shape[2] < 50 else self.bbox_3d_shape[2] * 1e-3
+
+ self.focal_length = focal_length
+ self.root_idx = root_idx
+
+ if train:
+ self.num_joints_half_body = dataset.num_joints_half_body
+ self.prob_half_body = dataset.prob_half_body
+
+ self.upper_body_ids = dataset.upper_body_ids
+ self.lower_body_ids = dataset.lower_body_ids
+
+ def test_transform(self, src, bbox):
+ xmin, ymin, xmax, ymax = bbox
+ center, scale = _box_to_center_scale(
+ xmin, ymin, xmax - xmin, ymax - ymin, self._aspect_ratio, scale_mult=self._scale_mult)
+ scale = scale * 1.0
+
+ input_size = self._input_size
+ inp_h, inp_w = input_size
+ trans = get_affine_transform(center, scale, 0, [inp_w, inp_h])
+ img = cv2.warpAffine(src, trans, (int(inp_w), int(inp_h)), flags=cv2.INTER_LINEAR)
+ bbox = _center_scale_to_box(center, scale)
+
+ img = im_to_torch(img)
+ # mean
+ img[0].add_(-0.406)
+ img[1].add_(-0.457)
+ img[2].add_(-0.480)
+
+ # std
+ img[0].div_(0.225)
+ img[1].div_(0.224)
+ img[2].div_(0.229)
+
+ img_center = np.array([float(src.shape[1]) * 0.5, float(src.shape[0]) * 0.5])
+
+ return img, bbox, img_center
+
+ def _integral_target_generator(self, joints_3d, num_joints, patch_height, patch_width):
+ target_weight = np.ones((num_joints, 3), dtype=np.float32)
+ target_weight[:, 0] = joints_3d[:, 0, 1]
+ target_weight[:, 1] = joints_3d[:, 0, 1]
+ target_weight[:, 2] = joints_3d[:, 0, 1]
+
+ target = np.zeros((num_joints, 3), dtype=np.float32)
+ target[:, 0] = joints_3d[:, 0, 0] / patch_width - 0.5
+ target[:, 1] = joints_3d[:, 1, 0] / patch_height - 0.5
+ target[:, 2] = joints_3d[:, 2, 0] / self.bbox_3d_shape[0]
+
+ target_weight[target[:, 0] > 0.5] = 0
+ target_weight[target[:, 0] < -0.5] = 0
+ target_weight[target[:, 1] > 0.5] = 0
+ target_weight[target[:, 1] < -0.5] = 0
+ target_weight[target[:, 2] > 0.5] = 0
+ target_weight[target[:, 2] < -0.5] = 0
+
+ target = target.reshape((-1))
+ target_weight = target_weight.reshape((-1))
+ return target, target_weight
+
+ def _integral_uvd_target_generator(self, joints_3d, num_joints, patch_height, patch_width):
+
+ target_weight = np.ones((num_joints, 3), dtype=np.float32)
+ target_weight[:, 0] = joints_3d[:, 0, 1]
+ target_weight[:, 1] = joints_3d[:, 0, 1]
+ target_weight[:, 2] = joints_3d[:, 0, 1]
+
+ target = np.zeros((num_joints, 3), dtype=np.float32)
+ target[:, 0] = joints_3d[:, 0, 0] / patch_width - 0.5
+ target[:, 1] = joints_3d[:, 1, 0] / patch_height - 0.5
+ target[:, 2] = joints_3d[:, 2, 0] / self.bbox_3d_shape[2]
+
+ target_weight[target[:, 0] > 0.5] = 0
+ target_weight[target[:, 0] < -0.5] = 0
+ target_weight[target[:, 1] > 0.5] = 0
+ target_weight[target[:, 1] < -0.5] = 0
+ target_weight[target[:, 2] > 0.5] = 0
+ target_weight[target[:, 2] < -0.5] = 0
+
+ target = target.reshape((-1))
+ target_weight = target_weight.reshape((-1))
+ return target, target_weight
+
+ def _integral_xyz_target_generator(self, joints_3d, joints_3d_vis, num_joints):
+ target_weight = np.ones((num_joints, 3), dtype=np.float32)
+ target_weight[:, 0] = joints_3d_vis[:, 0]
+ target_weight[:, 1] = joints_3d_vis[:, 1]
+ target_weight[:, 2] = joints_3d_vis[:, 2]
+
+ target = np.zeros((num_joints, 3), dtype=np.float32)
+ target[:, 0] = joints_3d[:, 0] / self.bbox_3d_shape[0]
+ target[:, 1] = joints_3d[:, 1] / self.bbox_3d_shape[1]
+ target[:, 2] = joints_3d[:, 2] / self.bbox_3d_shape[2]
+
+ # if self.bbox_3d_shape[0] < 1000:
+ # print(self.bbox_3d_shape, target)
+
+ # assert (target[0] == 0).all(), f'{target}, {self.bbox_3d_shape}'
+
+ target = target.reshape((-1))
+ target_weight = target_weight.reshape((-1))
+ return target, target_weight
+
+ def __call__(self, src, label):
+ if self.two_d:
+ raise NotImplementedError
+ else:
+ bbox = list(label['bbox'])
+ joint_img_17 = label['joint_img_17'].copy()
+ joint_relative_17 = label['joint_relative_17'].copy()
+ joint_cam_17 = label['joint_cam_17'].copy()
+ joints_vis_17 = label['joint_vis_17'].copy()
+ joint_img_29 = label['joint_img_29'].copy()
+ joint_cam_29 = label['joint_cam_29'].copy()
+ joints_vis_29 = label['joint_vis_29'].copy()
+ joints_vis_xyz_29 = label['joint_vis_xyz_29'].copy()
+ smpl_weight = label['smpl_weight'].copy()
+ # root_cam = label['root_cam'].copy()
+ # root_depth = root_cam[2] / self.bbox_3d_shape[2]
+ self.num_joints = joint_img_29.shape[0]
+
+ beta = label['beta'].copy()
+ theta = label['theta'].copy()
+
+ beta_kid = label['beta_kid'].copy() if 'beta_kid' in label else np.zeros(1)
+
+ # assert not (theta < 1e-3).all(), label
+
+ if 'twist_phi' in label.keys():
+ twist_phi = label['twist_phi'].copy()
+ twist_weight = label['twist_weight'].copy()
+ else:
+ twist_phi = np.zeros((23, 2))
+ twist_weight = np.zeros((23, 2))
+
+ gt_joints_17 = np.zeros((17, 3, 2), dtype=np.float32)
+ gt_joints_17[:, :, 0] = joint_img_17.copy()
+ gt_joints_17[:, :, 1] = joints_vis_17.copy()
+ gt_joints_29 = np.zeros((29, 3, 2), dtype=np.float32)
+ gt_joints_29[:, :, 0] = joint_img_29.copy()
+ gt_joints_29[:, :, 1] = joints_vis_29.copy()
+
+ imgwidth, imght = src.shape[1], src.shape[0]
+
+ input_size = self._input_size
+
+ if self._add_dpg and self._train:
+ bbox = addDPG(bbox, imgwidth, imght)
+
+ xmin, ymin, xmax, ymax = bbox
+ center, scale = _box_to_center_scale(
+ xmin, ymin, xmax - xmin, ymax - ymin, self._aspect_ratio, scale_mult=self._scale_mult)
+
+ xmin, ymin, xmax, ymax = _center_scale_to_box(center, scale)
+
+ # half body transform
+ if self._train and (np.sum(joints_vis_17[:, 0]) > self.num_joints_half_body and np.random.rand() < self.prob_half_body):
+ c_half_body, s_half_body = self.half_body_transform(
+ gt_joints_17[:, :, 0], joints_vis_17
+ )
+
+ if c_half_body is not None and s_half_body is not None:
+ center, scale = c_half_body, s_half_body
+
+ # rescale
+ if self._train:
+ sf = self._scale_factor
+ scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+ else:
+ scale = scale * 1.0
+
+ # rotation
+ if self._train:
+ rf = self._rot
+ r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0
+ else:
+ r = 0
+
+ if self._train and self._occlusion:
+ while True:
+ area_min = 0.0
+ area_max = 0.3
+ synth_area = (random.random() * (area_max - area_min) + area_min) * (xmax - xmin) * (ymax - ymin)
+
+ ratio_min = 0.5
+ ratio_max = 1 / 0.5
+ synth_ratio = (random.random() * (ratio_max - ratio_min) + ratio_min)
+
+ synth_h = math.sqrt(synth_area * synth_ratio)
+ synth_w = math.sqrt(synth_area / synth_ratio)
+ synth_xmin = random.random() * ((xmax - xmin) - synth_w - 1) + xmin
+ synth_ymin = random.random() * ((ymax - ymin) - synth_h - 1) + ymin
+
+ if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < imgwidth and synth_ymin + synth_h < imght:
+ synth_xmin = int(synth_xmin)
+ synth_ymin = int(synth_ymin)
+ synth_w = int(synth_w)
+ synth_h = int(synth_h)
+ src[synth_ymin:synth_ymin + synth_h, synth_xmin:synth_xmin + synth_w, :] = np.random.rand(synth_h, synth_w, 3) * 255
+ break
+
+ joints_17_uvd = gt_joints_17
+ joints_29_uvd = gt_joints_29
+
+ joint_cam_17_xyz = joint_cam_17
+ joints_cam_24_xyz = joint_cam_29[:24]
+
+ if random.random() > 0.75 and self._train:
+ assert src.shape[2] == 3
+ src = src[:, ::-1, :]
+
+ joints_17_uvd = flip_joints_3d(joints_17_uvd, imgwidth, self._joint_pairs_17)
+ joints_29_uvd = flip_joints_3d(joints_29_uvd, imgwidth, self._joint_pairs_29)
+ joint_cam_17_xyz = flip_cam_xyz_joints_3d(joint_cam_17_xyz, self._joint_pairs_17)
+ joints_cam_24_xyz = flip_cam_xyz_joints_3d(joints_cam_24_xyz, self._joint_pairs_24)
+ theta = flip_thetas(theta, self._joint_pairs_24)
+ twist_phi, twist_weight = flip_twist(twist_phi, twist_weight, self._joint_pairs_24)
+ center[0] = imgwidth - center[0] - 1
+
+ # rotate global theta
+ theta[0, :3] = rot_aa(theta[0, :3], r)
+
+ theta_rot_mat = batch_rodrigues_numpy(theta).reshape(24 * 9)
+ # theta_quat = rotmat_to_quat_numpy(theta_rot_mat).reshape(24 * 4)
+
+ # rotate xyz joints
+ joint_cam_17_xyz = rotate_xyz_jts(joint_cam_17_xyz, r)
+ joints_17_xyz = joint_cam_17_xyz - joint_cam_17_xyz[:1].copy()
+ joints_cam_24_xyz = rotate_xyz_jts(joints_cam_24_xyz, r)
+ joints_24_xyz = joints_cam_24_xyz - joints_cam_24_xyz[:1].copy()
+
+ inp_h, inp_w = input_size
+ trans = get_affine_transform(center, scale, r, [inp_w, inp_h])
+ trans_inv = get_affine_transform(center, scale, r, [inp_w, inp_h], inv=True).astype(np.float32)
+ intrinsic_param = get_intrinsic_metrix(label['f'], label['c'], inv=True).astype(np.float32) if 'f' in label.keys() else np.zeros((3, 3)).astype(np.float32)
+ joint_root = label['root_cam'].astype(np.float32) if 'root_cam' in label.keys() else np.zeros((3)).astype(np.float32)
+ depth_factor = np.array([self.bbox_3d_shape[2]]).astype(np.float32) if self.bbox_3d_shape else np.zeros((1)).astype(np.float32)
+
+ img = cv2.warpAffine(src, trans, (int(inp_w), int(inp_h)), flags=cv2.INTER_LINEAR)
+ # affine transform
+ for i in range(17):
+ if joints_17_uvd[i, 0, 1] > 0.0:
+ joints_17_uvd[i, 0:2, 0] = affine_transform(joints_17_uvd[i, 0:2, 0], trans)
+
+ for i in range(29):
+ if joints_29_uvd[i, 0, 1] > 0.0:
+ joints_29_uvd[i, 0:2, 0] = affine_transform(joints_29_uvd[i, 0:2, 0], trans)
+
+ target_smpl_weight = torch.ones(1).float() * smpl_weight
+ theta_24_weights = np.ones((24, 9)) * smpl_weight
+
+ theta_24_weights = theta_24_weights.reshape(24 * 9)
+
+ # generate training targets
+ target_uvd_29, target_weight_29 = self._integral_uvd_target_generator(joints_29_uvd, 29, inp_h, inp_w)
+ target_xyz_17, target_weight_17 = self._integral_xyz_target_generator(joints_17_xyz, joints_vis_17, 17)
+ target_xyz_24, target_weight_24 = self._integral_xyz_target_generator(joints_24_xyz, joints_vis_29[:24, :], 24)
+
+ target_weight_29 *= joints_vis_29.reshape(-1)
+ target_weight_24 *= joints_vis_xyz_29[:24, :].reshape(-1)
+ target_weight_17 *= joints_vis_17.reshape(-1)
+ bbox = _center_scale_to_box(center, scale)
+
+ tmp_uvd_24 = target_uvd_29.reshape(-1, 3)[:24]
+ tmp_uvd_24_weight = target_weight_29.reshape(-1, 3)[:24] * target_weight_24.reshape(-1, 3)
+
+ if self.focal_length > 0:
+ cam_scale, cam_trans, cam_valid, cam_error, new_uvd = self.calc_cam_scale_trans2(
+ target_xyz_24.reshape(-1, 3).copy(),
+ tmp_uvd_24.copy(),
+ tmp_uvd_24_weight.copy())
+
+ # target_uvd_29 = (target_uvd_29 * target_weight_29).reshape(-1, 3)
+ else:
+ cam_scale = 1
+ cam_trans = np.zeros(2)
+ cam_valid = 0
+ cam_error = 0
+
+ assert img.shape[2] == 3
+ if self._train:
+ c_high = 1 + self._color_factor
+ c_low = 1 - self._color_factor
+ img[:, :, 0] = np.clip(img[:, :, 0] * random.uniform(c_low, c_high), 0, 255)
+ img[:, :, 1] = np.clip(img[:, :, 1] * random.uniform(c_low, c_high), 0, 255)
+ img[:, :, 2] = np.clip(img[:, :, 2] * random.uniform(c_low, c_high), 0, 255)
+
+ img = im_to_torch(img)
+ # mean
+ img[0].add_(-0.406)
+ img[1].add_(-0.457)
+ img[2].add_(-0.480)
+
+ # std
+ img[0].div_(0.225)
+ img[1].div_(0.224)
+ img[2].div_(0.229)
+
+ img_center = np.array([float(imgwidth) * 0.5, float(imght) * 0.5])
+
+ # target_weight_29 = target_weight_29.reshape(29, 3)
+ # target_weight_29[:, 2] = 0
+ # target_weight_29 = target_weight_29.reshape(-1)
+
+ output = {
+ 'type': '3d_data_w_smpl',
+ 'image': img,
+ # 'target_theta': torch.from_numpy(theta_quat).float(),
+ 'target_theta': torch.from_numpy(theta_rot_mat).float(),
+ 'target_theta_weight': torch.from_numpy(theta_24_weights).float(),
+ 'target_beta': torch.from_numpy(beta).float(),
+ 'target_smpl_weight': target_smpl_weight,
+ 'target_uvd_29': torch.from_numpy(target_uvd_29.reshape(-1)).float(),
+ 'target_xyz_24': torch.from_numpy(target_xyz_24).float(),
+ 'target_weight_29': torch.from_numpy(target_weight_29).float(),
+ 'target_weight_24': torch.from_numpy(target_weight_24).float(),
+ 'target_xyz_17': torch.from_numpy(target_xyz_17).float(),
+ 'target_weight_17': torch.from_numpy(target_weight_17).float(),
+ 'target_xyz_weight_24': torch.from_numpy(target_weight_24).float(),
+ 'trans_inv': torch.from_numpy(trans_inv).float(),
+ 'intrinsic_param': torch.from_numpy(intrinsic_param).float(),
+ 'joint_root': torch.from_numpy(joint_root).float(),
+ 'depth_factor': torch.from_numpy(depth_factor).float(),
+ 'bbox': torch.Tensor(bbox),
+ 'target_twist': torch.from_numpy(twist_phi).float(),
+ 'target_twist_weight': torch.from_numpy(twist_weight).float(),
+ 'camera_scale': torch.from_numpy(np.array([cam_scale])).float(),
+ 'camera_trans': torch.from_numpy(cam_trans).float(),
+ 'camera_valid': cam_valid,
+ 'camera_error': cam_error,
+ 'img_center': torch.from_numpy(img_center).float(),
+ 'target_beta_kid': torch.from_numpy(beta_kid).float(),
+ }
+
+ return output
+
+ def half_body_transform(self, joints, joints_vis):
+ upper_joints = []
+ lower_joints = []
+ for joint_id in range(self.num_joints):
+ if joints_vis[joint_id][0] > 0:
+ if joint_id in self.upper_body_ids:
+ upper_joints.append(joints[joint_id])
+ else:
+ lower_joints.append(joints[joint_id])
+
+ if np.random.randn() < 0.5 and len(upper_joints) > 2:
+ selected_joints = upper_joints
+ else:
+ selected_joints = lower_joints \
+ if len(lower_joints) > 2 else upper_joints
+
+ if len(selected_joints) < 2:
+ return None, None
+
+ selected_joints = np.array(selected_joints, dtype=np.float32)
+ center = selected_joints.mean(axis=0)[:2]
+
+ left_top = np.amin(selected_joints, axis=0)
+ right_bottom = np.amax(selected_joints, axis=0)
+
+ w = right_bottom[0] - left_top[0]
+ h = right_bottom[1] - left_top[1]
+
+ if w > self._aspect_ratio * h:
+ h = w * 1.0 / self._aspect_ratio
+ elif w < self._aspect_ratio * h:
+ w = h * self._aspect_ratio
+
+ scale = np.array(
+ [
+ w * 1.0 / self.pixel_std,
+ h * 1.0 / self.pixel_std
+ ],
+ dtype=np.float32
+ )
+
+ scale = scale * 1.5
+
+ return center, scale
+
+ def calc_cam_scale_trans2(self, xyz_29, uvd_29, uvd_weight):
+
+ f = self.focal_length
+
+ # unit: meter
+ # the equation to be solved:
+ # u * 256 / f * (z + f/256 * 1/scale) = x + tx
+ # v * 256 / f * (z + f/256 * 1/scale) = y + ty
+
+ weight = (uvd_weight.sum(axis=-1, keepdims=True) >= 3.0) * 1.0 # 24 x 1
+ # assert weight.sum() >= 2, 'too few valid keypoints to calculate cam para'
+
+ if weight.sum() < 2:
+ # print('bad data')
+ return 0, np.zeros(2), 0.0, -1, uvd_29
+
+ xyz_29 = xyz_29 * self.depth_factor2meter # convert to meter
+ new_uvd = uvd_29.copy()
+
+ num_joints = len(uvd_29)
+
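+ # weighted linear least squares in the unknowns [1/scale, tx, ty]: each
+ # valid joint contributes one row for u (Ax) and one for v (Ay)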
+ Ax = np.zeros((num_joints, 3))
+ Ax[:, 1] = -1
+ Ax[:, 0] = uvd_29[:, 0]
+
+ Ay = np.zeros((num_joints, 3))
+ Ay[:, 2] = -1
+ Ay[:, 0] = uvd_29[:, 1]
+
+ Ax = Ax * weight
+ Ay = Ay * weight
+
+ A = np.concatenate([Ax, Ay], axis=0)
+
+ bx = (xyz_29[:, 0] - 256 * uvd_29[:, 0] / f * xyz_29[:, 2]) * weight[:, 0]
+ by = (xyz_29[:, 1] - 256 * uvd_29[:, 1] / f * xyz_29[:, 2]) * weight[:, 0]
+ b = np.concatenate([bx, by], axis=0)
+
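+ # solve via the normal equations (A^T A) p = A^T b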
+ A_s = np.dot(A.T, A)
+ b_s = np.dot(A.T, b)
+
+ cam_para = np.linalg.solve(A_s, b_s)
+
+ trans = cam_para[1:]
+ scale = 1.0 / cam_para[0]
+
+ target_camera = np.zeros(3)
+ target_camera[0] = scale
+ target_camera[1:] = trans
+
+ backed_projected_xyz = self.back_projection(uvd_29, target_camera, f)
+ backed_projected_xyz[:, 2] = backed_projected_xyz[:, 2] * self.depth_factor2meter
+ diff = np.sum((backed_projected_xyz - xyz_29)**2, axis=-1) * weight[:, 0]
+ diff = np.sqrt(diff).sum() / (weight.sum() + 1e-6) * 1000 # mean per-joint error in mm, checked against the 70 mm threshold below
+ # print(scale, trans, diff)
+ if diff < 70:
+ new_uvd = self.projection(xyz_29, target_camera, f)
+ return scale, trans, 1.0, diff, new_uvd * uvd_weight
+ else:
+ return scale, trans, 0.0, diff, new_uvd
+
+ def projection(self, xyz, camera, f):
+ # xyz: unit: meter, u = f/256 * (x+dx) / (z+dz)
+ transl = camera[1:3]
+ scale = camera[0]
+ z_cam = xyz[:, 2:] + f / (256.0 * scale) # J x 1
+ uvd = np.zeros_like(xyz)
+ uvd[:, 2] = xyz[:, 2] / self.bbox_3d_shape[2]
+ uvd[:, :2] = f / 256.0 * (xyz[:, :2] + transl) / z_cam
+ return uvd
+
+ def back_projection(self, uvd, pred_camera, focal_length=5000.):
+ camScale = pred_camera[:1].reshape(1, -1)
+ camTrans = pred_camera[1:].reshape(1, -1)
+
+ camDepth = focal_length / (256 * camScale)
+
+ pred_xyz = np.zeros_like(uvd)
+ pred_xyz[:, 2] = uvd[:, 2].copy()
+ pred_xyz[:, :2] = (uvd[:, :2] * 256 / focal_length) * (pred_xyz[:, 2:] * self.depth_factor2meter + camDepth) - camTrans
+
+ return pred_xyz
+
+
+def _box_to_center_scale_nosquare(x, y, w, h, aspect_ratio=1.0, scale_mult=1.5):
+ """Convert box coordinates to center and scale.
+ Adapted from https://github.com/Microsoft/human-pose-estimation.pytorch
+ """
+ pixel_std = 1
+ center = np.zeros((2), dtype=np.float32)
+ center[0] = x + w * 0.5
+ center[1] = y + h * 0.5
+
+ scale = np.array(
+ [w * 1.0 / pixel_std, h * 1.0 / pixel_std], dtype=np.float32)
+ if center[0] != -1:
+ scale = scale * scale_mult
+ return center, scale
diff --git a/hybrik/utils/presets/simple_transform_3d_smpl_cam.py b/hybrik/utils/presets/simple_transform_3d_smpl_cam.py
index 9890be8..8f62dad 100644
--- a/hybrik/utils/presets/simple_transform_3d_smpl_cam.py
+++ b/hybrik/utils/presets/simple_transform_3d_smpl_cam.py
@@ -8,7 +8,7 @@
from ..bbox import _box_to_center_scale, _center_scale_to_box
from ..transforms import (addDPG, affine_transform, flip_joints_3d, flip_thetas, flip_xyz_joints_3d,
get_affine_transform, im_to_torch, batch_rodrigues_numpy, flip_twist,
- rotmat_to_quat_numpy, rotate_xyz_jts, rot_aa, flip_cam_xyz_joints_3d)
+ rotate_xyz_jts, rot_aa, flip_cam_xyz_joints_3d)
from ..pose_utils import get_intrinsic_metrix
s_coco_2_smpl_jt = [
@@ -105,7 +105,7 @@ class SimpleTransform3DSMPLCam(object):
def __init__(self, dataset, scale_factor, color_factor, occlusion, add_dpg,
input_size, output_size, depth_dim, bbox_3d_shape,
rot, sigma, train, loss_type='MSELoss', scale_mult=1.25, focal_length=1000, two_d=False,
- root_idx=0, get_paf=False):
+ root_idx=0):
if two_d:
self._joint_pairs = dataset.joint_pairs
else:
@@ -140,9 +140,6 @@ def __init__(self, dataset, scale_factor, color_factor, occlusion, add_dpg,
self.focal_length = focal_length
self.root_idx = root_idx
- self.get_paf = get_paf
- # self.use_camera = use_camera
-
if train:
self.num_joints_half_body = dataset.num_joints_half_body
self.prob_half_body = dataset.prob_half_body
@@ -454,7 +451,6 @@ def __call__(self, src, label):
joint_cam_17_xyz = joint_cam_17
joints_cam_24_xyz = joint_cam_29[:24]
-
if random.random() > 0.75 and self._train:
assert src.shape[2] == 3
src = src[:, ::-1, :]
@@ -470,8 +466,8 @@ def __call__(self, src, label):
# rotate global theta
theta[0, :3] = rot_aa(theta[0, :3], r)
- theta_rot_mat = batch_rodrigues_numpy(theta)
- theta_quat = rotmat_to_quat_numpy(theta_rot_mat).reshape(24 * 4)
+ theta_rot_mat = batch_rodrigues_numpy(theta).reshape(24 * 9)
+ # theta_quat = rotmat_to_quat_numpy(theta_rot_mat).reshape(24 * 4)
# rotate xyz joints
joint_cam_17_xyz = rotate_xyz_jts(joint_cam_17_xyz, r)
@@ -497,9 +493,10 @@ def __call__(self, src, label):
joints_29_uvd[i, 0:2, 0] = affine_transform(joints_29_uvd[i, 0:2, 0], trans)
target_smpl_weight = torch.ones(1).float()
- theta_24_weights = np.ones((24, 4))
-
- theta_24_weights = theta_24_weights.reshape(24 * 4)
+ # theta_24_weights = np.ones((24, 4))
+ # theta_24_weights = theta_24_weights.reshape(24 * 4)
+ theta_24_weights = np.ones((24, 9))
+ theta_24_weights = theta_24_weights.reshape(24 * 9)
# generate training targets
target_uvd_29, target_weight_29 = self._integral_uvd_target_generator(joints_29_uvd, 29, inp_h, inp_w)
@@ -517,9 +514,9 @@ def __call__(self, src, label):
if self.focal_length > 0:
cam_scale, cam_trans, cam_valid, cam_error, new_uvd = self.calc_cam_scale_trans2(
target_xyz_24.reshape(-1, 3).copy(),
- tmp_uvd_24.copy(),
+ tmp_uvd_24.copy(),
tmp_uvd_24_weight.copy())
-
+
target_uvd_29 = (target_uvd_29 * target_weight_29).reshape(-1, 3)
else:
cam_scale = 1
@@ -572,7 +569,8 @@ def __call__(self, src, label):
output = {
'type': '3d_data_w_smpl',
'image': img,
- 'target_theta': torch.from_numpy(theta_quat).float(),
+ # 'target_theta': torch.from_numpy(theta_quat).float(),
+ 'target_theta': torch.from_numpy(theta_rot_mat).float(),
'target_theta_weight': torch.from_numpy(theta_24_weights).float(),
'target_beta': torch.from_numpy(beta).float(),
'target_smpl_weight': target_smpl_weight,
@@ -662,7 +660,7 @@ def calc_cam_scale_trans2(self, xyz_29, uvd_29, uvd_weight):
# print('bad data')
return 0, np.zeros(2), 0.0, -1, uvd_29
- xyz_29 = xyz_29 * self.depth_factor2meter # convert to meter
+ xyz_29 = xyz_29 * self.depth_factor2meter # convert to meter
new_uvd = uvd_29.copy()
num_joints = len(uvd_29)