"""
a simple demo script to show how to load different data given a sequence path
Author: Xianghui
Cite: BEHAVE: Dataset and Method for Tracking Human Object Interaction
"""
import sys, os
sys.path.append(os.getcwd())
import cv2
import numpy as np
from tqdm import tqdm
from os.path import join, dirname, basename
# imports for data loader and transformation between kinects
from data.frame_data import FrameDataReader
from data.kinect_transform import KinectTransform
# imports for rendering, you can replace with your own code
from viz.pyt3d_wrapper import Pyt3DWrapper
import pytorch3d


def main(args):
    image_size = 1200
    w, h = image_size, int(image_size * 0.75)
    # FrameDataReader is the core class for dataset reading
    reader = FrameDataReader(args.seq_folder)
    # handles transformations between the different kinect color cameras;
    # the constructor loads the calibration info and kinect intrinsics
    kinect_transform = KinectTransform(args.seq_folder, kinect_count=reader.kinect_count)
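    # A standalone usage sketch (world2local_meshes is the same call used in the
    # render loop below; my_meshes is a hypothetical list of meshes in world coordinates):
    #   local_meshes = kinect_transform.world2local_meshes(my_meshes, kid=1)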
    # defines the subfolder for loading fitting results
    smpl_name = args.smpl_name
    obj_name = args.obj_name

    pyt3d_version = pytorch3d.__version__
    # compare versions numerically; a plain string comparison would order '0.10' before '0.6'
    if tuple(int(v) for v in pyt3d_version.split('.')[:2]) >= (0, 6):
        image_size = (h, w)  # pytorch3d >= 0.6 supports rendering rectangular images
    else:
        print(f"Warning: using old pytorch3d version {pyt3d_version}, we recommend using 0.6 or higher.")
    pyt3d_wrapper = Pyt3DWrapper(image_size=image_size)

    outdir = args.viz_dir
    seq_save_path = join(outdir, reader.seq_name)
    os.makedirs(seq_save_path, exist_ok=True)
    seq_end = reader.cvt_end(args.end)  # converts a None end argument into the last frame index
    # mask_video_paths = [join(seq_save_path, f'mask_k{x}.mp4') for x in reader.seq_info.kids]
    rend_video_path = join(seq_save_path, f'smpl_{smpl_name}_obj_{obj_name}_s{args.start}_e{seq_end}.mp4')
    video_writer = None
    loop = tqdm(range(args.start, seq_end))
    loop.set_description(reader.seq_name)
    for i in loop:
        # load smpl and object fit meshes
        smpl_fit = reader.get_smplfit(i, smpl_name)
        obj_fit = reader.get_objfit(i, obj_name)
        if smpl_fit is None or obj_fit is None:
            print(f'no fitting result for frame: {reader.frame_time(i)}')
            continue
        fit_meshes = [smpl_fit, obj_fit]
        # get all color images in this frame
        kids = [1, 2]  # choose which kinect ids to visualize
        imgs_all = reader.get_color_images(i, reader.kids)
        imgs_resize = [cv2.resize(x, (w, h)) for x in imgs_all]
        overlaps = [imgs_resize[1]]  # first panel: the raw color image from kinect 1
        selected_imgs = [imgs_resize[x] for x in kids]  # render the fitting in the selected views
        for orig, kid in zip(selected_imgs, kids):
            # transform fitted meshes from world coordinates to the local color camera
            # coordinates, same for point clouds
            fit_meshes_local = kinect_transform.world2local_meshes(fit_meshes, kid)
            # render meshes
            rend = pyt3d_wrapper.render_meshes(fit_meshes_local, viz_contact=args.viz_contact)
            h, w = orig.shape[:2]
            overlap = cv2.resize((rend * 255).astype(np.uint8), (w, h))
            # pixels where the rendering is pure white (background) keep the original image
            mask = overlap[:, :, 0] == 255
            overlap[mask] = orig[mask]
            cv2.putText(overlap, f'kinect {kid}', (w // 3, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 2)
            overlaps.append(overlap)
        comb = np.concatenate(overlaps, 1)
        cv2.putText(comb, reader.frame_time(i), (w // 3, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 2)
        if video_writer is None:
            ch, cw = comb.shape[:2]
            # fourcc 'mp4v' (the literal 0x7634706d), written at 3 fps
            video_writer = cv2.VideoWriter(rend_video_path, cv2.VideoWriter_fourcc(*'mp4v'), 3, (cw, ch))
        video_writer.write(cv2.cvtColor(comb, cv2.COLOR_RGB2BGR))
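        # Optionally dump each composited frame as an image as well (the file name
        # pattern here is illustrative, not part of the original script):
        # cv2.imwrite(join(seq_save_path, f'render_{i:05d}.jpg'),
        #             cv2.cvtColor(comb, cv2.COLOR_RGB2BGR))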
        # load person and object point clouds (returns psbody.Mesh)
        # the convert flag keeps compatibility with detectron2 classes: detectron2 classifies
        # all chairs as 'chair', so the chair pc is saved in the subfolder 'chair' even though
        # we have two chairs (chairwood and chairblack); likewise, yogaball and basketball
        # are classified as 'sports ball'
        # obj_pc = reader.get_pc(i, 'obj', convert=True)
        # person_pc = reader.get_pc(i, 'person')

        # load person and object masks
        # for kid, rgb, writer in zip(kids, imgs_all, video_writers):
        #     obj_mask = np.zeros_like(rgb).astype(np.uint8)
        #     mask = reader.get_mask(i, kid, 'obj', ret_bool=True)
        #     if mask is None:
        #         continue  # mask can be None if there is no fitting in this frame
        #     obj_mask[mask] = np.array([255, 0, 0])
        #
        #     person_mask = np.zeros_like(rgb).astype(np.uint8)
        #     mask = reader.get_mask(i, kid, 'person', ret_bool=True)
        #     person_mask[mask] = np.array([255, 0, 0])
        #
        #     comb = np.concatenate([rgb, person_mask, obj_mask], 1)
        #     ch, cw = comb.shape[:2]
        #     writer.append_data(cv2.resize(comb, (cw // 3, ch // 3)))
    if video_writer is not None:  # the writer stays None if no frame had a fitting
        video_writer.release()


if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('-s', '--seq_folder', help='path to one BEHAVE sequence folder')
    parser.add_argument('-sn', '--smpl_name', default='fit02',
                        help='smpl fitting save name; for the final dataset, use fit02')
    parser.add_argument('-on', '--obj_name', default='fit01',
                        help='object fitting save name; for the final dataset, use fit01')
    parser.add_argument('-fs', '--start', type=int, default=0, help='start from which frame')
    parser.add_argument('-fe', '--end', type=int, default=None, help='end at which frame')
    parser.add_argument('-v', '--viz_dir', default="/BS/xxie-4/work/viz",
                        help='path to save your visualization videos')
    parser.add_argument('-vc', '--viz_contact', default=False, action='store_true',
                        help='visualize contact spheres or not')
    args = parser.parse_args()

    main(args)