# pose_estimation_3d/gast/gast_utils.py

import math

import matplotlib

matplotlib.use('Agg')

import numpy as np
from scipy.optimize import linear_sum_assignment
import cv2
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, Axes3D
from libs.filterpy.kalman import KalmanFilter
import webcamera_utils

# Names exported by a star-import of this module; every other definition
# below is an internal helper.
__all__ = [
    'DataLoader',
    'ObjSort',
    'h36m_coco_format',
    'get_joints_info',
    'normalize_screen_coordinates',
    'camera_to_world',
    'get_affine_transform',
    'get_final_preds',
    'render_animation',
]

# Joint index tables used by coco_h36m() and h36m_coco_format(): the value of
# source joint coco_order[k] is copied to output joint h36m_coco_order[k].
h36m_coco_order = [9, 11, 14, 12, 15, 13, 16, 4, 1, 5, 2, 6, 3]
coco_order = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
# Output joints that receive the four keypoints synthesised in coco_h36m(),
# in the order head, thorax, pelvis, spine.
spple_keypoints = [10, 8, 0, 7]


class DataLoader(object):
    """
    Yields padded, flip-augmented batches of 2D pose sequences.

    Params:
      poses_2d - iterable of sequences indexed [frame, joint, coord]
      pad - number of frames added on each side of the time axis (edge padding)
      causal_shift - frames moved from the right padding to the left padding
      kps_left, kps_right - joint indices that are exchanged in the mirrored
                            copy; None (the default) means no joint is exchanged
    """

    def __init__(
            self, poses_2d, pad=0, causal_shift=0,
            kps_left=None, kps_right=None):
        self.poses_2d = poses_2d
        self.kps_left = kps_left
        self.kps_right = kps_right
        self.pad = pad
        self.causal_shift = causal_shift

    def next_epoch(self):
        """
        Generates one float32 batch of shape (2, frames + 2 * pad, joints, coords)
        per sequence: entry 0 is the padded sequence, entry 1 its mirrored copy
        (coordinate 0 negated, left and right joints exchanged).
        """
        # Copy the index lists into plain lists so that `+` concatenates them
        # (it would add element-wise for arrays) and so that the default None
        # does not raise.
        left = [] if self.kps_left is None else list(self.kps_left)
        right = [] if self.kps_right is None else list(self.kps_right)
        time_padding = (self.pad + self.causal_shift, self.pad - self.causal_shift)

        for seq_2d in self.poses_2d:
            padded = np.pad(seq_2d, (time_padding, (0, 0), (0, 0)), 'edge')

            batch_2d = np.stack((padded, padded), axis=0)
            batch_2d[1, :, :, 0] *= -1
            if left or right:
                # the right-hand side is a copy (advanced indexing), so the
                # exchange cannot read values it has already overwritten
                batch_2d[1, :, left + right] = batch_2d[1, :, right + left]

            yield batch_2d.astype(np.float32)


class Skeleton:
    """
    Holds a joint hierarchy (parent index per joint) together with the index
    lists of its left and right joints, which must have the same length.
    """

    def __init__(self, parents, joints_left, joints_right):
        n_left, n_right = len(joints_left), len(joints_right)
        assert n_left == n_right

        self._joints_right = joints_right
        self._joints_left = joints_left
        self._parents = parents

    def parents(self):
        """Returns the parent index list given at construction."""
        return self._parents

    def num_joints(self):
        """Returns the number of joints, i.e. the length of the parent list."""
        return len(self._parents)


def iou(bb_test, bb_gt):
    """
    Computes IoU (intersection over union) between two bboxes in the form [x1,y1,x2,y2].

    Returns 0 when the union area is not positive (e.g. two zero-sized boxes)
    instead of dividing by zero and producing nan/inf.
    """
    xx1 = np.maximum(bb_test[0], bb_gt[0])
    yy1 = np.maximum(bb_test[1], bb_gt[1])
    xx2 = np.minimum(bb_test[2], bb_gt[2])
    yy2 = np.minimum(bb_test[3], bb_gt[3])
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    inter = w * h

    area_test = (bb_test[2] - bb_test[0]) * (bb_test[3] - bb_test[1])
    area_gt = (bb_gt[2] - bb_gt[0]) * (bb_gt[3] - bb_gt[1])
    union = area_test + area_gt - inter

    # Divide only where the union is positive; the dummy divisor 1 keeps the
    # masked-out entries free of division warnings.
    valid = union > 0
    ratio = np.where(valid, inter / np.where(valid, union, 1.), 0.)
    # `[()]` turns the 0-d result of scalar inputs back into a NumPy scalar
    return ratio[()]


def convert_bbox_to_z(bbox):
    """
    Converts a bounding box [x1,y1,x2,y2] into the measurement column vector
      [x,y,s,r] of shape (4, 1), where x,y is the box centre, s is the area
      (width * height) and r is the aspect ratio (width / height).
      Only the first four entries of bbox are read.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    width = right - left
    height = bottom - top
    centre_x = left + width / 2.
    centre_y = top + height / 2.
    area = width * height
    aspect = width / float(height)
    return np.array([centre_x, centre_y, area, aspect]).reshape((4, 1))


def convert_x_to_bbox(x, score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
      [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right.

    x may be a flat vector or a column vector (such as a Kalman state); only
    its first four entries are read. The result has shape (1, 4), or (1, 5)
    with score appended when score is not None.
    """
    w = np.sqrt(x[2] * x[3])
    h = x[2] / w
    values = [x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]
    if score is not None:
        values.append(score)
    # Flatten every entry before joining: with a column-vector x the corners
    # are 1-element arrays while score is a scalar, and np.array() rejects
    # such a ragged list.
    flat = np.concatenate([np.ravel(v) for v in values])
    return flat.reshape((1, len(values)))


class KalmanBoxTracker(object):
    """
    Tracks a single object, observed as a bounding box, with a Kalman filter.
    Every new tracker takes the next value of the class-wide counter as its id.
    """
    count = 0

    def __init__(self, bbox):
        """
        Creates a tracker whose state is seeded from the given bounding box.
        """
        # Constant velocity model: the 7-dim state holds [x, y, s, r] followed
        # by the rates of x, y and s; only [x, y, s, r] is measured.
        kf = KalmanFilter(dim_x=7, dim_z=4)
        kf.F = np.array([
            [1, 0, 0, 0, 1, 0, 0],
            [0, 1, 0, 0, 0, 1, 0],
            [0, 0, 1, 0, 0, 0, 1],
            [0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 1],
        ])
        kf.H = np.array([
            [1, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0],
        ])

        kf.R[2:, 2:] *= 10.
        # the initial velocities are unobservable, so start them very uncertain
        kf.P[4:, 4:] *= 1000.
        kf.P *= 10.
        kf.Q[-1, -1] *= 0.01
        kf.Q[4:, 4:] *= 0.01

        kf.x[:4] = convert_bbox_to_z(bbox)
        self.kf = kf

        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0

    def update(self, bbox):
        """
        Corrects the state with an observed bbox and resets the miss counter.
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        measurement = convert_bbox_to_z(bbox)
        self.kf.update(measurement)

    def predict(self):
        """
        Advances the state by one step and returns the predicted bounding box.
        """
        # zero the area rate when it would drive the area to zero or below
        if (self.kf.x[6] + self.kf.x[2]) <= 0:
            self.kf.x[6] *= 0.0
        self.kf.predict()
        self.age += 1
        # a prediction without an update in between breaks the hit streak
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        predicted = convert_x_to_bbox(self.kf.x)
        self.history.append(predicted)
        return predicted

    def get_state(self):
        """
        Returns the bounding box described by the current state.
        """
        return convert_x_to_bbox(self.kf.x)


def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
    """
    Assigns detections to tracked objects (both given as bounding boxes) by
    solving the assignment problem that maximises the total IoU.

    Returns matches (array of [detection, tracker] index pairs),
    unmatched_detections and unmatched_trackers. Assigned pairs whose IoU is
    below iou_threshold are reported as unmatched on both sides.
    """
    num_dets, num_trks = len(detections), len(trackers)
    if num_trks == 0:
        return np.empty((0, 2), dtype=int), np.arange(num_dets), np.empty((0, 5), dtype=int)

    iou_matrix = np.zeros((num_dets, num_trks), dtype=np.float32)
    for d, det in enumerate(detections):
        for t, trk in enumerate(trackers):
            iou_matrix[d, t] = iou(det, trk)

    # negate so that the cost minimisation maximises the IoU
    det_rows, trk_cols = linear_sum_assignment(-iou_matrix)
    matched_indices = np.stack((det_rows, trk_cols), axis=1)

    assigned_dets = matched_indices[:, 0]
    assigned_trks = matched_indices[:, 1]
    unmatched_detections = [d for d in range(num_dets) if d not in assigned_dets]
    unmatched_trackers = [t for t in range(num_trks) if t not in assigned_trks]

    # drop assignments with low IOU
    matches = []
    for pair in matched_indices:
        det_idx, trk_idx = pair[0], pair[1]
        if iou_matrix[det_idx, trk_idx] < iou_threshold:
            unmatched_detections.append(det_idx)
            unmatched_trackers.append(trk_idx)
            continue
        matches.append(pair.reshape(1, 2))

    if matches:
        matches = np.concatenate(matches, axis=0)
    else:
        matches = np.empty((0, 2), dtype=int)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


class ObjSort(object):
    def __init__(self, max_age=1, min_hits=3):
        """
        Stores the SORT parameters: max_age is the number of frames a tracker
        may go without an update before it is dropped, min_hits the hit streak
        required before a tracker is reported.
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []
        self.frame_count = 0

    def update(self, dets):
        """
        Params:
          dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
        Requires: this method must be called once for each frame even with empty detections.
        Returns a similar array, where the last column is the object ID.

        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        self.frame_count += 1

        # predict the new location of every existing tracker
        predicted = np.zeros((len(self.trackers), 5))
        invalid = []
        for idx, row in enumerate(predicted):
            box = self.trackers[idx].predict()[0]
            row[:] = [box[0], box[1], box[2], box[3], 0]
            if np.any(np.isnan(box)):
                invalid.append(idx)
        # discard the rows, and the trackers, whose prediction is not finite
        predicted = np.ma.compress_rows(np.ma.masked_invalid(predicted))
        for idx in reversed(invalid):
            self.trackers.pop(idx)

        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, predicted)

        # feed every matched tracker its assigned detection
        for idx, tracker in enumerate(self.trackers):
            if idx in unmatched_trks:
                continue
            det_idx = matched[np.where(matched[:, 1] == idx)[0], 0]  # det_idx: [n]
            tracker.update(dets[det_idx, :][0])

        # start a new tracker for every unmatched detection
        for det_idx in unmatched_dets:
            self.trackers.append(KalmanBoxTracker(dets[det_idx, :]))

        results = []
        position = len(self.trackers)
        for tracker in reversed(self.trackers):
            box = tracker.get_state()[0]
            updated_now = tracker.time_since_update < 1
            confirmed = tracker.hit_streak >= self.min_hits or self.frame_count <= self.min_hits
            if updated_now and confirmed:
                # +1 as MOT benchmark requires positive
                results.append(np.concatenate((box, [tracker.id + 1])).reshape(1, -1))
            position -= 1
            # remove dead tracklet; walking backwards keeps the remaining
            # positions valid after a pop
            if tracker.time_since_update > self.max_age:
                self.trackers.pop(position)

        if results:
            return np.concatenate(results)
        return np.empty((0, 5))


def coco_h36m(keypoints):
    """
    Re-orders one sequence of 2D keypoints into the joint layout used by the
    rest of this module and synthesises the four joints that have no direct
    counterpart in the input (head, thorax, pelvis, spine).

    Params:
      keypoints - array indexed [frame, joint, coord]; presumably (T, 17, 2)
                  in COCO joint order, since the final reshape needs
                  17 * 2 = 34 values per frame - TODO confirm against callers
    Returns the float32 keypoints in the new layout (same shape as the input)
    and the indices of the frames whose coordinates do not sum to zero.
    """
    temporal = keypoints.shape[0]
    keypoints_h36m = np.zeros_like(keypoints, dtype=np.float32)
    htps_keypoints = np.zeros((temporal, 4, 2), dtype=np.float32)

    # htps_keypoints: head, thorax, pelvis, spine
    # head: x is the mean of input joints 1..4, y is the sum of joints 1..2
    # minus joint 0
    htps_keypoints[:, 0, 0] = np.mean(keypoints[:, 1:5, 0], axis=1, dtype=np.float32)
    htps_keypoints[:, 0, 1] = np.sum(keypoints[:, 1:3, 1], axis=1, dtype=np.float32) - keypoints[:, 0, 1]
    # thorax: midpoint of input joints 5 and 6, moved a third of the way
    # towards input joint 0
    htps_keypoints[:, 1, :] = np.mean(keypoints[:, 5:7, :], axis=1, dtype=np.float32)
    htps_keypoints[:, 1, :] += (keypoints[:, 0, :] - htps_keypoints[:, 1, :]) / 3

    # pelvis: midpoint of input joints 11 and 12; spine: mean of 5, 6, 11, 12
    htps_keypoints[:, 2, :] = np.mean(keypoints[:, 11:13, :], axis=1, dtype=np.float32)
    htps_keypoints[:, 3, :] = np.mean(keypoints[:, [5, 6, 11, 12], :], axis=1, dtype=np.float32)

    # place the synthesised joints, then copy the directly mapped ones
    keypoints_h36m[:, spple_keypoints, :] = htps_keypoints
    keypoints_h36m[:, h36m_coco_order, :] = keypoints[:, coco_order, :]

    # Corrections applied in place, so their order matters:
    # output joint 9 moves a quarter of the way towards the midpoint of input
    # joints 5 and 6
    keypoints_h36m[:, 9, :] -= (keypoints_h36m[:, 9, :] - np.mean(keypoints[:, 5:7, :], axis=1, dtype=np.float32)) / 4
    # x of output joint 7 is pushed away from the mean x of output joints 0 and 8
    keypoints_h36m[:, 7, 0] += 2 * (
            keypoints_h36m[:, 7, 0] - np.mean(keypoints_h36m[:, [0, 8], 0], axis=1, dtype=np.float32))
    # y of output joint 8 is shifted by two thirds of (mean y of input joints
    # 1..2 minus y of input joint 0)
    keypoints_h36m[:, 8, 1] -= (np.mean(keypoints[:, 1:3, 1], axis=1, dtype=np.float32) - keypoints[:, 0, 1]) * 2 / 3

    # half body: the joint of ankle and knee equal to hip
    # keypoints_h36m[:, [2, 3]] = keypoints_h36m[:, [1, 1]]
    # keypoints_h36m[:, [5, 6]] = keypoints_h36m[:, [4, 4]]

    # a frame counts as valid when its 34 coordinate values do not sum to zero
    valid_frames = np.where(np.sum(keypoints_h36m.reshape(-1, 34), axis=1) != 0)[0]
    return keypoints_h36m, valid_frames


def h36m_coco_format(keypoints, scores):
    """
    Converts every keypoint sequence of a batch with coco_h36m() and re-orders
    the matching scores the same way.

    Params:
      keypoints - 4-dimensional array; each entry along the first axis is one
                  sequence handed to coco_h36m()
      scores - 3-dimensional array of per-joint scores, indexed like keypoints
               without the coordinate axis
    Sequences whose keypoints sum to zero are skipped. Returns the converted
    keypoints and scores as float32 arrays plus a list with the valid-frame
    indices of every converted sequence.
    """
    assert len(keypoints.shape) == 4 and len(scores.shape) == 3

    h36m_kpts, h36m_scores, valid_frames = [], [], []

    for seq_idx, kpts in enumerate(keypoints):
        score = scores[seq_idx]

        if np.sum(kpts) == 0.:
            continue

        converted, frames_ok = coco_h36m(kpts)
        h36m_kpts.append(converted)
        valid_frames.append(frames_ok)

        new_score = np.zeros_like(score, dtype=np.float32)
        new_score[:, h36m_coco_order] = score[:, coco_order]
        # scores of the synthesised joints are averages of the joints they are
        # derived from; joint 7 relies on joints 0 and 8 being filled first
        new_score[:, 0] = np.mean(score[:, [11, 12]], axis=1, dtype=np.float32)
        new_score[:, 8] = np.mean(score[:, [5, 6]], axis=1, dtype=np.float32)
        new_score[:, 7] = np.mean(new_score[:, [0, 8]], axis=1, dtype=np.float32)
        new_score[:, 10] = np.mean(score[:, [1, 2, 3, 4]], axis=1, dtype=np.float32)
        h36m_scores.append(new_score)

    return (
        np.asarray(h36m_kpts, dtype=np.float32),
        np.asarray(h36m_scores, dtype=np.float32),
        valid_frames,
    )


def get_joints_info(num_joints):
    """
    Returns (joints_left, joints_right, skeleton, keypoints_metadata) for the
    19-joint body+toe layout when num_joints == 19, otherwise for the
    17-joint body layout.
    """
    if num_joints == 19:
        # Body+toe keypoints
        parents = [-1, 0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 10, 13, 14, 10, 16, 17]
        joints_left = [5, 6, 7, 8, 13, 14, 15]
        joints_right = [1, 2, 3, 4, 16, 17, 18]
    else:
        # Body keypoints
        parents = [-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15]
        joints_left = [4, 5, 6, 11, 12, 13]
        joints_right = [1, 2, 3, 14, 15, 16]

    # the skeleton gets its own copies so it does not share list objects with
    # the values returned to the caller
    h36m_skeleton = Skeleton(
        parents=parents,
        joints_left=list(joints_left),
        joints_right=list(joints_right))

    keypoints_metadata = {
        'keypoints_symmetry': (joints_left, joints_right),
        'layout_name': 'Human3.6M',
        'num_joints': num_joints}

    return joints_left, joints_right, h36m_skeleton, keypoints_metadata


def normalize_screen_coordinates(X, w, h):
    """
    Maps pixel coordinates so that [0, w] becomes [-1, 1]; the second
    coordinate is scaled by the same factor, which keeps the aspect ratio
    ([0, h] becomes [-h/w, h/w]). The last axis of X must have size 2.
    """
    assert X.shape[-1] == 2

    offset = [1, h / w]
    scaled = X / w * 2
    return scaled - offset


def qort(q, v):
    """
    Rotate vector(s) v by the rotation described by quaternion(s) q.
    q has shape (*, 4) with the scalar part first, v has shape (*, 3), and
    both share the same leading dimensions *.
    Returns an array of shape (*, 3).
    """
    assert q.shape[-1] == 4
    assert v.shape[-1] == 3
    assert q.shape[:-1] == v.shape[:-1]

    last_axis = len(q.shape) - 1
    scalar_part = q[..., :1]
    vector_part = q[..., 1:]

    first_cross = np.cross(vector_part, v, axis=last_axis)
    second_cross = np.cross(vector_part, first_cross, axis=last_axis)
    return v + 2 * (scalar_part * first_cross + second_cross)


def camera_to_world(X, R, t):
    """
    Rotates the points X (last axis of size 3) by the quaternion R, repeated
    for every point, and then adds the translation t.
    """
    reps = (*X.shape[:-1], 1)
    rotation = np.tile(R, reps)
    return qort(rotation, X) + t


# Joint groups used by color_edge() to pick a colour.
elbow_knee_v1 = [5, 15]
elbow_knee_v2 = [2, 12]
wrist_ankle_v1 = [6, 16]
wrist_ankle_v2 = [3, 13]
hip_shoulder = [1, 4, 11, 14]
spine_head = [7, 9]
thorax_neck = [8, 10]


def color_edge(joint_num):
    """
    Returns the colour name for a joint: the colour of the first group that
    contains joint_num, or 'purple' when no group does (this includes the
    joints of thorax_neck).
    """
    palette = (
        (elbow_knee_v1, 'peru'),
        (elbow_knee_v2, 'indianred'),
        (wrist_ankle_v1, 'coral'),
        (wrist_ankle_v2, 'brown'),
        (hip_shoulder, 'tan'),
        (spine_head, 'olive'),
    )
    for group, color in palette:
        if joint_num in group:
            return color
    return 'purple'


def downsample_tensor(X, factor):
    """
    Averages every `factor` consecutive entries along the first axis of X.
    Trailing entries that do not fill a complete group are dropped.
    """
    usable = (X.shape[0] // factor) * factor
    grouped = X[:usable].reshape(-1, factor, *X.shape[1:])
    return np.mean(grouped, axis=1)


def affine_transform(pt, t):
    """
    Applies the affine matrix t to the 2D point pt (taken in homogeneous form
    [x, y, 1]) and returns the first two components of the result.
    """
    homogeneous = np.transpose(np.array([pt[0], pt[1], 1.]))
    mapped = np.dot(t, homogeneous)
    return mapped[:2]


def get_3rd_point(a, b):
    """
    Returns b plus the vector (a - b) rotated by 90 degrees, i.e.
    b + [-(a - b)[1], (a - b)[0]].
    """
    delta = a - b
    perpendicular = np.array([-delta[1], delta[0]], dtype=np.float32)
    return b + perpendicular


def get_dir(src_point, rot_rad):
    """
    Rotates the 2D point src_point by rot_rad radians about the origin and
    returns the result as a two-element list.
    """
    sin_r = np.sin(rot_rad)
    cos_r = np.cos(rot_rad)
    x, y = src_point[0], src_point[1]
    return [x * cos_r - y * sin_r, x * sin_r + y * cos_r]


def transform_preds(coords, center, scale, output_size):
    """
    Maps the first two columns of every row of coords through the inverse
    affine transform built from center, scale and output_size (no rotation).
    Returns a new array shaped like coords; any further columns stay zero.
    """
    inverse = get_affine_transform(center, scale, 0, output_size, inv=1)
    target_coords = np.zeros(coords.shape)
    for row, point in enumerate(coords):
        target_coords[row, 0:2] = affine_transform(point[0:2], inverse)
    return target_coords


def get_affine_transform(
        center, scale, rot, output_size,
        shift=np.array([0, 0], dtype=np.float32), inv=0):
    """
    Builds the affine matrix that maps a box of the source image onto the
    output image.

    Params:
      center - centre of the source box
      scale - size of the source box divided by 200; a single number is used
              for both axes, a list/tuple/array gives one value per axis
      rot - rotation in degrees
      output_size - [width, height] of the output image
      shift - offset of the source box, as a fraction of its size
      inv - when truthy, return the transform from the output image back to
            the source image
    Returns the matrix computed by cv2.getAffineTransform from three
    corresponding points: the box centre, a point half the box width away
    from it, and a third point perpendicular to those two.
    """
    if isinstance(scale, (np.ndarray, list, tuple)):
        # a plain list or tuple cannot be multiplied by a float below
        scale = np.asarray(scale)
    else:
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    # two points leave the transform ambiguous; the third fixes it
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def get_max_preds(batch_heatmaps):
    '''
    Locate the maximum of every heatmap.
    batch_heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    Returns preds ([batch_size, num_joints, 2], float32 [x, y] of the maximum,
    set to 0 where the maximum is not positive) and maxvals
    ([batch_size, num_joints, 1], the maximum values).
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    batch_size, num_joints, _, width = batch_heatmaps.shape
    flat = batch_heatmaps.reshape((batch_size, num_joints, -1))

    maxvals = np.amax(flat, 2).reshape((batch_size, num_joints, 1))
    flat_idx = np.argmax(flat, 2).astype(np.float32)

    # split the flat index into column (x) and row (y)
    preds = np.empty((batch_size, num_joints, 2), dtype=np.float32)
    preds[:, :, 0] = flat_idx % width
    preds[:, :, 1] = np.floor(flat_idx / width)

    # zero the coordinates of joints whose best response is not positive
    preds *= np.greater(maxvals, 0.0).astype(np.float32)
    return preds, maxvals


def get_final_preds(batch_heatmaps, center, scale):
    """
    Turns heatmaps into keypoint coordinates in the source image.

    The maximum of every heatmap is shifted by a quarter pixel towards the
    higher neighbouring response and then mapped back with transform_preds(),
    using the center and scale of the corresponding sample.
    Returns (preds, maxvals).
    """
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height, heatmap_width = batch_heatmaps.shape[2], batch_heatmaps.shape[3]

    # post-processing
    for n, p in np.ndindex(coords.shape[0], coords.shape[1]):
        hm = batch_heatmaps[n, p]
        px = int(math.floor(coords[n, p, 0] + 0.5))
        py = int(math.floor(coords[n, p, 1] + 0.5))
        inside = 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1
        if not inside:
            continue
        diff = np.array([
            hm[py, px + 1] - hm[py, px - 1],
            hm[py + 1, px] - hm[py - 1, px],
        ])
        coords[n, p] += np.sign(diff) * .25

    preds = coords.copy()

    # Transform back
    for i, sample_coords in enumerate(coords):
        preds[i] = transform_preds(
            sample_coords, center[i], scale[i], [heatmap_width, heatmap_height]
        )

    return preds, maxvals


# Joint groups used by h36m_color_edge() to pick a colour.
h36m_elbow_knee_v1 = [5, 15]
h36m_elbow_knee_v2 = [2, 12]
h36m_wrist_ankle_v1 = [6, 16]
h36m_wrist_ankle_v2 = [3, 13]
h36m_hip_shoulder = [1, 4, 11, 14]
h36m_spine_neck = [7, 9]
h36m_thorax_head = [8, 10]


def h36m_color_edge(joint_num):
    """
    Returns the colour name for a joint: the colour of the first group that
    contains joint_num, or 'purple' when no group does (this includes the
    joints of h36m_thorax_head).
    """
    palette = (
        (h36m_elbow_knee_v1, 'peru'),  # (205, 133, 63)
        (h36m_elbow_knee_v2, 'indianred'),  # (205, 92, 92)
        (h36m_wrist_ankle_v1, 'coral'),  # (255, 127, 80)
        (h36m_wrist_ankle_v2, 'brown'),  # (165, 42, 42)
        (h36m_hip_shoulder, 'tan'),  # (210, 180, 140)
        (h36m_spine_neck, 'olive'),  # (128, 128, 0)
    )
    for group, color in palette:
        if joint_num in group:
            return color
    return 'purple'  # (128, 0, 128)


def render_animation(
        keypoints, keypoints_metadata, poses, skeleton, fps, bitrate,
        azim, output,
        frames, viewport,
        downsample=1, size=5,
        same_coord=False):
    """
    Render the input frames with their 2D keypoints next to the 3D poses and
    write every rendered figure to the video writer obtained from
    webcamera_utils.get_writer(output, height, width, fps=fps).

    Params:
      keypoints - 2D keypoints indexed [frame, person, joint, coord]; every
                  frame is reshaped to (17 * num_person, 2) for drawing
      keypoints_metadata - dict with 'keypoints_symmetry' (left and right
                  joint index lists) and 'layout_name'
      poses - dict mapping a title to 3D poses indexed [frame][joint, coord]
      skeleton - object whose parents() gives the parent index of every joint;
                 -1 marks a root joint, for which no bone is drawn
      fps - frame rate handed to the writer (divided by downsample)
      bitrate - unused
      azim - azimuth of the 3D views
      output - destination handed to webcamera_utils.get_writer
      frames - sequence of images, one per frame
      viewport - unused
      downsample - when > 1, this many consecutive frames are averaged
      size - figure height; the figure is size * (1 + len(poses)) wide
      same_coord - draw all poses into the first 3D axes; with exactly two
                   persons only that one 3D subplot is created

    Nothing is written when there are no frames to render.
    """
    plt.ioff()

    num_person = keypoints.shape[1]
    shared_axes = num_person == 2 and same_coord

    fig = plt.figure(figsize=(size * (1 + len(poses)), size))
    if shared_axes:
        ax_in = fig.add_subplot(1, 2, 1)
    else:
        ax_in = fig.add_subplot(1, 1 + len(poses), 1)
    ax_in.get_xaxis().set_visible(False)
    ax_in.get_yaxis().set_visible(False)
    ax_in.set_axis_off()

    ax_3d = []
    lines_3d = []
    radius = 1.7
    if shared_axes:
        ax = fig.add_subplot(1, 2, 2, projection='3d')
        ax.view_init(elev=15., azim=azim)
        ax.set_xlim3d([-radius, radius])
        ax.set_zlim3d([0, radius])
        ax.set_ylim3d([-radius, radius])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_zticklabels([])
        ax.dist = 7.5
        ax_3d.append(ax)
        lines_3d.append([])
    else:
        for plot_idx in range(len(poses)):
            ax = fig.add_subplot(1, 1 + len(poses), plot_idx + 2, projection='3d')
            ax.view_init(elev=15., azim=azim)
            ax.set_xlim3d([-radius / 2, radius / 2])
            ax.set_zlim3d([0, radius])
            ax.set_ylim3d([-radius / 2, radius / 2])
            ax.set_aspect('auto')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.dist = 7.5
            ax_3d.append(ax)
            lines_3d.append([])
    poses = list(poses.values())

    effective_length = min(keypoints.shape[0], len(frames))
    frames = frames[:effective_length]

    if downsample > 1:
        keypoints = downsample_tensor(keypoints, downsample)
        frames = downsample_tensor(np.array(frames), downsample).astype('uint8')
        for idx in range(len(poses)):
            poses[idx] = downsample_tensor(poses[idx], downsample)
        fps /= downsample

    initialized = False
    image = None
    lines = []
    points = None
    limit = len(frames)
    parents = skeleton.parents()
    index = list(np.arange(17))
    # Draw the 2D skeleton only if the keypoints match the skeleton (otherwise
    # the parents definition does not apply to them)
    draw_2d_skeleton = len(parents) == 17 and keypoints_metadata['layout_name'] != 'coco'

    def update_video(i):
        nonlocal initialized, image, lines, points

        joints_right_2d = keypoints_metadata['keypoints_symmetry'][1]

        if num_person == 2:
            # the joints of the second person follow those of the first
            joints_right_2d_two = []
            joints_right_2d_two += joints_right_2d
            joints_right_2d_two += [joint + 17 for joint in joints_right_2d]

            colors_2d = np.full(34, 'black')
            colors_2d[joints_right_2d_two] = 'red'
        else:
            colors_2d = np.full(17, 'black')
            colors_2d[joints_right_2d] = 'red'

        if not initialized:
            image = ax_in.imshow(frames[i], aspect='equal')

            for j, j_parent in zip(index, parents):
                if j_parent == -1:
                    continue

                if draw_2d_skeleton:
                    for m in range(num_person):
                        lines.append(ax_in.plot(
                            [keypoints[i, m, j, 0], keypoints[i, m, j_parent, 0]],
                            [keypoints[i, m, j, 1], keypoints[i, m, j_parent, 1]],
                            color='pink'))

                # Apply different colors for each joint
                col = h36m_color_edge(j)

                if same_coord:
                    for pose in poses:
                        pos = pose[i]
                        lines_3d[0].append(ax_3d[0].plot(
                            [pos[j, 0], pos[j_parent, 0]],
                            [pos[j, 1], pos[j_parent, 1]],
                            [pos[j, 2], pos[j_parent, 2]],
                            zdir='z', c=col, linewidth=3)
                        )
                else:
                    for n, ax in enumerate(ax_3d):
                        pos = poses[n][i]
                        lines_3d[n].append(ax.plot(
                            [pos[j, 0], pos[j_parent, 0]],
                            [pos[j, 1], pos[j_parent, 1]],
                            [pos[j, 2], pos[j_parent, 2]],
                            zdir='z', c=col, linewidth=3)
                        )

            points = ax_in.scatter(
                *keypoints[i].reshape(17 * num_person, 2).T, 10,
                color=colors_2d, edgecolors='white', zorder=10)
            initialized = True
        else:
            image.set_data(frames[i])

            # The artists were appended bone by bone (root joints skipped),
            # with the persons / poses of one bone stored consecutively, so
            # they are addressed through the running bone count. This keeps
            # every bone on the artist that was created with its colour.
            bone = 0
            for j, j_parent in zip(index, parents):
                if j_parent == -1:
                    continue

                if draw_2d_skeleton:
                    for m in range(num_person):
                        lines[bone * num_person + m][0].set_data(
                            np.array([keypoints[i, m, j, 0], keypoints[i, m, j_parent, 0]]),
                            np.array([keypoints[i, m, j, 1], keypoints[i, m, j_parent, 1]]))

                if same_coord:
                    for k, pose in enumerate(poses):
                        pos = pose[i]
                        line = lines_3d[0][bone * len(poses) + k][0]
                        line.set_xdata(np.array([pos[j, 0], pos[j_parent, 0]]))
                        line.set_ydata(np.array([pos[j, 1], pos[j_parent, 1]]))
                        line.set_3d_properties([pos[j, 2], pos[j_parent, 2]], zdir='z')
                else:
                    for n, ax in enumerate(ax_3d):
                        pos = poses[n][i]
                        line = lines_3d[n][bone][0]
                        line.set_xdata(np.array([pos[j, 0], pos[j_parent, 0]]))
                        line.set_ydata(np.array([pos[j, 1], pos[j_parent, 1]]))
                        line.set_3d_properties([pos[j, 2], pos[j_parent, 2]], zdir='z')

                bone += 1

            points.set_offsets(keypoints[i].reshape(17 * num_person, 2))

        print('{}/{}      '.format(i, limit), end='\r')

    fig.tight_layout()

    # rendering to video
    writer = None
    try:
        for i in range(limit):
            update_video(i)
            fig.canvas.draw()
            im = np.array(fig.canvas.renderer.buffer_rgba())
            im = cv2.cvtColor(im, cv2.COLOR_RGBA2BGR)
            if writer is None:
                # the writer is created lazily because the frame size is only
                # known once the first figure has been rendered
                f_h = int(im.shape[0])
                f_w = int(im.shape[1])
                writer = webcamera_utils.get_writer(output, f_h, f_w, fps=fps)
            writer.write(im)
    finally:
        # release the writer (if one was created) and the figure even when
        # rendering fails or there was nothing to render
        if writer is not None:
            writer.release()
        plt.close(fig)