Skip to content

Commit

Permalink
First stable version (#1)
Browse files Browse the repository at this point in the history
* use a median filter to smooth the output heat map

* add DLA Lite version; fix a critical bug in calculating accuracy

* remove the smoothing convolution due to its extremely slow execution time

* Get centroid, bounding box and probability

* replace the block model with a function; fix a bug in train

* wrap the post-processing into saved model
  • Loading branch information
biendltb authored Dec 12, 2019
1 parent 8b8bac7 commit b06221e
Show file tree
Hide file tree
Showing 7 changed files with 1,274 additions and 31 deletions.
12 changes: 8 additions & 4 deletions src/models/centernet.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class Centernet:
def __init__(self, dataset_fn: Callable = load_all_ds, network_fn: Callable = dla.dla_net,
def __init__(self, dataset_fn: Callable = load_all_ds, network_fn: Callable = dla.dla_lite_net,
lr: float = 1e-4,
dataset_args: Dict = None):

Expand All @@ -25,9 +25,13 @@ def forward_pass(self, thermal_mat, heat_map):
# use L2 loss
loss = tf.reduce_mean(tf.square(out_map - heat_map))

# smooth the output heat map
# median_filter = tf.ones((3, 3, 1, 1)) * 1/9
# out_map = tf.nn.conv2d(out_map, median_filter, strides=1, padding='SAME')

# calculate the l2 distance to the ground truth centroid
gt_pnt = tf.unravel_index(tf.math.argmax(tf.reshape(heat_map, [-1])), tf.cast(tf.shape(heat_map), tf.int64))
out_pnt = tf.unravel_index(tf.math.argmax(tf.reshape(out_map, [-1])), tf.cast(tf.shape(out_map), tf.int64))
dist = tf.linalg.norm(tf.cast(out_pnt - gt_pnt, tf.float64))
gt_pnt = tf.unravel_index(tf.math.argmax(tf.reshape(heat_map, [heat_map.shape[0], -1]), axis=1), tf.cast(tf.shape(heat_map), tf.int64)[1:3])
out_pnt = tf.unravel_index(tf.math.argmax(tf.reshape(out_map, [out_map.shape[0], -1]), axis=1), tf.cast(tf.shape(out_map), tf.int64)[1:3])
dist = tf.reduce_mean(tf.math.sqrt(tf.cast(tf.reduce_sum(tf.math.square(out_pnt - gt_pnt), axis=0), tf.float32)))

return loss, dist
50 changes: 41 additions & 9 deletions src/models/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,40 @@
import h5py
import numpy as np
import cv2
import time

from src.networks.dla import dla_net
from src.networks import dla
from src.utils import helpers


class ThermalEval:
def __init__(self):
self.model = dla_net()
self.model = dla.dla_lite_net(mode='eval')

def load_ckpt(self, ckpt_path):
checkpoint = tf.train.Checkpoint(model=self.model)
checkpoint.restore(ckpt_path)

def infer_frame(self, thermal_frame):
heat_map = self.model.predict(thermal_frame)
outs = self.model.predict(thermal_frame, batch_size=1)[0]

_map = heat_map[0, :, :, 0]
# _map = outs[0, :, :, 0]
# outs = helpers.heatmap_to_point(_map)

key_point = helpers.heatmap_to_point(_map)
max_y, max_x, bb_h, bb_w, prob = outs
max_y, max_x = int(max_y), int(max_x)

return key_point
print(prob)

return (max_y, max_x), (bb_h, bb_w), prob

def infer_video(self, thermal_path):
with h5py.File(thermal_path) as f:
# get number of databases - frame count database
n_thermal_frames = len(f.keys()) - 1

exec_time = []

# grab thermal and visual frames one-by-one for processing
for i in range(n_thermal_frames):
key = 'frame{}'.format(i)
Expand All @@ -39,23 +46,48 @@ def infer_video(self, thermal_path):
thermal_frame = np.expand_dims(thermal_frame, axis=-1)
thermal_frame = np.expand_dims(thermal_frame, axis=0)

key_point = self.infer_frame(thermal_frame)
start = time.time()
key_point, bb_size, prob = self.infer_frame(thermal_frame)
exec_time.append(time.time()-start)

vis_frame = cv2.cvtColor(thermal_gray, cv2.COLOR_GRAY2BGR)
vis_frame[key_point] = (0, 0, 255)

if prob > 0.1:
vis_frame[key_point] = (0, 255, 0)

# draw bounding boxes
bb_h, bb_w = bb_size
kp_y, kp_x = key_point

x1, y1 = int(round(kp_x - bb_w / 2)), int(round(kp_y - bb_h / 2))
x2, y2 = int(round(x1 + bb_w)), int(round(y1 + bb_h))
vis_frame = cv2.rectangle(vis_frame, (x1, y1), (x2, y2), (0, 0, 255))

scale = 4
vis_frame = cv2.resize(vis_frame, (vis_frame.shape[1] * scale, vis_frame.shape[0] * scale))

cv2.imshow('test', vis_frame)
if cv2.waitKey(1) & 0xFF == ord('q'): # 5 ms
break
print('Average execution time: {:.2f}ms | {:.2f} fps'.format(np.mean(exec_time) * 1000, 1/np.mean(exec_time)))

def save_model(self, save_path):
self.model.save(save_path, save_format='tf')


if __name__ == '__main__':
thermal_video = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/HD/thermal/anhdnt/set1_0.hdf5'
ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_1/ckpts/ckpt-9'
# ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_1/ckpts/ckpt-9'
# ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_3/ckpts/ckpt-24'
# ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_6/ckpts/ckpt-8'
# ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_7/ckpts/ckpt-20'
# ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_8/ckpts/ckpt-20'
ckpt_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_9/ckpts/ckpt-12'

thermal_model = ThermalEval()
thermal_model.load_ckpt(ckpt_path)

thermal_model.infer_video(thermal_video)

# save_path = '/media/biendltb/6e1ef38e-db2f-4eda-ad11-31252df3b87b/data/model_gym/centernet/model_9/export/'
# thermal_model.save_model(save_path)
13 changes: 10 additions & 3 deletions src/models/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from src.models.centernet import Centernet
from src.utils.path_cvt import get_path_to_vis_ims, get_path_to_ckpts
from src.datasets.thermal_dataset import load_vis_data
from src.utils import helpers

# -------> For RTX NVIDIA GPU only
from tensorflow.compat.v1 import ConfigProto
Expand Down Expand Up @@ -134,7 +135,13 @@ def generate_and_save_images(self, epoch):
plt.figure(figsize=(9, 9))

for i in range(predictions.shape[0]):
fig = predictions[i, :, :, 0] + self.visualized_eval_images[i, :, :, 0]
h_map = predictions[i, :, :, 0]
fig = h_map + self.visualized_eval_images[i, :, :, 0]

# plot the keypoint on image
keypoint, bb_size, _ = helpers.heatmap_to_point(h_map)
fig[keypoint] = 0

cmap = plt.cm.viridis
norm = plt.Normalize(vmin=fig.min(), vmax=fig.max())
image = cmap(norm(fig))
Expand All @@ -149,8 +156,8 @@ def generate_and_save_images(self, epoch):

if __name__ == '__main__':
trainer = ModelTrain(
epochs=5000,
batch_size=2,
epochs=500,
batch_size=32,
use_wandb=True
)

Expand Down
133 changes: 130 additions & 3 deletions src/networks/dla.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def _max_pooling(x, pool_size, strides):


def _avg_pooling(x, pool_size, strides):
return tf.keras.layers.AveragePooling2D(pool_size=pool_size, strides=strides)(x)
return tf.keras.layers.AveragePooling2D(pool_size=pool_size, strides=strides, padding='same')(x)


class BasicBlock(tf.keras.Model):
Expand Down Expand Up @@ -78,11 +78,34 @@ def call(self, x):
return x


def _basic_block(x, filters, kernel_size=3, strides=1):
    """Build a residual block on top of ``x`` and return its output tensor.

    The main path is ``_conv`` (helper defined elsewhere in this file)
    followed by a bias-free ``Conv2D`` and batch normalisation.  The shortcut
    is added to the main path and the sum goes through a ReLU.

    Args:
        x: input tensor indexed as (batch, h, w, channels); the channel count
            (``x.shape[3]``) must be statically known.
        filters: number of output channels of the block.
        kernel_size: kernel size of the two main-path convolutions.
        strides: stride of the second main-path convolution (int or pair).

    Returns:
        The output tensor of the block.
    """
    input_filters = x.shape[3]

    stride_values = strides if isinstance(strides, (tuple, list)) else (strides,)
    downsamples = any(s != 1 for s in stride_values)

    # The shortcut must end up with the same shape as the main path.  Both
    # conditions are known while the graph is being built, so a plain Python
    # branch is enough (no tf.cond), and the 1x1 projection layer is only
    # created when it is actually needed.  The projection carries the block's
    # stride; without it any strides != 1 made the addition below fail with a
    # spatial-shape mismatch.
    if input_filters != filters or downsamples:
        residual = tf.keras.layers.Conv2D(filters=filters, kernel_size=1, strides=strides,
                                          padding='same', use_bias=False)(x)
    else:
        residual = x

    x = _conv(x, filters=filters, kernel_size=kernel_size)

    x = tf.keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
                               padding='same', use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)

    x += residual
    x = tf.keras.layers.ReLU()(x)

    return x


# modified from Stick-To
def _dla_generator(bottom, filters, levels):
if levels == 1:
block1 = BasicBlock(filters=filters)(bottom)
block2 = BasicBlock(filters=filters)(block1)
block1 = _basic_block(bottom, filters) # BasicBlock(filters=filters)(bottom)
block2 = _basic_block(block1, filters) # BasicBlock(filters=filters)(block1)
aggregation = block1 + block2
aggregation = _conv(aggregation, filters, kernel_size=3)
else:
Expand Down Expand Up @@ -156,6 +179,110 @@ def dla_net():
return model


def heatmap_to_point(heatmaps_tensor, batch_size=1):
    """Convert heat maps to a peak position, a box-size estimate and a score.

    The heat map is first smoothed with a 3x3 Gaussian kernel.  The peak
    column/row is the one whose summed (smoothed) response is the largest.

    Args:
        heatmaps_tensor: float32 tensor indexed as (batch, h, w, channel).
            The smoothing filter has a single input channel, so the tensor
            must have exactly one channel; h and w must be statically known.
        batch_size: number of batch entries to process; the per-sample Python
            loops and the final ``tf.stack`` require it to match the batch
            dimension of ``heatmaps_tensor``.

    Returns:
        float32 tensor of shape (batch_size, 5) holding, per sample,
        ``[max_y, max_x, bb_h, bb_w, prob]``.
    """
    gaussian_kernel = tf.constant([
        [1, 2, 1],
        [2, 4, 2],
        [1, 2, 1]
    ], dtype=tf.float32) / 16.0

    # conv2d expects (kh, kw, in_channels, out_channels)
    filters = gaussian_kernel[:, :, tf.newaxis, tf.newaxis]

    original_tensor = heatmaps_tensor

    heatmaps_tensor = tf.nn.conv2d(heatmaps_tensor, filters, strides=1, padding="SAME")

    h, w = heatmaps_tensor.shape[1:3]

    # peak column / row of the smoothed map, one index per sample
    max_x = tf.math.argmax(tf.math.reduce_sum(heatmaps_tensor, axis=1), axis=1, output_type=tf.int32)[:, 0]
    max_y = tf.math.argmax(tf.math.reduce_sum(heatmaps_tensor, axis=2), axis=1, output_type=tf.int32)[:, 0]

    # Score = maximum of the raw map in the 5x5 window centred on the peak.
    # The raw map is zero-padded by 2 pixels so the window never leaves the
    # tensor when the peak lies within 2 pixels of a border (slicing at
    # `max - 2` directly goes out of bounds there).  In padded coordinates the
    # window starts at (max_y, max_x).  Since the score is clipped at 0 below,
    # the zero padding cannot change the result.
    padded_tensor = tf.pad(original_tensor, [[0, 0], [2, 2], [2, 2], [0, 0]])
    probs = tf.stack(
        [tf.reduce_max(tf.slice(padded_tensor, [i, max_y[i], max_x[i], 0], [1, 5, 5, 1])) for i in
         range(batch_size)])
    probs = tf.clip_by_value(probs, clip_value_min=0, clip_value_max=0.99999)

    # NOTE(review): the two size estimates below take the log of smoothed
    # heat-map values; non-positive values give NaN and a value of exactly 1
    # gives a division by zero.  Left unchanged - confirm the expected value
    # range of the heat map.

    # squared, normalised row distance of every row to the peak row
    pos_diff_h = tf.cast(
        tf.math.square(
            (tf.tile(tf.expand_dims(tf.range(h), axis=0), [batch_size, 1])
             - tf.tile(tf.expand_dims(max_y, -1), [1, h])) / (h - 1)
        ),
        tf.float32
    )
    # smoothed responses along the peak column, one row vector per sample
    peak_columns = tf.stack([heatmaps_tensor[i, :, max_x[i], 0] for i in range(batch_size)])
    bb_h = tf.reduce_mean(tf.sqrt(abs(pos_diff_h / (2.0 * tf.math.log(peak_columns)))), axis=1) * 2 * h

    # squared, normalised column distance of every column to the peak column
    pos_diff_w = tf.cast(
        tf.math.square(
            (tf.tile(tf.expand_dims(tf.range(w), axis=0), [batch_size, 1])
             - tf.tile(tf.expand_dims(max_x, -1), [1, w])) / (w - 1)
        ),
        tf.float32
    )
    # smoothed responses along the peak row, one row vector per sample
    peak_rows = tf.stack([heatmaps_tensor[i, max_y[i], :, 0] for i in range(batch_size)])
    bb_w = tf.reduce_mean(tf.sqrt(abs(pos_diff_w / (2.0 * tf.math.log(peak_rows)))), axis=1) * 2 * w

    out = tf.stack([tf.cast(max_y, tf.float32), tf.cast(max_x, tf.float32), bb_h, bb_w, probs], axis=-1)

    return out


def dla_lite_net(mode='train'):
    """Assemble the lightweight DLA Keras model.

    Args:
        mode: ``'train'`` returns a model whose output is the key-point heat
            map; any other value returns a model whose output is the heat map
            passed through ``heatmap_to_point`` (layer name
            ``'thermal_output'``).

    Returns:
        A ``tf.keras.Model`` whose input layer is named ``'thermal_frame'``.
    """
    width = 8  # base number of filters; every stage uses a multiple of it

    # channel last; None -> grayscale or color images
    frame = tf.keras.layers.Input(shape=INPUT_SHAPE, name='thermal_frame')

    # --- stem (resolution fractions below follow the original annotations) ---
    stem = _conv(frame, width, 7)
    level1 = _conv(stem, width * 2, 3)
    level2 = _conv(level1, width * 2, 3, strides=2)  # 1/2

    # --- stage 3 ---
    level3 = _dla_generator(level2, width * 4, levels=1)
    level3 = _max_pooling(level3, 2, 2)  # 1/4

    # --- stage 4, with a pooled shortcut coming from stage 3 ---
    level4 = _dla_generator(level3, width * 8, levels=2)
    level4 = _max_pooling(level4, 2, 2)  # 1/8
    shortcut = _conv(level3, width * 8, 1)
    shortcut = _avg_pooling(shortcut, 2, 2)  # 1/8
    level4 = level4 + shortcut

    # --- upsampling path: merge each level with the upsampled deeper one ---
    level4 = _conv(level4, width * 16, 1)
    up_to_3 = _dconv(level4, width * 8, 4, 2)  # 1/4

    level3 = _conv(level3, width * 8, 1)
    merged3 = level3 + up_to_3
    up_to_2 = _conv(merged3, width * 8, 3)
    up_to_2 = _dconv(up_to_2, width * 4, 4, 2)  # 1/2

    level2 = _conv(level2, width * 4, 1)
    merged2 = level2 + up_to_2
    level2 = _conv(merged2, width * 4, 1)
    level2 = _dconv(level2, width * 2, 4, 2)

    level1 = _conv(level1, width * 2, 1)
    merged1 = level1 + level2
    level1 = _conv(merged1, width * 2, 1)

    features = _conv(level1, width, 1)

    # output head: one heat-map channel per class
    keypoints = _conv(features, NUM_CLASS, 3)

    if mode == 'train':
        return tf.keras.Model(inputs=frame, outputs=keypoints)

    # every other mode: wrap the post-processing into the model itself
    decoded = tf.keras.layers.Lambda(lambda hmap: heatmap_to_point(hmap), name='thermal_output')(keypoints)
    return tf.keras.Model(inputs=frame, outputs=decoded)





Expand Down
Loading

0 comments on commit b06221e

Please sign in to comment.