fix: separate inference output formatting for each detection model; closes ambianic#254

Merge pull request ambianic#314 from bhavikapanara/feat/dev-inference-output
Ivelin Ivanov authored Feb 17, 2021
2 parents e95eaa7 + e956077 commit 8768c17
Showing 15 changed files with 647 additions and 247 deletions.
7 changes: 7 additions & 0 deletions peerjs-config.yaml
@@ -0,0 +1,7 @@
+host: ambianic-pnp.herokuapp.com
+ice_servers:
+  - urls:
+      - stun:stun.l.google.com:19302
+log_level: INFO
+port: 443
+secure: true
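
For context, a config like this can be read from Python with PyYAML; a minimal sketch (the file path is an assumption, not part of this commit):

    import yaml

    # Load the PnP signaling config added above (path is an assumption).
    with open('peerjs-config.yaml') as f:
        cfg = yaml.safe_load(f)

    assert cfg['port'] == 443 and cfg['secure'] is True
    print(cfg['ice_servers'][0]['urls'])  # ['stun:stun.l.google.com:19302']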
43 changes: 37 additions & 6 deletions src/ambianic/pipeline/ai/face_detect.py
@@ -16,10 +16,10 @@ def crop_image(image, box):
         width, height = image.size
 
         # Setting the points for cropped image
-        left = box[0]*width
-        top = box[1]*height
-        right = box[2]*width
-        bottom = box[3]*height
+        left = box['xmin']*width
+        top = box['ymin']*height
+        right = box['xmax']*width
+        bottom = box['ymax']*height
 
         # Cropped image of above dimension
         # (It will not change original image)
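
The effect of the new dict-keyed box is easier to see in isolation; a standalone sketch of the same cropping logic (assumes Pillow and box coordinates normalized to [0, 1]):

    from PIL import Image

    def crop_normalized(image, box):
        # box uses the new dict convention with fractional coordinates
        width, height = image.size
        return image.crop((box['xmin'] * width,    # left
                           box['ymin'] * height,   # top
                           box['xmax'] * width,    # right
                           box['ymax'] * height))  # bottom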
@@ -46,16 +46,23 @@ def process_sample(self, **sample):
             else:
                 # - apply face detection to cropped person areas
                 # - pass face detections on to next pipe element
-                for label, confidence, box in prev_inference_result:
+                for e_result in prev_inference_result:
+                    label, confidence, box = e_result['label'], \
+                        e_result['confidence'], \
+                        e_result['box']
+
                     if label == 'person' and \
-                      confidence >= self._tfengine.confidence_threshold:
+                            confidence >= self._tfengine.confidence_threshold:
                         person_regions.append(box)
                 log.debug('Received %d person boxes for face detection',
                           len(person_regions))
                 for box in person_regions:
                     person_image = self.crop_image(image, box)
                     thumbnail, tensor_image, inference_result = \
                         self.detect(image=person_image)
+
+                    inference_result = self.convert_inference_result(
+                        inference_result)
                     log.debug('Face detection inference_result: %r',
                               inference_result)
                     inf_meta = {
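
A toy walk-through of the reworked loop, using the new dict-based elements (sample values made up):

    prev_inference_result = [
        {'label': 'person', 'confidence': 0.9,
         'box': {'xmin': 0.1, 'ymin': 0.2, 'xmax': 0.4, 'ymax': 0.9}},
        {'label': 'cat', 'confidence': 0.8,
         'box': {'xmin': 0.5, 'ymin': 0.5, 'xmax': 0.7, 'ymax': 0.8}},
    ]
    confidence_threshold = 0.6
    person_regions = [e['box'] for e in prev_inference_result
                      if e['label'] == 'person'
                      and e['confidence'] >= confidence_threshold]
    assert len(person_regions) == 1  # only the person box survives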
@@ -73,3 +80,27 @@ def process_sample(self, **sample):
                 'Dropping sample: %r',
                 e,
                 sample)
+
+    def convert_inference_result(self, inference_result):
+
+        inf_json = []
+        if inference_result:
+            for inf in inference_result:
+                label, confidence, box = inf[0:3]
+                log.info('label: %s , confidence: %.0f, box: %s',
+                         label,
+                         confidence,
+                         box)
+                one_inf = {
+                    'label': label,
+                    'confidence': float(confidence),
+                    'box': {
+                        'xmin': float(box[0]),
+                        'ymin': float(box[1]),
+                        'xmax': float(box[2]),
+                        'ymax': float(box[3]),
+                    }
+                }
+                inf_json.append(one_inf)
+
+        return inf_json
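
Before/after for a single detection through convert_inference_result (values made up):

    raw = [('person', 0.87, (0.12, 0.05, 0.64, 0.98))]  # (label, conf, box)
    # convert_inference_result(raw) returns:
    # [{'label': 'person',
    #   'confidence': 0.87,
    #   'box': {'xmin': 0.12, 'ymin': 0.05, 'xmax': 0.64, 'ymax': 0.98}}]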
255 changes: 169 additions & 86 deletions src/ambianic/pipeline/ai/fall_detect.py

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions src/ambianic/pipeline/ai/image_boundingBox_detection.py
@@ -28,7 +28,6 @@ def __init__(self,
 
         super().__init__(model, **kwargs)
 
-
     def detect(self, image=None):
         """Detect objects in image.
@@ -58,7 +57,8 @@ def detect(self, image=None):
 
         desired_size = (width, height)
 
-        new_im, thumbnail = self.resize_to_input_tensor(image=image, desired_size=desired_size)
+        new_im, thumbnail = self.resize_to_input_tensor(image=image,
+                                                        desired_size=desired_size)

# calculate what fraction of the new image is the thumbnail size
# we will use these factors to adjust detection box coordinates
@@ -142,13 +142,13 @@ def detect(self, image=None):
                 x1 = min(box[3] / w_factor, 1)
                 y1 = min(box[2] / h_factor, 1)
                 log.debug('thumbnail image size: %r , '
-                    'tensor image size: %r',
-                    thumbnail.size,
-                    new_im.size)
+                          'tensor image size: %r',
+                          thumbnail.size,
+                          new_im.size)
                 log.debug('resizing detection box (x0, y0, x1, y1) '
-                    'from: %r to %r',
-                    (box[1], box[0], box[3], box[2]),
-                    (x0, y0, x1, y1))
+                          'from: %r to %r',
+                          (box[1], box[0], box[3], box[2]),
+                          (x0, y0, x1, y1))
                 inference_result.append((
                     label,
                     confidence,
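
The w_factor/h_factor math above maps a detection box from tensor coordinates back to thumbnail-relative fractions; a toy example with made-up factors (the box appears to use TF order (ymin, xmin, ymax, xmax), as the reordering in the log call suggests):

    box = (0.10, 0.20, 0.50, 0.60)  # (ymin, xmin, ymax, xmax), tensor space
    w_factor, h_factor = 0.75, 1.0  # thumbnail fills 75% of tensor width

    x0, y0 = min(box[1] / w_factor, 1), min(box[0] / h_factor, 1)
    x1, y1 = min(box[3] / w_factor, 1), min(box[2] / h_factor, 1)
    print((x0, y0, x1, y1))  # (0.2666..., 0.1, 0.8, 0.5)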
27 changes: 27 additions & 0 deletions src/ambianic/pipeline/ai/object_detect.py
@@ -20,6 +20,9 @@ def process_sample(self, **sample):
             image = sample['image']
             thumbnail, tensor_image, inference_result = \
                 self.detect(image=image)
+
+            inference_result = self.convert_inference_result(
+                inference_result)
             log.debug('Object detection inference_result: %r',
                       inference_result)
             inf_meta = {
@@ -40,3 +43,27 @@ def process_sample(self, **sample):
                 str(sample)
             )
             log.warning(stacktrace())
+
+    def convert_inference_result(self, inference_result):
+
+        inf_json = []
+        if inference_result:
+            for inf in inference_result:
+                label, confidence, box = inf[0:3]
+                log.info('label: %s , confidence: %.0f, box: %s',
+                         label,
+                         confidence,
+                         box)
+                one_inf = {
+                    'label': label,
+                    'confidence': float(confidence),
+                    'box': {
+                        'xmin': float(box[0]),
+                        'ymin': float(box[1]),
+                        'xmax': float(box[2]),
+                        'ymax': float(box[3]),
+                    }
+                }
+                inf_json.append(one_inf)
+
+        return inf_json
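
A quick sanity check of the converter's contract (hypothetical pytest snippet; ObjectDetector is an assumed class name for the element defined in object_detect.py):

    from ambianic.pipeline.ai.object_detect import ObjectDetector

    def test_convert_inference_result():
        detector = ObjectDetector.__new__(ObjectDetector)  # skip heavy __init__
        out = detector.convert_inference_result(
            [('cat', 0.9, (0.1, 0.2, 0.3, 0.4))])
        assert out == [{'label': 'cat', 'confidence': 0.9,
                        'box': {'xmin': 0.1, 'ymin': 0.2,
                                'xmax': 0.3, 'ymax': 0.4}}]
        assert detector.convert_inference_result(None) == []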
89 changes: 55 additions & 34 deletions src/ambianic/pipeline/ai/pose_engine.py
@@ -28,6 +28,7 @@
     'right ankle'
 )
 
+
 class Keypoint:
     __slots__ = ['k', 'yx', 'score']
 
@@ -66,23 +67,26 @@ def __init__(self, tfengine=None, context=None):
         self._tfengine = tfengine
 
         self._input_tensor_shape = self.get_input_tensor_shape()
-        _, self._tensor_image_height, self._tensor_image_width, self._tensor_image_depth = \
-            self.get_input_tensor_shape()
-        self.confidence_threshold = self._tfengine.confidence_threshold
-        log.debug(f"Initializing PoseEngine with confidence threshold {self.confidence_threshold}")
+
+        _, self._tensor_image_height, self._tensor_image_width, \
+            self._tensor_image_depth = \
+            self.get_input_tensor_shape()
+
+        self.confidence_threshold = self._tfengine.confidence_threshold
+        log.debug(f"Initializing PoseEngine with confidence threshold \
+            {self.confidence_threshold}")
 
     def get_input_tensor_shape(self):
         """Get the shape of the input tensor structure.
         Gets the shape required for the input tensor.
-        For models trained for image classification / detection, the shape is always
-        [1, height, width, channels]. To be used as input for :func:`run_inference`,
-        this tensor shape must be flattened into a 1-D array with size ``height *
-        width * channels``. To instead get that 1-D array size, use
+        For models trained for image classification / detection, the shape is
+        always [1, height, width, channels].
+        To be used as input for :func:`run_inference`,
+        this tensor shape must be flattened into a 1-D array with size
+        ``height * width * channels``. To instead get that 1-D array size, use
         :func:`required_input_array_size`.
         Returns:
-          A 1-D array (:obj:`numpy.ndarray`) representing the required input tensor
-          shape.
+          A 1-D array (:obj:`numpy.ndarray`) representing the required input
+          tensor shape.
         """
         return self._tfengine.input_details[0]['shape']
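
For a typical PoseNet MobileNetV1 TFLite model, for example, this returns something like the following (illustrative value; the actual shape depends on the model file):

    >>> engine.get_input_tensor_shape()
    array([  1, 257, 257,   3], dtype=int32)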

@@ -92,20 +96,24 @@ def parse_output(self, heatmap_data, offset_data, threshold):
 
         for i in range(heatmap_data.shape[-1]):
 
-            joint_heatmap = heatmap_data[...,i]
-            max_val_pos = np.squeeze(np.argwhere(joint_heatmap == np.max(joint_heatmap)))
-            remap_pos = np.array(max_val_pos/8*self._tensor_image_height, dtype=np.int32)
-            pose_kps[i, 0] = int(remap_pos[0] + offset_data[max_val_pos[0], max_val_pos[1], i])
-            pose_kps[i, 1] = int(remap_pos[1] + offset_data[max_val_pos[0], max_val_pos[1], i+joint_num])
+            joint_heatmap = heatmap_data[..., i]
+            max_val_pos = np.squeeze(
+                np.argwhere(joint_heatmap == np.max(joint_heatmap)))
+            remap_pos = np.array(max_val_pos/8*self._tensor_image_height,
+                                 dtype=np.int32)
+            pose_kps[i, 0] = int(remap_pos[0] + offset_data[max_val_pos[0],
+                                 max_val_pos[1], i])
+            pose_kps[i, 1] = int(remap_pos[1] + offset_data[max_val_pos[0],
+                                 max_val_pos[1], i+joint_num])
             max_prob = np.max(joint_heatmap)
             pose_kps[i, 3] = max_prob
             if max_prob > threshold:
-                if pose_kps[i, 0] < self._tensor_image_height and pose_kps[i, 1] < self._tensor_image_width:
+                if pose_kps[i, 0] < self._tensor_image_height and \
+                   pose_kps[i, 1] < self._tensor_image_width:
                     pose_kps[i, 2] = 1
 
         return pose_kps
 
-
     def sigmoid(self, x):
         return 1 / (1 + np.exp(-x))
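
A toy decode of one heatmap channel, mirroring the remap-then-offset arithmetic in parse_output above (synthetic arrays; output stride 8 assumed from the /8 factor):

    import numpy as np

    heatmap = np.zeros((9, 9, 1))   # one joint channel on a 9x9 grid
    heatmap[3, 5, 0] = 0.9          # peak at grid cell (row 3, col 5)
    offsets = np.zeros((9, 9, 2))   # y offsets first, then x offsets
    offsets[3, 5, 0], offsets[3, 5, 1] = 4.0, -2.0
    tensor_h = 257                  # e.g. a PoseNet input height

    pos = np.squeeze(np.argwhere(heatmap[..., 0] == heatmap[..., 0].max()))
    remap = np.array(pos / 8 * tensor_h, dtype=np.int32)  # [96, 160]
    y = int(remap[0] + offsets[pos[0], pos[1], 0])        # 96 + 4 = 100
    x = int(remap[1] + offsets[pos[0], pos[1], 1])        # 160 - 2 = 158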

@@ -129,31 +137,39 @@ def detect_poses(self, img):
             Resized image fitting the AI model input tensor.
         """
 
-        _tensor_input_size = (self._tensor_image_width, self._tensor_image_height)
+        _tensor_input_size = (self._tensor_image_width,
+                              self._tensor_image_height)
 
         # thumbnail is a proportionately resized image
-        thumbnail = TFDetectionModel.thumbnail(image=img, desired_size=_tensor_input_size)
+        thumbnail = TFDetectionModel.thumbnail(image=img,
+                                               desired_size=_tensor_input_size)
         # convert thumbnail into an image with the exact size
-        # as the input tensor preserving proportions by padding with a solid color as needed
-        template_image = TFDetectionModel.resize(image=thumbnail, desired_size=_tensor_input_size)
-
+        # as the input tensor preserving proportions by padding with
+        # a solid color as needed
+        template_image = TFDetectionModel.resize(image=thumbnail,
+                                                 desired_size=_tensor_input_size)
 
         template_input = np.expand_dims(template_image.copy(), axis=0)
         floating_model = self._tfengine.input_details[0]['dtype'] == np.float32
 
         if floating_model:
             template_input = (np.float32(template_input) - 127.5) / 127.5
 
-        self.tf_interpreter().set_tensor(self._tfengine.input_details[0]['index'], template_input)
+        self.tf_interpreter().\
+            set_tensor(self._tfengine.input_details[0]['index'],
+                       template_input)
         self.tf_interpreter().invoke()
 
-        template_output_data = self.tf_interpreter().get_tensor(self._tfengine.output_details[0]['index'])
-        template_offset_data = self.tf_interpreter().get_tensor(self._tfengine.output_details[1]['index'])
+        template_output_data = self.tf_interpreter().\
+            get_tensor(self._tfengine.output_details[0]['index'])
+        template_offset_data = self.tf_interpreter().\
+            get_tensor(self._tfengine.output_details[1]['index'])
 
         template_heatmaps = np.squeeze(template_output_data)
         template_offsets = np.squeeze(template_offset_data)
 
         kps = self.parse_output(template_heatmaps, template_offsets, 0.3)
+
         poses = []
 
         keypoint_dict = {}
@@ -163,25 +179,30 @@ def detect_poses(self, img):
         for point_i in range(keypoint_count):
             x, y = kps[point_i, 1], kps[point_i, 0]
             prob = self.sigmoid(kps[point_i, 3])
+
             if prob > self.confidence_threshold:
                 cnt += 1
-                if log.getEffectiveLevel() <= logging.DEBUG:  # development mode
-                    #draw on image and save it for debugging
+                if log.getEffectiveLevel() <= logging.DEBUG:
+                    # development mode
+                    # draw on image and save it for debugging
                     draw = ImageDraw.Draw(template_image)
-                    draw.line(((0,0), (x, y)), fill='blue')
+                    draw.line(((0, 0), (x, y)), fill='blue')
 
-                keypoint = Keypoint(KEYPOINTS[point_i], [x, y], prob)
+                keypoint = Keypoint(KEYPOINTS[point_i], [x, y], prob)
                 keypoint_dict[KEYPOINTS[point_i]] = keypoint
 
-        # overall pose score is calculated as the average of all individual keypoint scores
+        # overall pose score is calculated as the average of all
+        # individual keypoint scores
         pose_score = cnt/keypoint_count
         log.debug(f"Overall pose score (keypoint score average): {pose_score}")
         poses.append(Pose(keypoint_dict, pose_score))
-        if cnt > 0 and log.getEffectiveLevel() <= logging.DEBUG:  # development mode
+        if cnt > 0 and log.getEffectiveLevel() <= logging.DEBUG:
+            # development mode
             # save template_image for debugging
             timestr = int(time.monotonic()*1000)
-            log.debug(f"Detected a pose with {cnt} keypoints that score over the minimum confidence threshold of {self.confidence_threshold}.")
+            log.debug(f"Detected a pose with {cnt} keypoints that score over \
+                the minimum confidence threshold of \
+                {self.confidence_threshold}.")
             debug_image_file_name = \
                 f'tmp-pose-detect-image-time-{timestr}-keypoints-{cnt}.jpg'
             template_image.save(
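
The containers built above can be exercised standalone; a small sketch using the constructor calls visible in this diff, Keypoint(k, yx, score) and Pose(keypoints, score) (values made up; 17 keypoints assumed per PoseNet's COCO keypoint set):

    # Hypothetical: 12 of 17 keypoints cleared the confidence threshold.
    kp = Keypoint('left shoulder', [120, 88], 0.91)  # k, yx, score
    pose = Pose({'left shoulder': kp}, 12/17)        # pose_score ≈ 0.71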
23 changes: 3 additions & 20 deletions src/ambianic/pipeline/store.py
@@ -81,32 +81,15 @@ def _save_sample(self,
                      thumbnail=None,
                      inference_result=None,
                      inference_meta=None):
-
         time_prefix = inf_time.strftime("%Y%m%d-%H%M%S.%f%z-{suffix}.{fext}")
         image_file = time_prefix.format(suffix='image', fext='jpg')
         image_path = self._output_directory / image_file
         thumbnail_file = time_prefix.format(suffix='thumbnail', fext='jpg')
         thumbnail_path = self._output_directory / thumbnail_file
         json_file = time_prefix.format(suffix='inference', fext='json')
         json_path = self._output_directory / json_file
-        inf_json = []
-        if inference_result:
-            for inf in inference_result:
-                label, confidence, box = inf[0:3]
-                log.info('label: %s , confidence: %.0f, box: %s',
-                         label,
-                         confidence,
-                         box)
-                one_inf = {
-                    'label': label,
-                    'confidence': float(confidence),
-                    'box': {
-                        'xmin': float(box[0]),
-                        'ymin': float(box[1]),
-                        'xmax': float(box[2]),
-                        'ymax': float(box[3]),
-                    }
-                }
-                inf_json.append(one_inf)
 
         save_json = {
             'id': uuid.uuid4().hex,
             'datetime': inf_time.isoformat(),
@@ -117,7 +100,7 @@ def _save_sample(self,
             # this will be important when resolving REST API data
             # file serving
             'rel_dir': self._rel_data_dir,
-            'inference_result': inf_json,
+            'inference_result': inference_result,
             'inference_meta': inference_meta
         }
image.save(image_path)
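
With the per-model converters in place, _save_sample now persists the already-formatted result; the saved record looks roughly like this (all values made up, and fields hidden behind the fold above are omitted):

    {
        'id': '8f4e2a9c...',                   # uuid.uuid4().hex
        'datetime': '2021-02-17T12:34:56.789012',
        'rel_dir': '20210217/121314',          # shape of this value is a guess
        'inference_result': [
            {'label': 'person', 'confidence': 0.98,
             'box': {'xmin': 0.1, 'ymin': 0.0, 'xmax': 0.9, 'ymax': 1.0}},
        ],
        'inference_meta': {},                  # contents not shown in this diff
    }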