fix: separate inference output formatting for each detection model; closes ambianic#254

Merge pull request ambianic#314 from bhavikapanara/feat/dev-inference-output
Ivelin Ivanov authored Feb 17, 2021
2 parents e95eaa7 + e956077 commit 8768c17
Showing 15 changed files with 647 additions and 247 deletions.
7 changes: 7 additions & 0 deletions peerjs-config.yaml
@@ -0,0 +1,7 @@
+host: ambianic-pnp.herokuapp.com
+ice_servers:
+  - urls:
+      - stun:stun.l.google.com:19302
+log_level: INFO
+port: 443
+secure: true
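
For context, a config like this can be read from Python with PyYAML; a minimal sketch (the file path is an assumption, not part of this commit):

    import yaml

    # Load the PnP signaling config added above (path is an assumption).
    with open('peerjs-config.yaml') as f:
        cfg = yaml.safe_load(f)

    assert cfg['port'] == 443 and cfg['secure'] is True
    print(cfg['ice_servers'][0]['urls'])  # ['stun:stun.l.google.com:19302']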
43 changes: 37 additions & 6 deletions src/ambianic/pipeline/ai/face_detect.py
@@ -16,10 +16,10 @@ def crop_image(image, box):
         width, height = image.size
 
         # Setting the points for cropped image
-        left = box[0]*width
-        top = box[1]*height
-        right = box[2]*width
-        bottom = box[3]*height
+        left = box['xmin']*width
+        top = box['ymin']*height
+        right = box['xmax']*width
+        bottom = box['ymax']*height
 
         # Cropped image of above dimension
         # (It will not change original image)
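
The effect of the new dict-keyed box is easier to see in isolation; a standalone sketch of the same cropping logic (assumes Pillow and box coordinates normalized to [0, 1]):

    from PIL import Image

    def crop_normalized(image, box):
        # box uses the new dict convention with fractional coordinates
        width, height = image.size
        return image.crop((box['xmin'] * width,    # left
                           box['ymin'] * height,   # top
                           box['xmax'] * width,    # right
                           box['ymax'] * height))  # bottom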
@@ -46,16 +46,23 @@ def process_sample(self, **sample):
             else:
                 # - apply face detection to cropped person areas
                 # - pass face detections on to next pipe element
-                for label, confidence, box in prev_inference_result:
+                for e_result in prev_inference_result:
+                    label, confidence, box = e_result['label'], \
+                        e_result['confidence'], \
+                        e_result['box']
+
                     if label == 'person' and \
-                      confidence >= self._tfengine.confidence_threshold:
+                            confidence >= self._tfengine.confidence_threshold:
                         person_regions.append(box)
                 log.debug('Received %d person boxes for face detection',
                           len(person_regions))
                 for box in person_regions:
                     person_image = self.crop_image(image, box)
                     thumbnail, tensor_image, inference_result = \
                         self.detect(image=person_image)
+
+                    inference_result = self.convert_inference_result(
+                        inference_result)
                     log.debug('Face detection inference_result: %r',
                               inference_result)
                     inf_meta = {
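
A toy walk-through of the reworked loop, using the new dict-based elements (sample values made up):

    prev_inference_result = [
        {'label': 'person', 'confidence': 0.9,
         'box': {'xmin': 0.1, 'ymin': 0.2, 'xmax': 0.4, 'ymax': 0.9}},
        {'label': 'cat', 'confidence': 0.8,
         'box': {'xmin': 0.5, 'ymin': 0.5, 'xmax': 0.7, 'ymax': 0.8}},
    ]
    confidence_threshold = 0.6
    person_regions = [e['box'] for e in prev_inference_result
                      if e['label'] == 'person'
                      and e['confidence'] >= confidence_threshold]
    assert len(person_regions) == 1  # only the person box survives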
@@ -73,3 +80,27 @@ def process_sample(self, **sample):
                 'Dropping sample: %r',
                 e,
                 sample)
+
+    def convert_inference_result(self, inference_result):
+
+        inf_json = []
+        if inference_result:
+            for inf in inference_result:
+                label, confidence, box = inf[0:3]
+                log.info('label: %s , confidence: %.0f, box: %s',
+                         label,
+                         confidence,
+                         box)
+                one_inf = {
+                    'label': label,
+                    'confidence': float(confidence),
+                    'box': {
+                        'xmin': float(box[0]),
+                        'ymin': float(box[1]),
+                        'xmax': float(box[2]),
+                        'ymax': float(box[3]),
+                    }
+                }
+                inf_json.append(one_inf)
+
+        return inf_json
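
Before/after for a single detection through convert_inference_result (values made up):

    raw = [('person', 0.87, (0.12, 0.05, 0.64, 0.98))]  # (label, conf, box)
    # convert_inference_result(raw) returns:
    # [{'label': 'person',
    #   'confidence': 0.87,
    #   'box': {'xmin': 0.12, 'ymin': 0.05, 'xmax': 0.64, 'ymax': 0.98}}]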
255 changes: 169 additions & 86 deletions src/ambianic/pipeline/ai/fall_detect.py

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions src/ambianic/pipeline/ai/image_boundingBox_detection.py
@@ -28,7 +28,6 @@ def __init__(self,
 
         super().__init__(model, **kwargs)
 
-
     def detect(self, image=None):
         """Detect objects in image.
@@ -58,7 +57,8 @@ def detect(self, image=None):
 
         desired_size = (width, height)
 
-        new_im, thumbnail = self.resize_to_input_tensor(image=image, desired_size=desired_size)
+        new_im, thumbnail = self.resize_to_input_tensor(image=image,
+                                                        desired_size=desired_size)

# calculate what fraction of the new image is the thumbnail size
# we will use these factors to adjust detection box coordinates
@@ -142,13 +142,13 @@ def detect(self, image=None):
                 x1 = min(box[3] / w_factor, 1)
                 y1 = min(box[2] / h_factor, 1)
                 log.debug('thumbnail image size: %r , '
-                    'tensor image size: %r',
-                    thumbnail.size,
-                    new_im.size)
+                          'tensor image size: %r',
+                          thumbnail.size,
+                          new_im.size)
                 log.debug('resizing detection box (x0, y0, x1, y1) '
-                    'from: %r to %r',
-                    (box[1], box[0], box[3], box[2]),
-                    (x0, y0, x1, y1))
+                          'from: %r to %r',
+                          (box[1], box[0], box[3], box[2]),
+                          (x0, y0, x1, y1))
                 inference_result.append((
                     label,
                     confidence,
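
The w_factor/h_factor math above maps a detection box from tensor coordinates back to thumbnail-relative fractions; a toy example with made-up factors (the box appears to use TF order (ymin, xmin, ymax, xmax), as the reordering in the log call suggests):

    box = (0.10, 0.20, 0.50, 0.60)  # (ymin, xmin, ymax, xmax), tensor space
    w_factor, h_factor = 0.75, 1.0  # thumbnail fills 75% of tensor width

    x0, y0 = min(box[1] / w_factor, 1), min(box[0] / h_factor, 1)
    x1, y1 = min(box[3] / w_factor, 1), min(box[2] / h_factor, 1)
    print((x0, y0, x1, y1))  # (0.2666..., 0.1, 0.8, 0.5)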
27 changes: 27 additions & 0 deletions src/ambianic/pipeline/ai/object_detect.py
@@ -20,6 +20,9 @@ def process_sample(self, **sample):
             image = sample['image']
             thumbnail, tensor_image, inference_result = \
                 self.detect(image=image)
+
+            inference_result = self.convert_inference_result(
+                inference_result)
             log.debug('Object detection inference_result: %r',
                       inference_result)
             inf_meta = {
@@ -40,3 +43,27 @@ def process_sample(self, **sample):
                 str(sample)
             )
             log.warning(stacktrace())
+
+    def convert_inference_result(self, inference_result):
+
+        inf_json = []
+        if inference_result:
+            for inf in inference_result:
+                label, confidence, box = inf[0:3]
+                log.info('label: %s , confidence: %.0f, box: %s',
+                         label,
+                         confidence,
+                         box)
+                one_inf = {
+                    'label': label,
+                    'confidence': float(confidence),
+                    'box': {
+                        'xmin': float(box[0]),
+                        'ymin': float(box[1]),
+                        'xmax': float(box[2]),
+                        'ymax': float(box[3]),
+                    }
+                }
+                inf_json.append(one_inf)
+
+        return inf_json
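
A quick sanity check of the converter's contract (hypothetical pytest snippet; ObjectDetector is an assumed class name for the element defined in object_detect.py):

    from ambianic.pipeline.ai.object_detect import ObjectDetector

    def test_convert_inference_result():
        detector = ObjectDetector.__new__(ObjectDetector)  # skip heavy __init__
        out = detector.convert_inference_result(
            [('cat', 0.9, (0.1, 0.2, 0.3, 0.4))])
        assert out == [{'label': 'cat', 'confidence': 0.9,
                        'box': {'xmin': 0.1, 'ymin': 0.2,
                                'xmax': 0.3, 'ymax': 0.4}}]
        assert detector.convert_inference_result(None) == []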
89 changes: 55 additions & 34 deletions src/ambianic/pipeline/ai/pose_engine.py
@@ -28,6 +28,7 @@
     'right ankle'
 )
 
+
 class Keypoint:
     __slots__ = ['k', 'yx', 'score']
 
@@ -66,23 +67,26 @@ def __init__(self, tfengine=None, context=None):
         self._tfengine = tfengine
 
         self._input_tensor_shape = self.get_input_tensor_shape()
-        _, self._tensor_image_height, self._tensor_image_width, self._tensor_image_depth = \
-            self.get_input_tensor_shape()
-        self.confidence_threshold = self._tfengine.confidence_threshold
-        log.debug(f"Initializing PoseEngine with confidence threshold {self.confidence_threshold}")
+
+        _, self._tensor_image_height, self._tensor_image_width, \
+            self._tensor_image_depth = \
+            self.get_input_tensor_shape()
+
+        self.confidence_threshold = self._tfengine.confidence_threshold
+        log.debug(f"Initializing PoseEngine with confidence threshold \
+            {self.confidence_threshold}")
 
     def get_input_tensor_shape(self):
         """Get the shape of the input tensor structure.
         Gets the shape required for the input tensor.
-        For models trained for image classification / detection, the shape is always
-        [1, height, width, channels]. To be used as input for :func:`run_inference`,
-        this tensor shape must be flattened into a 1-D array with size ``height *
-        width * channels``. To instead get that 1-D array size, use
+        For models trained for image classification / detection, the shape is
+        always [1, height, width, channels].
+        To be used as input for :func:`run_inference`,
+        this tensor shape must be flattened into a 1-D array with size
+        ``height * width * channels``. To instead get that 1-D array size, use
         :func:`required_input_array_size`.
         Returns:
-          A 1-D array (:obj:`numpy.ndarray`) representing the required input tensor
-          shape.
+          A 1-D array (:obj:`numpy.ndarray`) representing the required input
+          tensor shape.
         """
         return self._tfengine.input_details[0]['shape']
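
For a typical PoseNet MobileNetV1 TFLite model, for example, this returns something like the following (illustrative value; the actual shape depends on the model file):

    >>> engine.get_input_tensor_shape()
    array([  1, 257, 257,   3], dtype=int32)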

@@ -92,20 +96,24 @@ def parse_output(self, heatmap_data, offset_data, threshold):
 
         for i in range(heatmap_data.shape[-1]):
 
-            joint_heatmap = heatmap_data[...,i]
-            max_val_pos = np.squeeze(np.argwhere(joint_heatmap == np.max(joint_heatmap)))
-            remap_pos = np.array(max_val_pos/8*self._tensor_image_height, dtype=np.int32)
-            pose_kps[i, 0] = int(remap_pos[0] + offset_data[max_val_pos[0], max_val_pos[1], i])
-            pose_kps[i, 1] = int(remap_pos[1] + offset_data[max_val_pos[0], max_val_pos[1], i+joint_num])
+            joint_heatmap = heatmap_data[..., i]
+            max_val_pos = np.squeeze(
+                np.argwhere(joint_heatmap == np.max(joint_heatmap)))
+            remap_pos = np.array(max_val_pos/8*self._tensor_image_height,
+                                 dtype=np.int32)
+            pose_kps[i, 0] = int(remap_pos[0] + offset_data[max_val_pos[0],
+                                 max_val_pos[1], i])
+            pose_kps[i, 1] = int(remap_pos[1] + offset_data[max_val_pos[0],
+                                 max_val_pos[1], i+joint_num])
             max_prob = np.max(joint_heatmap)
             pose_kps[i, 3] = max_prob
             if max_prob > threshold:
-                if pose_kps[i, 0] < self._tensor_image_height and pose_kps[i, 1] < self._tensor_image_width:
+                if pose_kps[i, 0] < self._tensor_image_height and \
+                   pose_kps[i, 1] < self._tensor_image_width:
                     pose_kps[i, 2] = 1
 
         return pose_kps
 
-
     def sigmoid(self, x):
         return 1 / (1 + np.exp(-x))
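
A toy decode of one heatmap channel, mirroring the remap-then-offset arithmetic in parse_output above (synthetic arrays; output stride 8 assumed from the /8 factor):

    import numpy as np

    heatmap = np.zeros((9, 9, 1))   # one joint channel on a 9x9 grid
    heatmap[3, 5, 0] = 0.9          # peak at grid cell (row 3, col 5)
    offsets = np.zeros((9, 9, 2))   # y offsets first, then x offsets
    offsets[3, 5, 0], offsets[3, 5, 1] = 4.0, -2.0
    tensor_h = 257                  # e.g. a PoseNet input height

    pos = np.squeeze(np.argwhere(heatmap[..., 0] == heatmap[..., 0].max()))
    remap = np.array(pos / 8 * tensor_h, dtype=np.int32)  # [96, 160]
    y = int(remap[0] + offsets[pos[0], pos[1], 0])        # 96 + 4 = 100
    x = int(remap[1] + offsets[pos[0], pos[1], 1])        # 160 - 2 = 158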

@@ -129,31 +137,39 @@ def detect_poses(self, img):
             Resized image fitting the AI model input tensor.
         """
 
-        _tensor_input_size = (self._tensor_image_width, self._tensor_image_height)
+        _tensor_input_size = (self._tensor_image_width,
+                              self._tensor_image_height)
 
         # thumbnail is a proportionately resized image
-        thumbnail = TFDetectionModel.thumbnail(image=img, desired_size=_tensor_input_size)
+        thumbnail = TFDetectionModel.thumbnail(image=img,
+                                               desired_size=_tensor_input_size)
         # convert thumbnail into an image with the exact size
-        # as the input tensor preserving proportions by padding with a solid color as needed
-        template_image = TFDetectionModel.resize(image=thumbnail, desired_size=_tensor_input_size)
-
+        # as the input tensor preserving proportions by padding with
+        # a solid color as needed
+        template_image = TFDetectionModel.resize(image=thumbnail,
+                                                 desired_size=_tensor_input_size)
 
         template_input = np.expand_dims(template_image.copy(), axis=0)
         floating_model = self._tfengine.input_details[0]['dtype'] == np.float32
 
         if floating_model:
             template_input = (np.float32(template_input) - 127.5) / 127.5
 
-        self.tf_interpreter().set_tensor(self._tfengine.input_details[0]['index'], template_input)
+        self.tf_interpreter().\
+            set_tensor(self._tfengine.input_details[0]['index'],
+                       template_input)
         self.tf_interpreter().invoke()
 
-        template_output_data = self.tf_interpreter().get_tensor(self._tfengine.output_details[0]['index'])
-        template_offset_data = self.tf_interpreter().get_tensor(self._tfengine.output_details[1]['index'])
+        template_output_data = self.tf_interpreter().\
+            get_tensor(self._tfengine.output_details[0]['index'])
+        template_offset_data = self.tf_interpreter().\
+            get_tensor(self._tfengine.output_details[1]['index'])
 
         template_heatmaps = np.squeeze(template_output_data)
         template_offsets = np.squeeze(template_offset_data)
 
         kps = self.parse_output(template_heatmaps, template_offsets, 0.3)
+
         poses = []
 
         keypoint_dict = {}
@@ -163,25 +179,30 @@ def detect_poses(self, img):
         for point_i in range(keypoint_count):
             x, y = kps[point_i, 1], kps[point_i, 0]
             prob = self.sigmoid(kps[point_i, 3])
+
             if prob > self.confidence_threshold:
                 cnt += 1
-                if log.getEffectiveLevel() <= logging.DEBUG:  # development mode
-                    #draw on image and save it for debugging
+                if log.getEffectiveLevel() <= logging.DEBUG:
+                    # development mode
+                    # draw on image and save it for debugging
                     draw = ImageDraw.Draw(template_image)
-                    draw.line(((0,0), (x, y)), fill='blue')
+                    draw.line(((0, 0), (x, y)), fill='blue')
 
-                keypoint = Keypoint(KEYPOINTS[point_i], [x, y], prob)
+                keypoint = Keypoint(KEYPOINTS[point_i], [x, y], prob)
                 keypoint_dict[KEYPOINTS[point_i]] = keypoint
 
-        # overall pose score is calculated as the average of all individual keypoint scores
+        # overall pose score is calculated as the average of all
+        # individual keypoint scores
         pose_score = cnt/keypoint_count
         log.debug(f"Overall pose score (keypoint score average): {pose_score}")
         poses.append(Pose(keypoint_dict, pose_score))
-        if cnt > 0 and log.getEffectiveLevel() <= logging.DEBUG:  # development mode
+        if cnt > 0 and log.getEffectiveLevel() <= logging.DEBUG:
+            # development mode
             # save template_image for debugging
             timestr = int(time.monotonic()*1000)
-            log.debug(f"Detected a pose with {cnt} keypoints that score over the minimum confidence threshold of {self.confidence_threshold}.")
+            log.debug(f"Detected a pose with {cnt} keypoints that score over \
+                the minimum confidence threshold of \
+                {self.confidence_threshold}.")
             debug_image_file_name = \
                 f'tmp-pose-detect-image-time-{timestr}-keypoints-{cnt}.jpg'
             template_image.save(
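
The containers built above can be exercised standalone; a small sketch using the constructor calls visible in this diff, Keypoint(k, yx, score) and Pose(keypoints, score) (values made up; 17 keypoints assumed per PoseNet's COCO keypoint set):

    # Hypothetical: 12 of 17 keypoints cleared the confidence threshold.
    kp = Keypoint('left shoulder', [120, 88], 0.91)  # k, yx, score
    pose = Pose({'left shoulder': kp}, 12/17)        # pose_score ≈ 0.71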
23 changes: 3 additions & 20 deletions src/ambianic/pipeline/store.py
@@ -81,32 +81,15 @@ def _save_sample(self,
                      thumbnail=None,
                      inference_result=None,
                      inference_meta=None):
-
         time_prefix = inf_time.strftime("%Y%m%d-%H%M%S.%f%z-{suffix}.{fext}")
         image_file = time_prefix.format(suffix='image', fext='jpg')
         image_path = self._output_directory / image_file
         thumbnail_file = time_prefix.format(suffix='thumbnail', fext='jpg')
         thumbnail_path = self._output_directory / thumbnail_file
         json_file = time_prefix.format(suffix='inference', fext='json')
         json_path = self._output_directory / json_file
-        inf_json = []
-        if inference_result:
-            for inf in inference_result:
-                label, confidence, box = inf[0:3]
-                log.info('label: %s , confidence: %.0f, box: %s',
-                         label,
-                         confidence,
-                         box)
-                one_inf = {
-                    'label': label,
-                    'confidence': float(confidence),
-                    'box': {
-                        'xmin': float(box[0]),
-                        'ymin': float(box[1]),
-                        'xmax': float(box[2]),
-                        'ymax': float(box[3]),
-                    }
-                }
-                inf_json.append(one_inf)
 
         save_json = {
             'id': uuid.uuid4().hex,
             'datetime': inf_time.isoformat(),
@@ -117,7 +100,7 @@ def _save_sample(self,
             # this will be important when resolving REST API data
             # file serving
             'rel_dir': self._rel_data_dir,
-            'inference_result': inf_json,
+            'inference_result': inference_result,
             'inference_meta': inference_meta
         }
image.save(image_path)
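
With the per-model converters in place, _save_sample now persists the already-formatted result; the saved record looks roughly like this (all values made up, and fields hidden behind the fold above are omitted):

    {
        'id': '8f4e2a9c...',                   # uuid.uuid4().hex
        'datetime': '2021-02-17T12:34:56.789012',
        'rel_dir': '20210217/121314',          # shape of this value is a guess
        'inference_result': [
            {'label': 'person', 'confidence': 0.98,
             'box': {'xmin': 0.1, 'ymin': 0.0, 'xmax': 0.9, 'ymax': 1.0}},
        ],
        'inference_meta': {},                  # contents not shown in this diff
    }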