predict: fix lpr deskew bugs

koush · Jun 5, 2024 · 5b12401 · 5b12401
1 parent d2f1c69
commit 5b12401
Show file tree

Hide file tree

Showing 10 changed files with 73 additions and 27 deletions.
diff --git a/plugins/coreml/package-lock.json b/plugins/coreml/package-lock.json
diff --git a/plugins/coreml/package.json b/plugins/coreml/package.json
@@ -42,5 +42,5 @@
    "devDependencies": {
       "@scrypted/sdk": "file:../../sdk"
    },
-   "version": "0.1.58"
+   "version": "0.1.59"
 }
diff --git a/plugins/onnx/package-lock.json b/plugins/onnx/package-lock.json
diff --git a/plugins/onnx/package.json b/plugins/onnx/package.json
@@ -42,5 +42,5 @@
    "devDependencies": {
       "@scrypted/sdk": "file:../../sdk"
    },
-   "version": "0.1.96"
+   "version": "0.1.97"
 }
diff --git a/plugins/openvino/package-lock.json b/plugins/openvino/package-lock.json
diff --git a/plugins/openvino/package.json b/plugins/openvino/package.json
@@ -42,5 +42,5 @@
    "devDependencies": {
       "@scrypted/sdk": "file:../../sdk"
    },
-   "version": "0.1.97"
+   "version": "0.1.98"
 }
diff --git a/plugins/openvino/src/common/text.py b/plugins/openvino/src/common/text.py
@@ -49,27 +49,37 @@ def calculate_y_change(original_height, skew_angle_radians):
 
     return y_change
 
-async def prepare_text_result(d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float):
+async def prepare_text_result(d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float, deskew_height: float):
     textImage = await crop_text(d, image)
 
     skew_height_change = calculate_y_change(d["boundingBox"][3], skew_angle)
     skew_height_change = math.floor(skew_height_change)
     textImage = skew_image(textImage, skew_angle)
     # crop to deskew_height rows: keep the top rows when skew_height_change > 0, the bottom rows when it is < 0
     if skew_height_change > 0:
-        textImage = textImage.crop((0, 0, textImage.width, textImage.height - skew_height_change))
+        textImage = textImage.crop((0, 0, textImage.width, deskew_height))
     elif skew_height_change < 0:
-        textImage = textImage.crop((0, -skew_height_change, textImage.width, textImage.height))
+        textImage = textImage.crop((0, textImage.height - deskew_height, textImage.width, textImage.height))
 
-    new_height = 64
+    target_height = 64
+    height_padding = 3
+    new_height = target_height - height_padding * 2
     new_width = int(textImage.width * new_height / textImage.height)
     textImage = textImage.resize((new_width, new_height), resample=Image.LANCZOS).convert("L")
 
     new_width = 256
+    # edge color is the mean of four edge-midpoint pixels; start with the left edge at mid-height
+    edge_color = textImage.getpixel((0, textImage.height // 2))
+    # add the right edge pixel at mid-height
+    edge_color += textImage.getpixel((textImage.width - 1, textImage.height // 2))
+    # add the top edge pixel at mid-width
+    edge_color += textImage.getpixel((textImage.width // 2, 0))
+    # add the bottom edge pixel at mid-width
+    edge_color += textImage.getpixel((textImage.width // 2, textImage.height - 1))
+    edge_color = edge_color // 4
+
     # calculate padding dimensions
-    padding = (0, 0, new_width - textImage.width, 0)
-    # todo: clamp entire edge rather than just center
-    edge_color = textImage.getpixel((textImage.width - 1, textImage.height // 2))
+    padding = (0, height_padding, new_width - textImage.width, height_padding)
     # pad image
     textImage = ImageOps.expand(textImage, padding, fill=edge_color)
     # pil to numpy

diff --git a/plugins/openvino/src/predict/face_recognize.py b/plugins/openvino/src/predict/face_recognize.py
@@ -15,6 +15,12 @@
 from common import yolo
 from predict import PredictPlugin
 
+def cosine_similarity(vector_a, vector_b):
+    dot_product = np.dot(vector_a, vector_b)
+    norm_a = np.linalg.norm(vector_a)
+    norm_b = np.linalg.norm(vector_b)
+    similarity = dot_product / (norm_a * norm_b)
+    return similarity
 
 class FaceRecognizeDetection(PredictPlugin):
     def __init__(self, nativeId: str | None = None):
@@ -153,4 +159,22 @@ async def run_detection_image(
         if len(futures):
             await asyncio.wait(futures)
 
+        # last = None
+        # for d in ret['detections']:
+        #     if d["className"] != "face":
+        #         continue
+        #     check = d.get("embedding")
+        #     if check is None:
+        #         continue
+        #     # decode base64 string check
+        #     embedding = base64.b64decode(check)
+        #     embedding = np.frombuffer(embedding, dtype=np.float32)
+        #     if last is None:
+        #         last = embedding
+        #         continue
+        #     # convert to numpy float32 arrays
+        #     similarity = cosine_similarity(last, embedding)
+        #     print('similarity', similarity)
+        #     last = embedding
+
         return ret
diff --git a/plugins/openvino/src/predict/text_recognize.py b/plugins/openvino/src/predict/text_recognize.py
@@ -62,7 +62,7 @@ async def detect_once(
         ratio_h = ratio_w = 1
         text_threshold = 0.7
         link_threshold = 0.9
-        low_text = 0.4
+        low_text = 0.5
         poly = False
 
         boxes_list, polys_list, scores_list = [], [], []
@@ -138,7 +138,7 @@ async def run_detection_image(
                 "className": "text",
             }
             futures.append(
-                asyncio.ensure_future(self.setLabel(d, image, group["skew_angle"]))
+                asyncio.ensure_future(self.setLabel(d, image, group["skew_angle"], group['deskew_height']))
             )
             detections.append(d)
 
@@ -153,10 +153,10 @@ async def run_detection_image(
         return ret
 
     async def setLabel(
-        self, d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float
+        self, d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float, deskew_height: float
     ):
         try:
-            image_tensor = await prepare_text_result(d, image, skew_angle)
+            image_tensor = await prepare_text_result(d, image, skew_angle, deskew_height)
             preds = await self.predictTextModel(image_tensor)
             d["label"] = process_text_result(preds)
 

diff --git a/plugins/openvino/src/predict/text_skew.py b/plugins/openvino/src/predict/text_skew.py
@@ -61,28 +61,40 @@ def find_adjacent_groups(boxes: List[BoundingBox], scores: List[float]) -> List[
             if added_to_group:
                 break
         if not added_to_group:
-            groups.append({"boxes": [box], "scores": [scores[index]], "skew_angle": 0})
+            groups.append({"boxes": [box], "scores": [scores[index]]})
 
     # Calculate the skew angle of each group
     for group in groups:
         boxes = group["boxes"]
         group["union"] = union_boxes(boxes)
         if len(boxes) - 1:
-            lm = (boxes[0][1] + boxes[0][3]) / 2
-            rm = (boxes[-1][1] + boxes[-1][3]) / 2
-            dx = (boxes[-1][0]) - (boxes[0][0] + boxes[0][2])
+            lm = boxes[0][1] + boxes[0][3] / 2
+            rm = boxes[-1][1] + boxes[-1][3] / 2
+            dx = (boxes[-1][0]) - (boxes[0][0])
             minx = min([box[0] for box in boxes])
             maxx = max([box[0] + box[2] for box in boxes])
+
+            # denoise by filtering the box height
+            minh = min([box[3] for box in boxes])
+            median_height = sorted([box[3] for box in boxes])[len(boxes) // 2]
             maxh = max([box[3] for box in boxes])
-            pad_height = maxh * 0.05
+            filter_height = median_height
+            pad_height = filter_height * 0.05
+
             dx = maxx - minx
-            group['skew_angle'] = math.atan2(rm - lm, dx) * 2
+            group['skew_angle'] = math.atan((rm - lm) / dx)
+            group['deskew_height'] = filter_height + pad_height * 2
             # pad this box by a few pixels
-            group['union'] = (group['union'][0] - pad_height, group['union'][1] - pad_height, group['union'][2] + pad_height * 2, group['union'][3] + pad_height * 2)
+            group['union'] = (
+                group['union'][0] - pad_height,
+                group['union'][1] - pad_height,
+                group['union'][2] + pad_height * 2,
+                group['union'][3] + pad_height * 2)
             # average the scores
             group['score'] = sum(group['scores']) / len(group['scores'])
         else:
             group['skew_angle'] = 0
+            group['deskew_height'] = boxes[0][3]
             group['score'] = group['scores'][0]
 
     return groups