coreml: encode embedding

koush · Apr 3, 2024 · 95ac72c
1 parent faa667f
commit 95ac72c
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 12 deletions.
diff --git a/plugins/coreml/src/vision/__init__.py b/plugins/coreml/src/vision/__init__.py
@@ -1,17 +1,18 @@
 from __future__ import annotations
 
 import asyncio
+import base64
 import concurrent.futures
 import os
 from typing import Any, Tuple
 
 import coremltools as ct
+import numpy as np
 import Quartz
 import scrypted_sdk
 from Foundation import NSData, NSMakeSize
 from PIL import Image, ImageOps
 from scrypted_sdk import Setting, SettingValue
-import numpy as np
 
 import Vision
 from predict import Prediction, PredictPlugin, from_bounding_box
@@ -134,7 +135,6 @@ async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
 
         observations = await future
 
-        last = None
         objs = []
         for o in observations:
             confidence = o.confidence()
@@ -149,21 +149,19 @@ async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
             prediction = Prediction(0, confidence, from_bounding_box((l, t, w, h)))
             objs.append(prediction)
 
+            if confidence < .7:
+                continue
+
             face = input.crop((l, t, l + w, t + h)).copy().convert("RGB").resize((160, 160), Image.BILINEAR)
-            # face to tensor
             image_tensor = np.array(face).astype(np.float32).transpose([2, 0, 1])
             processed_tensor = (image_tensor - 127.5) / 128.0
-            # expand rank
             processed_tensor = np.expand_dims(processed_tensor, axis=0)
 
-            descriptor = self.model.predict({"x_1": processed_tensor})
-
-            descriptor = descriptor["var_2167"]
-            if last is not None:
-                dist = cosine_similarity(descriptor[0], last[0])
-                print(dist)
+            output = self.model.predict({"x_1": processed_tensor})["var_2167"][0]
 
-            last = descriptor
+            b = output.tobytes()
+            embedding = str(base64.encodebytes(b))
+            prediction.embedding = embedding
 
         ret = self.create_detection_result(objs, src_size, cvss)
         return ret
diff --git a/plugins/tensorflow-lite/src/predict/__init__.py b/plugins/tensorflow-lite/src/predict/__init__.py
@@ -63,10 +63,11 @@ def parse_label_contents(contents: str):
     return ret
 
 class Prediction:
-    def __init__(self, id: int, score: float, bbox: Tuple[float, float, float, float]):
+    def __init__(self, id: int, score: float, bbox: Tuple[float, float, float, float], embedding: str = None):
         self.id = id
         self.score = score
         self.bbox = bbox
+        self.embedding = embedding
 
 class PredictPlugin(DetectPlugin):
     labels: dict
@@ -121,6 +122,8 @@ def create_detection_result(self, objs: List[Prediction], size, convert_to_src_s
                 obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax - obj.bbox.xmin, obj.bbox.ymax - obj.bbox.ymin)
             detection['className'] = className
             detection['score'] = obj.score
+            if obj.embedding is not None:
+                detection['embedding'] = obj.embedding
             detections.append(detection)
 
         if convert_to_src_size: