Skip to content

Commit

Permalink
predict: fix lpr deskew bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
koush committed Jun 5, 2024
1 parent d2f1c69 commit 5b12401
Show file tree
Hide file tree
Showing 10 changed files with 73 additions and 27 deletions.
4 changes: 2 additions & 2 deletions plugins/coreml/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion plugins/coreml/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.58"
"version": "0.1.59"
}
4 changes: 2 additions & 2 deletions plugins/onnx/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion plugins/onnx/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.96"
"version": "0.1.97"
}
4 changes: 2 additions & 2 deletions plugins/openvino/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion plugins/openvino/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.97"
"version": "0.1.98"
}
24 changes: 17 additions & 7 deletions plugins/openvino/src/common/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,27 +49,37 @@ def calculate_y_change(original_height, skew_angle_radians):

return y_change

async def prepare_text_result(d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float):
async def prepare_text_result(d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float, deskew_height: float):
textImage = await crop_text(d, image)

skew_height_change = calculate_y_change(d["boundingBox"][3], skew_angle)
skew_height_change = math.floor(skew_height_change)
textImage = skew_image(textImage, skew_angle)
# crop skew_height_change from top
if skew_height_change > 0:
textImage = textImage.crop((0, 0, textImage.width, textImage.height - skew_height_change))
textImage = textImage.crop((0, 0, textImage.width, deskew_height))
elif skew_height_change < 0:
textImage = textImage.crop((0, -skew_height_change, textImage.width, textImage.height))
textImage = textImage.crop((0, textImage.height - deskew_height, textImage.width, textImage.height))

new_height = 64
target_height = 64
height_padding = 3
new_height = target_height - height_padding * 2
new_width = int(textImage.width * new_height / textImage.height)
textImage = textImage.resize((new_width, new_height), resample=Image.LANCZOS).convert("L")

new_width = 256
# sample the left edge at mid-height
edge_color = textImage.getpixel((0, textImage.height // 2))
# add the right edge sample at mid-height
edge_color += textImage.getpixel((textImage.width - 1, textImage.height // 2))
# add the top edge sample at mid-width
edge_color += textImage.getpixel((textImage.width // 2, 0))
# add the bottom edge sample at mid-width
edge_color += textImage.getpixel((textImage.width // 2, textImage.height - 1))
edge_color = edge_color // 4

# calculate padding dimensions
padding = (0, 0, new_width - textImage.width, 0)
# todo: clamp entire edge rather than just center
edge_color = textImage.getpixel((textImage.width - 1, textImage.height // 2))
padding = (0, height_padding, new_width - textImage.width, height_padding)
# pad image
textImage = ImageOps.expand(textImage, padding, fill=edge_color)
# pil to numpy
Expand Down
24 changes: 24 additions & 0 deletions plugins/openvino/src/predict/face_recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
from common import yolo
from predict import PredictPlugin

def cosine_similarity(vector_a, vector_b):
    """Return the cosine similarity of two vectors.

    Computes ``dot(a, b) / (norm(a) * norm(b))``.

    Args:
        vector_a: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vector_b: Second vector, same length as ``vector_a``.

    Returns:
        The cosine of the angle between the vectors, or ``0.0`` when either
        vector has zero norm (the ratio is undefined in that case).
    """
    denominator = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    # A zero-norm vector would otherwise divide by zero and yield NaN plus a
    # RuntimeWarning; report "no similarity" instead.
    if denominator == 0:
        return 0.0
    return np.dot(vector_a, vector_b) / denominator

class FaceRecognizeDetection(PredictPlugin):
def __init__(self, nativeId: str | None = None):
Expand Down Expand Up @@ -153,4 +159,22 @@ async def run_detection_image(
if len(futures):
await asyncio.wait(futures)

# last = None
# for d in ret['detections']:
# if d["className"] != "face":
# continue
# check = d.get("embedding")
# if check is None:
# continue
# # decode base64 string check
# embedding = base64.b64decode(check)
# embedding = np.frombuffer(embedding, dtype=np.float32)
# if last is None:
# last = embedding
# continue
# # convert to numpy float32 arrays
# similarity = cosine_similarity(last, embedding)
# print('similarity', similarity)
# last = embedding

return ret
8 changes: 4 additions & 4 deletions plugins/openvino/src/predict/text_recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ async def detect_once(
ratio_h = ratio_w = 1
text_threshold = 0.7
link_threshold = 0.9
low_text = 0.4
low_text = 0.5
poly = False

boxes_list, polys_list, scores_list = [], [], []
Expand Down Expand Up @@ -138,7 +138,7 @@ async def run_detection_image(
"className": "text",
}
futures.append(
asyncio.ensure_future(self.setLabel(d, image, group["skew_angle"]))
asyncio.ensure_future(self.setLabel(d, image, group["skew_angle"], group['deskew_height']))
)
detections.append(d)

Expand All @@ -153,10 +153,10 @@ async def run_detection_image(
return ret

async def setLabel(
self, d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float
self, d: ObjectDetectionResult, image: scrypted_sdk.Image, skew_angle: float, deskew_height: float
):
try:
image_tensor = await prepare_text_result(d, image, skew_angle)
image_tensor = await prepare_text_result(d, image, skew_angle, deskew_height)
preds = await self.predictTextModel(image_tensor)
d["label"] = process_text_result(preds)

Expand Down
26 changes: 19 additions & 7 deletions plugins/openvino/src/predict/text_skew.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,28 +61,40 @@ def find_adjacent_groups(boxes: List[BoundingBox], scores: List[float]) -> List[
if added_to_group:
break
if not added_to_group:
groups.append({"boxes": [box], "scores": [scores[index]], "skew_angle": 0})
groups.append({"boxes": [box], "scores": [scores[index]]})

# Calculate the skew angle of each group
for group in groups:
boxes = group["boxes"]
group["union"] = union_boxes(boxes)
if len(boxes) - 1:
lm = (boxes[0][1] + boxes[0][3]) / 2
rm = (boxes[-1][1] + boxes[-1][3]) / 2
dx = (boxes[-1][0]) - (boxes[0][0] + boxes[0][2])
lm = boxes[0][1] + boxes[0][3] / 2
rm = boxes[-1][1] + boxes[-1][3] / 2
dx = (boxes[-1][0]) - (boxes[0][0])
minx = min([box[0] for box in boxes])
maxx = max([box[0] + box[2] for box in boxes])

# denoise by filtering the box height
minh = min([box[3] for box in boxes])
median_height = sorted([box[3] for box in boxes])[len(boxes) // 2]
maxh = max([box[3] for box in boxes])
pad_height = maxh * 0.05
filter_height = median_height
pad_height = filter_height * 0.05

dx = maxx - minx
group['skew_angle'] = math.atan2(rm - lm, dx) * 2
group['skew_angle'] = math.atan((rm - lm) / dx)
group['deskew_height'] = filter_height + pad_height * 2
# pad this box by a few pixels
group['union'] = (group['union'][0] - pad_height, group['union'][1] - pad_height, group['union'][2] + pad_height * 2, group['union'][3] + pad_height * 2)
group['union'] = (
group['union'][0] - pad_height,
group['union'][1] - pad_height,
group['union'][2] + pad_height * 2,
group['union'][3] + pad_height * 2)
# average the scores
group['score'] = sum(group['scores']) / len(group['scores'])
else:
group['skew_angle'] = 0
group['deskew_height'] = boxes[0][3]
group['score'] = group['scores'][0]

return groups

0 comments on commit 5b12401

Please sign in to comment.