Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Improve bbox processor #1163

Merged
merged 4 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 71 additions & 15 deletions presidio-image-redactor/presidio_image_redactor/bbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
class BboxProcessor:
"""Common module for general bounding box operators."""

@staticmethod
def get_bboxes_from_ocr_results(
self,
ocr_results: Dict[str, List[Union[int, str]]],
) -> List[Dict[str, Union[int, float, str]]]:
"""Get bounding boxes on padded image for all detected words from ocr_results.
Expand All @@ -30,8 +30,8 @@ def get_bboxes_from_ocr_results(

return bboxes

@staticmethod
def get_bboxes_from_analyzer_results(
self,
analyzer_results: List[ImageRecognizerResult],
) -> List[Dict[str, Union[str, float, int]]]:
"""Organize bounding box info from analyzer results.
Expand All @@ -56,8 +56,8 @@ def get_bboxes_from_analyzer_results(

return bboxes

@staticmethod
def remove_bbox_padding(
self,
analyzer_bboxes: List[Dict[str, Union[str, float, int]]],
padding_width: int,
) -> List[Dict[str, int]]:
Expand All @@ -71,21 +71,73 @@ def remove_bbox_padding(
if padding_width < 0:
raise ValueError("Padding width must be a non-negative integer.")

# Remove padding from all bounding boxes
bboxes = [
{
"top": max(0, bbox["top"] - padding_width),
"left": max(0, bbox["left"] - padding_width),
"width": bbox["width"],
"height": bbox["height"],
}
for bbox in analyzer_bboxes
]
if len(analyzer_bboxes) > 0:
# Get fields
has_label = False
has_entity_type = False
try:
_ = analyzer_bboxes[0]["label"]
has_label = True
except KeyError:
has_label = False
try:
_ = analyzer_bboxes[0]["entity_type"]
has_entity_type = True
except KeyError:
has_entity_type = False

# Remove padding from all bounding boxes
if has_label is True and has_entity_type is True:
bboxes = [
{
"left": max(0, bbox["left"] - padding_width),
"top": max(0, bbox["top"] - padding_width),
"width": bbox["width"],
"height": bbox["height"],
"label": bbox["label"],
"entity_type": bbox["entity_type"]
}
for bbox in analyzer_bboxes
]
elif has_label is True and has_entity_type is False:
bboxes = [
{
"left": max(0, bbox["left"] - padding_width),
"top": max(0, bbox["top"] - padding_width),
"width": bbox["width"],
"height": bbox["height"],
"label": bbox["label"]
}
for bbox in analyzer_bboxes
]
elif has_label is False and has_entity_type is True:
bboxes = [
{
"left": max(0, bbox["left"] - padding_width),
"top": max(0, bbox["top"] - padding_width),
"width": bbox["width"],
"height": bbox["height"],
"entity_type": bbox["entity_type"]
}
for bbox in analyzer_bboxes
]
elif has_label is False and has_entity_type is False:
bboxes = [
{
"left": max(0, bbox["left"] - padding_width),
"top": max(0, bbox["top"] - padding_width),
"width": bbox["width"],
"height": bbox["height"]
}
for bbox in analyzer_bboxes
]
else:
bboxes = analyzer_bboxes

return bboxes

@staticmethod
def match_with_source(
self,
all_pos: List[Dict[str, Union[str, int, float]]],
pii_source_dict: List[Dict[str, Union[str, int, float]]],
detected_pii: Dict[str, Union[str, float, int]],
Expand All @@ -107,7 +159,11 @@ def match_with_source(
results_top = detected_pii["top"]
results_width = detected_pii["width"]
results_height = detected_pii["height"]
results_score = detected_pii["score"]
try:
results_score = detected_pii["score"]
except KeyError:
# Handle matching when no score available
results_score = 0
match_found = False

# See what in the ground truth this positive matches
Expand Down
6 changes: 3 additions & 3 deletions presidio-image-redactor/tests/test_bbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ def test_get_bboxes_from_analyzer_results_happy_path(
],
25,
[
{"top": 0, "left": 0, "width": 100, "height": 100},
{"top": 24, "left": 0, "width": 75, "height": 51},
{"top": 1, "left": 588, "width": 226, "height": 35},
{"left": 0, "top": 0, "width": 100, "height": 100, "entity_type": "TYPE_1"},
{"left": 0, "top": 24, "width": 75, "height": 51, "entity_type": "TYPE_2"},
{"left": 588, "top": 1, "width": 226, "height": 35, "entity_type": "TYPE_3"},
],
),
],
Expand Down