Added in yolo script for Auto Annotation #794

Merged 1 commit on Oct 28, 2019
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
https://github.com/opencv/cvat/issues/750).
- Changed REST API: removed PUT and added DELETE methods for /api/v1/users/ID.
- Added Mask-RCNN Auto Annotation Script
- Added YOLO Auto Annotation Script

### Changed
-
22 changes: 22 additions & 0 deletions utils/open_model_zoo/yolov3/README.md
@@ -0,0 +1,22 @@
# Object Detection YOLO V3 Python Demo, Async API Performance Showcase

See [these instructions][1] for converting the YOLO weights to the OpenVINO format.

As of OpenVINO 2019 R3, only TensorFlow 1.13 and NetworkX 2.3 are supported.
These can be installed explicitly using the following command.

```bash
$ pip3 install tensorflow==1.13 networkx==2.3
```


Additionally, at the time of writing, the Model Optimizer required an explicit input shape.

```bash
$ python3 mo_tf.py \
--input_model /path/to/yolo_v3.pb \
--tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/yolo_v3.json \
--input_shape [1,416,416,3]
```

[1]: https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow.html
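
Once converted, the IR can be loaded with the OpenVINO Python API. Below is a minimal sketch, not part of this PR: the file names `yolo_v3.xml`/`yolo_v3.bin`, the `CPU` device, and the sample image path are assumptions, and the API shown is the 2019-era `IECore`.

```python
# A rough sketch (not from this PR): load the converted IR and run one frame.
import cv2
from openvino.inference_engine import IECore, IENetwork

ie = IECore()
net = IENetwork(model='yolo_v3.xml', weights='yolo_v3.bin')  # assumed file names
exec_net = ie.load_network(network=net, device_name='CPU')

input_name = next(iter(net.inputs))  # single NCHW input, [1, 3, 416, 416]
frame = cv2.imread('example.jpg')    # assumed sample image
blob = cv2.resize(frame, (416, 416)).transpose(2, 0, 1)[None, ...]

# One output blob per YoloRegion layer, keyed by layer name --
# the same name/blob pairs that interp.py below iterates over.
outputs = exec_net.infer({input_name: blob})
```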
160 changes: 160 additions & 0 deletions utils/open_model_zoo/yolov3/interp.py
@@ -0,0 +1,160 @@
from math import exp


class Parser:
    IOU_THRESHOLD = 0.4
    PROB_THRESHOLD = 0.5

    def __init__(self):
        self.objects = []

    def scale_bbox(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
        xmin = int((x - w / 2) * w_scale)
        ymin = int((y - h / 2) * h_scale)
        xmax = int(xmin + w * w_scale)
        ymax = int(ymin + h * h_scale)

        return dict(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, class_id=class_id, confidence=confidence)

    def entry_index(self, side, coord, classes, location, entry):
        # The region output is flattened from shape [num * (coord + classes + 1), side, side];
        # recover the flat offset of `entry` for the anchor and spatial cell encoded in `location`.
        side_power_2 = side ** 2
        n = location // side_power_2
        loc = location % side_power_2
        return int(side_power_2 * (n * (coord + classes + 1) + entry) + loc)

    def intersection_over_union(self, box_1, box_2):
        width_of_overlap_area = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
        height_of_overlap_area = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
        if width_of_overlap_area < 0 or height_of_overlap_area < 0:
            area_of_overlap = 0
        else:
            area_of_overlap = width_of_overlap_area * height_of_overlap_area
        box_1_area = (box_1['ymax'] - box_1['ymin']) * (box_1['xmax'] - box_1['xmin'])
        box_2_area = (box_2['ymax'] - box_2['ymin']) * (box_2['xmax'] - box_2['xmin'])
        area_of_union = box_1_area + box_2_area - area_of_overlap
        if area_of_union == 0:
            return 0
        return area_of_overlap / area_of_union

    def sort_objects(self):
        # Greedy non-maximum suppression: zero out the confidence of any box that
        # overlaps a higher-confidence box by more than IOU_THRESHOLD.
        self.objects = sorted(self.objects, key=lambda obj: obj['confidence'], reverse=True)

        for i in range(len(self.objects)):
            if self.objects[i]['confidence'] == 0:
                continue
            for j in range(i + 1, len(self.objects)):
                if self.intersection_over_union(self.objects[i], self.objects[j]) > self.IOU_THRESHOLD:
                    self.objects[j]['confidence'] = 0

    def parse_yolo_region(self, blob: 'np.ndarray', original_shape: list, params: dict) -> None:

        # YOLO magic numbers
        # See: https://github.com/opencv/open_model_zoo/blob/acf297c73db8cb3f68791ae1fad4a7cc4a6039e5/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L61
        num = 3
        coords = 4
        classes = 80
        # -----------------

        _, _, out_blob_h, out_blob_w = blob.shape
        assert out_blob_w == out_blob_h, "Invalid size of output blob. It should be in NCHW layout and height should " \
                                         "be equal to width. Current height = {}, current width = {}" \
                                         "".format(out_blob_h, out_blob_w)

        # ------ Extracting layer parameters ------
        orig_im_h, orig_im_w = original_shape
        predictions = blob.flatten()
        side_square = params['side'] * params['side']

        # ------ Parsing YOLO Region output ------
        for i in range(side_square):
            row = i // params['side']
            col = i % params['side']
            for n in range(num):
                obj_index = self.entry_index(params['side'], coords, classes, n * side_square + i, coords)
                scale = predictions[obj_index]
                if scale < self.PROB_THRESHOLD:
                    continue
                box_index = self.entry_index(params['side'], coords, classes, n * side_square + i, 0)

                # The network produces location predictions in absolute coordinates of feature maps.
                # Scale them to coordinates relative to the input size.
                x = (col + predictions[box_index + 0 * side_square]) / params['side']
                y = (row + predictions[box_index + 1 * side_square]) / params['side']
                # The argument to exp can be a very large number, so guard against overflow here.
                try:
                    w_exp = exp(predictions[box_index + 2 * side_square])
                    h_exp = exp(predictions[box_index + 3 * side_square])
                except OverflowError:
                    continue

                w = w_exp * params['anchors'][2 * n] / 416
                h = h_exp * params['anchors'][2 * n + 1] / 416
                for j in range(classes):
                    class_index = self.entry_index(params['side'], coords, classes, n * side_square + i,
                                                   coords + 1 + j)
                    confidence = scale * predictions[class_index]
                    if confidence < self.PROB_THRESHOLD:
                        continue

                    self.objects.append(self.scale_bbox(x=x,
                                                        y=y,
                                                        h=h,
                                                        w=w,
                                                        class_id=j,
                                                        confidence=confidence,
                                                        h_scale=orig_im_h,
                                                        w_scale=orig_im_w))


# `detections` and `results` are provided by the CVAT auto annotation engine.
for detection in detections:
    frame_number = detection['frame_id']
    height = detection['frame_height']
    width = detection['frame_width']
    detection = detection['detections']

    # parse_yolo_region unpacks this as (orig_im_h, orig_im_w)
    original_shape = (height, width)

    # https://github.com/opencv/open_model_zoo/blob/master/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L72
    anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
    conv_6 = {'side': 13, 'mask': [6, 7, 8]}
    conv_14 = {'side': 26, 'mask': [3, 4, 5]}
    conv_22 = {'side': 52, 'mask': [0, 1, 2]}

    yolo_params = {'detector/yolo-v3/Conv_6/BiasAdd/YoloRegion': conv_6,
                   'detector/yolo-v3/Conv_14/BiasAdd/YoloRegion': conv_14,
                   'detector/yolo-v3/Conv_22/BiasAdd/YoloRegion': conv_22}

    # Each region layer uses only its own subset of the anchors, selected by mask.
    for conv_net in yolo_params.values():
        mask = conv_net['mask']
        masked_anchors = []
        for idx in mask:
            masked_anchors += [anchors[idx * 2], anchors[idx * 2 + 1]]

        conv_net['anchors'] = masked_anchors

    parser = Parser()

    for name, blob in detection.items():
        parser.parse_yolo_region(blob, original_shape, yolo_params[name])

    parser.sort_objects()

    for obj in parser.objects:
        if obj['confidence'] >= parser.PROB_THRESHOLD:
            label = obj['class_id']
            xmin = obj['xmin']
            xmax = obj['xmax']
            ymin = obj['ymin']
            ymax = obj['ymax']

            results.add_box(xmax, ymax, xmin, ymin, label, frame_number)
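
For a quick standalone sanity check of `Parser` outside CVAT, one can feed it a synthetic region blob. This is a hypothetical snippet, not part of the PR; numpy and the all-zero input are assumptions, with the `side` and anchor values mirroring `conv_6` above.

```python
# Hypothetical check: an all-zero region blob should yield no detections,
# since every objectness score falls below PROB_THRESHOLD.
import numpy as np

params = {'side': 13, 'anchors': [116, 90, 156, 198, 373, 326]}  # conv_6 values
blob = np.zeros((1, 255, 13, 13), dtype=np.float32)  # 255 = num * (coords + classes + 1)

parser = Parser()
parser.parse_yolo_region(blob, (416, 416), params)
parser.sort_objects()
assert parser.objects == []
```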
84 changes: 84 additions & 0 deletions utils/open_model_zoo/yolov3/mapping.json
@@ -0,0 +1,84 @@
{
"label_map": {
"1": "person",
"2": "bicycle",
"3": "car",
"4": "motorbike",
"5": "aeroplane",
"6": "bus",
"7": "train",
"8": "truck",
"9": "boat",
"10": "traffic light",
"11": "fire hydrant",
"12": "stop sign",
"13": "parking meter",
"14": "bench",
"15": "bird",
"16": "cat",
"17": "dog",
"18": "horse",
"19": "sheep",
"20": "cow",
"21": "elephant",
"22": "bear",
"23": "zebra",
"24": "giraffe",
"25": "backpack",
"26": "umbrella",
"27": "handbag",
"28": "tie",
"29": "suitcase",
"30": "frisbee",
"31": "skis",
"32": "snowboard",
"33": "sports ball",
"34": "kite",
"35": "baseball bat",
"36": "baseball glove",
"37": "skateboard",
"38": "surfboard",
"39": "tennis racket",
"40": "bottle",
"41": "wine glass",
"42": "cup",
"43": "fork",
"44": "knife",
"45": "spoon",
"46": "bowl",
"47": "banana",
"48": "apple",
"49": "sandwich",
"50": "orange",
"51": "broccoli",
"52": "carrot",
"53": "hot dog",
"54": "pizza",
"55": "donut",
"56": "cake",
"57": "chair",
"58": "sofa",
"59": "pottedplant",
"60": "bed",
"61": "diningtable",
"62": "toilet",
"63": "tvmonitor",
"64": "laptop",
"65": "mouse",
"66": "remote",
"67": "keyboard",
"68": "cell phone",
"69": "microwave",
"70": "oven",
"71": "toaster",
"72": "sink",
"73": "refrigerator",
"74": "book",
"75": "clock",
"76": "vase",
"77": "scissors",
"78": "teddy bear",
"79": "hair drier",
"80": "toothbrush"
}
}