# This script is adapted from www.pyimagesearch.com, and the credit goes to Adrian Rosebrock.
# It performs object detection on a Raspberry Pi 3 using the Movidius Neural Compute Stick.
# For object detection, we use a MobileNet neural network with the
# Single Shot Multibox Detector (SSD) as the detection algorithm.
# Example USAGE:
# python object-detection.py --graph graph --display 1
# To display only objects above a 50% confidence threshold, specify --confidence 0.5:
# python object-detection.py --graph graph --confidence 0.5 --display 1
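# NOTE: the graph file is a precompiled NCS graph. With NCSDK v1 it is
# typically produced from Caffe model files with the mvNCCompile tool,
# e.g. (file names below are placeholders for your own model files):
# mvNCCompile MobileNetSSD_deploy.prototxt -w MobileNetSSD_deploy.caffemodel \
#     -s 12 -in data -on detection_out -o graph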
# import the necessary packages
from mvnc import mvncapi as mvnc
from imutils.video import VideoStream
from imutils.video import FPS
import argparse
import numpy as np
import time
import cv2
# initialize the list of class labels our network was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ("background", "aeroplane", "bicycle", "bird",
"boat", "bottle", "bus", "car", "cat", "chair", "cow",
"diningtable", "dog", "horse", "motorbike", "person",
"pottedplant", "sheep", "sofa", "train", "tvmonitor")
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# frame dimensions should be square
PREPROCESS_DIMS = (300, 300)
DISPLAY_DIMS = (900, 900)
# calculate the multiplier needed to scale the bounding boxes
DISP_MULTIPLIER = DISPLAY_DIMS[0] // PREPROCESS_DIMS[0]
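# (here 900 // 300 = 3, so every box coordinate computed on the
# preprocessed frame is tripled when drawn on the display frame)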
def preprocess_image(input_image):
# preprocess the image
preprocessed = cv2.resize(input_image, PREPROCESS_DIMS)
preprocessed = preprocessed - 127.5
preprocessed = preprocessed * 0.007843
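	# note: 0.007843 ~= 1 / 127.5, so the two steps above map pixel
	# values from [0, 255] to roughly [-1, 1], the input range this
	# MobileNet-SSD model expects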
preprocessed = preprocessed.astype(np.float16)
# return the image to the calling function
return preprocessed
def predict(image, graph):
# preprocess the image
image = preprocess_image(image)
# send the image to the NCS and run a forward pass to grab the
# network predictions
graph.LoadTensor(image, None)
(output, _) = graph.GetResult()
	# grab the number of valid object predictions from the output
	# (cast to int -- the raw output array is float16, and range()
	# below requires an integer), then initialize the list of predictions
	num_valid_boxes = int(output[0])
	predictions = []
# loop over results
for box_index in range(num_valid_boxes):
# calculate the base index into our array so we can extract
# bounding box information
base_index = 7 + box_index * 7
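		# each detection is packed as 7 floats starting at index 7:
		# (image_id, class_id, confidence, x1, y1, x2, y2), with the
		# box coordinates normalized to [0, 1]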
		# boxes with non-finite (inf, nan, etc.) numbers must be ignored
		if not np.all(np.isfinite(output[base_index:base_index + 7])):
			continue
		# extract the width and height of the preprocessed image
		# (at this point `image` is the 300x300 network input) and
		# clip the boxes to that size in case the network returns
		# boxes outside of the image boundaries
(h, w) = image.shape[:2]
x1 = max(0, int(output[base_index + 3] * w))
y1 = max(0, int(output[base_index + 4] * h))
x2 = min(w, int(output[base_index + 5] * w))
y2 = min(h, int(output[base_index + 6] * h))
# grab the prediction class label, confidence (i.e., probability),
# and bounding box (x, y)-coordinates
pred_class = int(output[base_index + 1])
pred_conf = output[base_index + 2]
pred_boxpts = ((x1, y1), (x2, y2))
		# create the prediction tuple and append it to the
		# predictions list
prediction = (pred_class, pred_conf, pred_boxpts)
predictions.append(prediction)
# return the list of predictions to the calling function
return predictions
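# illustrative example of what predict() returns for one frame with a
# single confident detection (the values below are made up):
#   [(15, 0.83, ((112, 64), (201, 280)))]   # class 15 == "person"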
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-g", "--graph", required=True,
help="path to input graph file")
ap.add_argument("-c", "--confidence", default=.5,
help="confidence threshold")
ap.add_argument("-d", "--display", type=int, default=0,
help="switch to display image on screen")
args = vars(ap.parse_args())
# grab a list of all NCS devices plugged in to USB
print("[INFO] finding NCS devices...")
devices = mvnc.EnumerateDevices()
# if no devices found, exit the script
if len(devices) == 0:
print("[INFO] No devices found. Please plug in a NCS")
quit()
# use the first device since this is a simple test script
# (you'll want to modify this if you are using multiple NCS devices)
print("[INFO] found {} devices. device0 will be used. "
"opening device0...".format(len(devices)))
device = mvnc.Device(devices[0])
device.OpenDevice()
# open the CNN graph file
print("[INFO] loading the graph file into RPi memory...")
with open(args["graph"], mode="rb") as f:
graph_in_memory = f.read()
# load the graph into the NCS
print("[INFO] allocating the graph on the NCS...")
graph = device.AllocateGraph(graph_in_memory)
# open a pointer to the video stream thread and allow the buffer to
# start to fill, then start the FPS counter
print("[INFO] starting the video stream and FPS counter...")
vs = VideoStream(usePiCamera=True).start()
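# note: usePiCamera=True requires the picamera module; for a USB webcam
# you could use VideoStream(src=0).start() instead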
time.sleep(1)
fps = FPS().start()
# loop over frames from the video file stream
while True:
try:
# grab the frame from the threaded video stream
# make a copy of the frame and resize it for display/video purposes
frame = vs.read()
image_for_result = frame.copy()
image_for_result = cv2.resize(image_for_result, DISPLAY_DIMS)
# use the NCS to acquire predictions
predictions = predict(frame, graph)
# loop over our predictions
for (i, pred) in enumerate(predictions):
# extract prediction data for readability
(pred_class, pred_conf, pred_boxpts) = pred
# filter out weak detections by ensuring the `confidence`
# is greater than the minimum confidence
if pred_conf > args["confidence"]:
# print prediction to terminal
print("[INFO] Prediction #{}: class={}, confidence={}, "
"boxpoints={}".format(i, CLASSES[pred_class], pred_conf,
pred_boxpts))
# check if we should show the prediction data
# on the frame
if args["display"] > 0:
# build a label consisting of the predicted class and
# associated probability
label = "{}: {:.2f}%".format(CLASSES[pred_class],
pred_conf * 100)
# extract information from the prediction boxpoints
(ptA, ptB) = (pred_boxpts[0], pred_boxpts[1])
ptA = (ptA[0] * DISP_MULTIPLIER, ptA[1] * DISP_MULTIPLIER)
ptB = (ptB[0] * DISP_MULTIPLIER, ptB[1] * DISP_MULTIPLIER)
(startX, startY) = (ptA[0], ptA[1])
y = startY - 15 if startY - 15 > 15 else startY + 15
# display the rectangle and label text
cv2.rectangle(image_for_result, ptA, ptB,
COLORS[pred_class], 2)
cv2.putText(image_for_result, label, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 1, COLORS[pred_class], 3)
# check if we should display the frame on the screen
# with prediction data (you can achieve faster FPS if you
# do not output to the screen)
if args["display"] > 0:
# display the frame to the screen
cv2.imshow("Output", image_for_result)
key = cv2.waitKey(1) & 0xFF
# if the `q` key was pressed, break from the loop
if key == ord("q"):
break
# update the FPS counter
fps.update()
# if "ctrl+c" is pressed in the terminal, break from the loop
except KeyboardInterrupt:
break
# if there's a problem reading a frame, break gracefully
except AttributeError:
break
# stop the FPS counter timer
fps.stop()
# destroy all windows if we are displaying them
if args["display"] > 0:
cv2.destroyAllWindows()
# stop the video stream
vs.stop()
# clean up the graph and device
graph.DeallocateGraph()
device.CloseDevice()
# display FPS information
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))