# main.py
import os
import sys
import cv2
import time
import ctypes
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import coco
import uff
import tensorrt as trt
import graphsurgeon as gs
#from config import model_ssd_inception_v2_coco_2017_11_17 as model
#from config import model_ssd_mobilenet_v1_coco_2018_01_28 as model
from config import model_ssd_mobilenet_v2_coco_2018_03_29 as model
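# load the FlattenConcat plugin implementation (built separately as
# libflattenconcat.so) so TensorRT can resolve the SSD graph's custom
# concat layers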
ctypes.CDLL("lib/libflattenconcat.so")
COCO_LABELS = coco.COCO_CLASSES_LIST
# initialize TensorRT: register the built-in plugins and create a runtime
# for engine deserialization
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
trt.init_libnvinfer_plugins(TRT_LOGGER, '')
runtime = trt.Runtime(TRT_LOGGER)
# compile the model into a TensorRT engine (first run only)
if not os.path.isfile(model.TRTbin):
    dynamic_graph = model.add_plugin(gs.DynamicGraph(model.path))
    uff_model = uff.from_tensorflow(dynamic_graph.as_graph_def(), model.output_name, output_filename='tmp.uff')

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', model.dims)
        parser.register_output('MarkOutput_0')
        parser.parse('tmp.uff', network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(model.TRTbin, 'wb') as f:
            f.write(buf)
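
# the serialized engine is written to model.TRTbin above, so the slow
# UFF parse / build step runs only once; later runs load the cached engine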
# create engine
with open(model.TRTbin, 'rb') as f:
    buf = f.read()
    engine = runtime.deserialize_cuda_engine(buf)
# create buffer
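# For every binding the engine exposes (one input plus two outputs for this
# SSD: the NMS detections and a detection-count tensor), allocate a
# page-locked host buffer and a matching device buffer, and record the
# device pointer in `bindings` in binding order.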
host_inputs = []
cuda_inputs = []
host_outputs = []
cuda_outputs = []
bindings = []
stream = cuda.Stream()
for binding in engine:
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    host_mem = cuda.pagelocked_empty(size, np.float32)
    cuda_mem = cuda.mem_alloc(host_mem.nbytes)

    bindings.append(int(cuda_mem))
    if engine.binding_is_input(binding):
        host_inputs.append(host_mem)
        cuda_inputs.append(cuda_mem)
    else:
        host_outputs.append(host_mem)
        cuda_outputs.append(cuda_mem)
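
# the execution context holds the per-inference state for the engine;
# a single context is enough for this single-image demo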
context = engine.create_execution_context()
# inference
#TODO enable video pipeline
#TODO using pyCUDA for preprocess
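
# the script expects an image path as its only argument
if len(sys.argv) < 2:
    sys.exit('usage: python3 main.py <image>')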
ori = cv2.imread(sys.argv[1])
image = cv2.cvtColor(ori, cv2.COLOR_BGR2RGB)               # model expects RGB
image = cv2.resize(image, (model.dims[2], model.dims[1]))  # resize to network input (W, H)
image = (2.0 / 255.0) * image - 1.0                        # scale pixels to [-1, 1]
image = image.transpose((2, 0, 1))                         # HWC -> CHW
np.copyto(host_inputs[0], image.ravel())                   # stage in page-locked host buffer
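
# asynchronous inference on one CUDA stream: host-to-device copy of the
# input, kernel launch, then device-to-host copies of both outputs,
# followed by a synchronize before the results are read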
start_time = time.time()
cuda.memcpy_htod_async(cuda_inputs[0], host_inputs[0], stream)
context.execute_async(bindings=bindings, stream_handle=stream.handle)
cuda.memcpy_dtoh_async(host_outputs[1], cuda_outputs[1], stream)
cuda.memcpy_dtoh_async(host_outputs[0], cuda_outputs[0], stream)
stream.synchronize()
print("execute times "+str(time.time()-start_time))
output = host_outputs[0]
height, width, channels = ori.shape
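
# each detection occupies model.layout consecutive floats; here that is 7
# values per box: [image id, class label, confidence, xmin, ymin, xmax, ymax],
# with box coordinates normalized to [0, 1]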
for i in range(int(len(output) / model.layout)):
    prefix = i * model.layout
    index = int(output[prefix + 0])
    label = int(output[prefix + 1])
    conf = output[prefix + 2]
    xmin = int(output[prefix + 3] * width)
    ymin = int(output[prefix + 4] * height)
    xmax = int(output[prefix + 5] * width)
    ymax = int(output[prefix + 6] * height)

    if conf > 0.7:
        print("Detected {} with confidence {:.0%}".format(COCO_LABELS[label], conf))
        cv2.rectangle(ori, (xmin, ymin), (xmax, ymax), (0, 0, 255), 3)
        cv2.putText(ori, COCO_LABELS[label], (xmin + 10, ymin + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
cv2.imwrite("result.jpg", ori)
cv2.imshow("result", ori)
cv2.waitKey(0)