-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathedgetpu_benchmark.py
executable file
·93 lines (66 loc) · 2.43 KB
/
edgetpu_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
"""Benchmark a TFLite model on an EdgeTPU (USB/PCI) or on the CPU."""
import argparse
import os
import statistics
import time

# Hide CUDA devices *before* TensorFlow is imported so it never claims a GPU.
# (This was originally set after the import, which only works because TF
# initializes its devices lazily — setting it first is the documented way.)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf  # noqa: E402  NOTE(review): appears unused in this file — confirm before removing
import tflite_runtime.interpreter as tflite  # noqa: E402
import numpy as np  # noqa: E402
# ---- Command line ----------------------------------------------------------
parser = argparse.ArgumentParser(description='EdgeTPU BiSeNetV2 benchmark')
parser.add_argument('model', help='Model path')
parser.add_argument('--device', default='usb', choices=['usb', 'pci', 'cpu'],
                    help='Device to run model on')
parser.add_argument('--device-id', default=None, type=int, help='Device index to use')
# Fixed typo in help text: "invokations" -> "invocations".
parser.add_argument('--count', type=int, default=10, help='Number of invocations')
args = parser.parse_args()

DEVICE = args.device
DEVICE_ID = args.device_id
USE_EDGETPU = (DEVICE != 'cpu')  # 'cpu' means: no EdgeTPU delegate
# libedgetpu addresses a specific accelerator with a "<type>:<index>" string.
if DEVICE_ID is not None:
    DEVICE = f'{DEVICE}:{DEVICE_ID}'
print(f'Using device: {DEVICE}')
model_path = args.model
COUNT = args.count
# ---- Interpreter -----------------------------------------------------------
# EdgeTPU path loads the libedgetpu delegate for the selected device; the CPU
# path runs the plain TFLite interpreter with one thread per core.
if USE_EDGETPU:
    edgetpu_delegate = tflite.load_delegate('libedgetpu.so.1',
                                            options={'device': DEVICE})
    interpreter = tflite.Interpreter(model_path,
                                     experimental_delegates=[edgetpu_delegate])
else:
    interpreter = tflite.Interpreter(model_path, num_threads=os.cpu_count())

interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Drop the leading batch dimension; presumably (H, W, C) — confirm for this model.
INPUT_SHAPE = input_details[0]['shape'][1:4]
OUTPUT_SHAPE = output_details[0]['shape'][1:4]
print(input_details)
print(output_details)
# ---- Benchmark loop --------------------------------------------------------
# Runs COUNT+1 iterations; the first is a warm-up whose timings are discarded
# (the first invoke typically includes delegate/model setup cost).
times = []   # invoke() only
times2 = []  # set_tensor + invoke + get_tensor

# The all-zeros input is loop-invariant — build it once instead of per iteration.
input_data = np.zeros((1, *INPUT_SHAPE), dtype=input_details[0]['dtype'])

for i in range(COUNT + 1):
    # perf_counter() is monotonic and higher-resolution than time.time(),
    # which can jump backwards/forwards on wall-clock adjustments.
    s1 = time.perf_counter()
    interpreter.set_tensor(input_details[0]['index'], input_data)
    s2 = time.perf_counter()
    interpreter.invoke()
    e2 = time.perf_counter()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    e1 = time.perf_counter()
    if i > 0:  # skip the warm-up iteration
        times.append(e2 - s2)
        times2.append(e1 - s1)
    print(f'invoke: {e2-s2:.3f}s ({1/(e2-s2):.2f} fps)')
    print(f'invoke+load: {e1-s1:.3f}s ({1/(e1-s1):.2f} fps)')
def _report(label, samples):
    """Print average, min, max and stdev (seconds) for per-iteration timings."""
    avg = sum(samples) / len(samples)
    print(f'{label}:')
    print(f'Average: {avg:.3f}s ({1/avg:.2f} fps)')
    # statistics.stdev needs at least 2 samples; report 0 for a single sample
    # instead of crashing when the script is run with --count 1.
    spread = statistics.stdev(samples) if len(samples) > 1 else 0.0
    print(f'min/max/stdev: {min(samples):.03f}/{max(samples):.03f}/{spread:.03f}')

print()
_report('Invoke', times)
print()
_report('Total', times2)