SLAM.py
import numpy as np
import argparse
from threading import Thread
from multiprocessing import Queue, Value
from collections import deque
import cv2
from matplotlib import pyplot as plt
from timeit import default_timer as timer

# Our classes
from Extractor import *
from Preprocessor import *
from Keyframe import *
from Endpoint import Endpoint

# Mapping
from Map import *

MIN_MATCH_COUNT = 30

def plot_performance(t, fps_record):
    plt.xlabel("Seconds")
    plt.ylabel("Frames per Second")
    plt.plot(t, fps_record, "ob")
    plt.title("System Performance Over Time")
    axs = plt.gca()
    axs.set_ylim([0, 45])
    plt.show()

# Useful statistics for post-video analysis
def printStatistics(*, totalFrames, lowFeatureFrames, totalKeyframes):
    print(f'Total frames: {totalFrames}.')
    print(f'Low-feature frames: {lowFeatureFrames}.')
    print(f'Total % of low-feature frames: {lowFeatureFrames/totalFrames*100:.2f}%.')
    print(f'Total keyframes: {totalKeyframes}.')

def convert_world2pangolin(point):
    # 90-degree rotation about the y-axis, mapping world coordinates into the
    # Pangolin viewer's coordinate frame
    t = np.array([[0, 0, -1], [0, 1, 0], [1, 0, 0]])
    return np.matmul(t, np.asarray(point))
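
# Quick sanity check (a sketch, values computed by hand from the matrix above):
# the rotation sends world +x to Pangolin +z and world +z to Pangolin -x, e.g.
#   convert_world2pangolin([1, 0, 0])  ->  [0, 0, 1]
#   convert_world2pangolin([0, 0, 1])  ->  [-1, 0, 0]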

def createArgumentParser():
    # Make argument parser for PySlam
    parser = argparse.ArgumentParser(description='Python SLAM implementation.')
    # Optional arguments that require additional values
    parser.add_argument('-i', '--image_path', nargs='?', default="",
        type=str, help='Absolute or relative path of the video file')
    parser.add_argument('-s', '--scale_percent', nargs='?', default=50,
        type=int, help='An integer percentage for preprocessor image scaling (default: 50)')
    parser.add_argument('-m', '--feature_matches', nargs='?', default=10,
        type=int, help='An integer number of feature matches to visualize (default: 10)')
    parser.add_argument('-f', '--num_features', nargs='?', default=500,
        type=int, help='An integer maximum number of features to retain (default: 500)')
    parser.add_argument('-t', '--score_type', nargs='?', default=0,
        type=int, help='An integer feature-detector score type, 0: Harris or 1: FAST (default: 0)')
    # Optional boolean flags that don't require additional values
    parser.add_argument('-d', '--debug', action='store_true',
        help='Enable debug mode, which prints information and displays several windows')
    parser.add_argument('-r', '--rpi_stream', action='store_true',
        help='Run PySlam in a client/server configuration (default port: 30000)')
    return parser

# Simple wrapper to increase readability
def getProgramArguments():
    return createArgumentParser().parse_args()
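
# Example invocation (a sketch; 'video.mp4' is a hypothetical file path):
#   python SLAM.py -i video.mp4 -s 50 -f 1000 -d
# or, to read from the Raspberry Pi stream instead of a file:
#   python SLAM.py -r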

def main():
    # Take command line arguments for various params
    args = getProgramArguments()

    # Track recent data
    # TODO: change features to a tuple? namedtuple('Feature', ['keypoint', 'descriptor']) & update code
    frameDeque = deque(maxlen=2)
    kpDeque = deque(maxlen=2)
    desDeque = deque(maxlen=2)
    keyframes = []

    # Create feature extractor, capture stream, and preprocessor
    fe = Extractor(args.num_features)
    # Link with the RPi stream or a video file, depending upon program arguments
    cap = Endpoint() if args.rpi_stream else cv2.VideoCapture(args.image_path)
    pp = Preprocessor(scalePercent=args.scale_percent)

    # Instantiate the map & start the mapping thread
    state_information = Queue()
    map_system = Mapper(state_information)
    map_system.start()

    # For post-video statistics
    totalFrames = lowFeatureFrames = 0
    framesSinceKeyframeInsert = 0
    accumulatingPose = np.identity(3)

    # Performance analysis
    fps_record = []
    time_sequence = []
    program_start = timer()
    elapsed = 0

    while cap.isOpened() and elapsed < 60.0:
        start = timer()
        ret, frame = cap.read()
        # TODO: catch SIGINT
        if not ret:
            printStatistics(totalFrames=totalFrames,
                lowFeatureFrames=lowFeatureFrames, totalKeyframes=len(keyframes))
            exit()

        # Update statistics counters
        totalFrames += 1
        framesSinceKeyframeInsert += 1

        img = pp.preprocess(frame)

        # Camera intrinsics matrix for 3D-2D mapping; the image width serves as
        # a rough focal-length estimate (in pixels)
        H, W = img.shape
        K = np.array([[W, 0, W//2],
                      [0, W, H//2],
                      [0, 0,    1]])
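
        # Sketch of what K encodes (hypothetical numbers): with W=640, H=360,
        # a camera-frame point (X, Y, Z) projects to pixel coordinates
        #   u = 640*X/Z + 320,  v = 640*Y/Z + 180
        # i.e. a focal length of ~W pixels with the principal point at the image center.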
        features = fe.extract(img)
        if features['kps'] is None or features['des'] is None:
            continue

        # Record image, keypoints, and descriptors between frames
        kpDeque.appendleft(features['kps'])
        desDeque.appendleft(features['des'])
        kpImg = cv2.drawKeypoints(img, kpDeque[0], None, color=(0, 255, 0), flags=0)
        frameDeque.appendleft(kpImg)

        # Match between frames
        if len(frameDeque) > 1:
            # Match descriptors
            matches = fe.match(desDeque[0], desDeque[1])

            # Get perspective transform
            if len(matches) > MIN_MATCH_COUNT:
                # Gather matched keypoints from the two deques into their respective lists
                # TODO: change old frame to src and new frame to dst
                src_pts = np.float32([kpDeque[0][m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
                dst_pts = np.float32([kpDeque[1][m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

                # A homography model is a good estimator for planar scenes
                pose, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
                ret1, rot, tran, _ = cv2.decomposeHomographyMat(pose, K)
                # TODO: identify the valid translation

                # Recover global & local transformation data
                F, maskf = cv2.findFundamentalMat(src_pts, dst_pts, cv2.FM_LMEDS)
                # Two views - general motion, general structure:
                # 1. Estimate the essential/fundamental matrix
                # 2. Decompose the essential matrix
                # 3. Impose the positive-depth requirement (up to 4 solutions)
                # 4. Recover 3D structure
                # Note: the fundamental matrix is sensitive to all points lying on the same plane
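
                # Sanity-check sketch (not part of the pipeline): matched pixel
                # pairs that are inliers should roughly satisfy the epipolar
                # constraint x2^T F x1 ~ 0, e.g. for the i-th match:
                #   x1 = np.append(src_pts[i].ravel(), 1.0)
                #   x2 = np.append(dst_pts[i].ravel(), 1.0)
                #   residual = x2 @ F @ x1   # near zero for a good match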
                # Keep only inliers
                src_pts = src_pts[mask.ravel() == 1]
                dst_pts = dst_pts[mask.ravel() == 1]
                #normalized_src_pts = np.array([m/np.linalg.norm(m) for m in src_pts])
                #normalized_dst_pts = np.array([m/np.linalg.norm(m) for m in dst_pts])

                E, maske = cv2.findEssentialMat(src_pts, dst_pts, K, cv2.RANSAC, 0.9, 2)
                points_e, r_E, t, mask_E = cv2.recoverPose(E, src_pts, dst_pts)
                correct_translation = t

                # Accumulate the pose transformation to track the global transform over time
                extrinsic1 = np.concatenate((accumulatingPose, np.array([map_system.system_coord]).T), axis=1)
                accumulatingPose = np.matmul(r_E, accumulatingPose)

                # Use the line below for the actual translation; also uncomment the
                # corresponding line further down when appending to system coords
                #system_plus_translation = np.array([map_system.system_coord + convert_world2pangolin(t.T[0])]).T
                # Use the lines below for a fixed z+1 translation
                temp = np.array([-1, 0, 0])
                system_plus_translation = np.array([map_system.system_coord + temp]).T

                # Calculate extrinsic2 from the newly accumulated pose and the system
                # coords plus the translation from the previous location
                extrinsic2 = np.concatenate((accumulatingPose, system_plus_translation), axis=1)

                # Multiply the extrinsic matrices by the camera matrix to get projection matrices
                extrinsic1 = np.matmul(K, extrinsic1)
                extrinsic2 = np.matmul(K, extrinsic2)
                points_4d = Mapper.convert2D_4D(src_pts, dst_pts, extrinsic1, extrinsic2)
                #mask_4d = np.abs(points_4d[:,3]) > .005
                #points_4d = points_4d[mask_4d]
                #points_4d /= points_4d[3]
                #points_4d = np.array([point/point[3] for point in points_4d])
                #mask_4d = points_4d[:,2]>0
                #points_4d = points_4d[mask_4d]
                points_3d = points_4d[:, :3]
                points_3d = [convert_world2pangolin(np.transpose(5*point)) for point in points_3d]
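
                # Note (a sketch): points_4d is in homogeneous coordinates, so a
                # proper Euclidean conversion would divide by w before slicing:
                #   points_4d = points_4d / points_4d[:, 3:4]
                #   points_3d = points_4d[:, :3]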

                # Ship the matrix of 3D points to the mapper
                # Use the line below for the actual translation
                #map_system.q.append((convert_world2pangolin(t.T[0]), points_3d))
                # Use the line below for the fixed z+1 translation
                state_information.put((np.array([-1, 0, 0]), points_3d))
                map_system.cur_pose = accumulatingPose
                # TODO: get the essential matrix here, then recover pose, and find which model has minimum error
                #reval, mask = cv2.findFundamentalMat(src_pts, dst_pts, cv2.FM_RANSAC)

                # Build an array in which x_i == 1 -> the i-th feature is a match between src & dst points
                matchesMask = mask.ravel().tolist()
                h, w, d = frameDeque[0].shape

                # Map the ROI to the corners of the image before performing the perspective
                # transform. The new pose of the ROI is representative of the transformation
                # the camera undergoes between frames
                pts = np.float32([[0, 0], [0, h-1], [w-1, h-1], [w-1, 0]]).reshape(-1, 1, 2)
                dst = cv2.perspectiveTransform(pts, pose)
                # Overlay the perspective transform so it's visible how the camera moves through the environment
                poseDeltaImage = cv2.polylines(frameDeque[0].copy(), [np.int32(dst)], True, 255, 3, cv2.LINE_AA)

                # TODO: develop 2 models, one for planar and one for non-planar scenes;
                # calculate the ideal model for the given situation & use it
                # TODO: update keyframe insertion criteria to be more reflective of ORB-SLAM
                if framesSinceKeyframeInsert >= 20:
                    keyframes.append(Keyframe(pose=pose, features=features, intrinsics=K))
                    framesSinceKeyframeInsert = 0
            else:
                lowFeatureFrames += 1
                matchesMask = None
                # Fall back to the raw annotated frame so the debug imshow below
                # has something to display (poseDeltaImage is otherwise unset)
                poseDeltaImage = frameDeque[0]

            draw_params = dict(matchColor=(0, 255, 0),  # draw matches in green
                singlePointColor=None,
                matchesMask=matchesMask,  # draw only inliers
                flags=2)
            # Inlier matches are those deemed acceptable by RANSAC
            inlierMatches = cv2.drawMatches(frameDeque[0], kpDeque[0],
                frameDeque[1], kpDeque[1], matches, None, **draw_params)
            # Draw the first args.feature_matches matches
            matchImage = cv2.drawMatches(frameDeque[0], kpDeque[0],
                frameDeque[1], kpDeque[1], matches[:args.feature_matches], None, flags=2)

            if args.debug:
                cv2.imshow("Inlier Matches", inlierMatches)
                cv2.imshow("Matches", matchImage)
                cv2.imshow("Current frame with delta pose", poseDeltaImage)

        # Time management
        end = timer()
        elapsed = end - program_start
        time_sequence.append(elapsed)
        fps = 1/(end - start)
        fps_record.append(fps)

        # Escape on <esc>
        if cv2.waitKey(30) == 27:
            printStatistics(totalFrames=totalFrames, lowFeatureFrames=lowFeatureFrames,
                totalKeyframes=len(keyframes))
            break

    # TODO: override join to call the stop function in the mapper
    #map_system.stop()
    cap.release()
    cv2.destroyAllWindows()

    # Performance analysis
    plot_performance(time_sequence, fps_record)
    map_system.join()

if __name__ == "__main__":
    main()