-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathSmallBlurryImage.cpp
452 lines (339 loc) · 17.3 KB
/
SmallBlurryImage.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
// George Terzakis 2016
//
// University of Portsmouth
//
// Code based on PTAM by Klein and Murray (Copyright 2008 Isis Innovation Limited)
#include "SmallBlurryImage.h"
#include "GCVD/GraphSLAM.h"
#include "GCVD/Addedutils.h"
using namespace RigidTransforms;
using namespace Optimization;
using namespace std;
// Size shared by every SmallBlurryImage (static member).
// (-1,-1) is a "not set yet" sentinel: MakeFromKF() tests width == -1 and,
// when it matches, assigns the real size from the keyframe it is given.
cv::Size2i SmallBlurryImage::mirSize(-1,-1);
// Construct an SBI directly from a keyframe.
//   kf    - keyframe handed to MakeFromKF()
//   dBlur - blur parameter handed to MakeFromKF()
// The gradient image is not computed here; the flag starts out lowered.
SmallBlurryImage::SmallBlurryImage(KeyFrame &kf, double dBlur)
    : mbMadeGradients(false)
{
    // Builds the small image and the blurred template from the keyframe.
    MakeFromKF(kf, dBlur);
}
// Construct an empty SBI: no images are built and no gradients have been made.
SmallBlurryImage::SmallBlurryImage()
    : mbMadeGradients(false)
{
}
// Make a SmallBlurryImage from a KeyFrame.
//
// Fills in two images:
//   - mimSmall    : level 3 of the keyframe pyramid, half-sampled (un-blurred);
//   - mimTemplate : floating-point copy of mimSmall with the mean of mimSmall
//                   subtracted, then Gaussian-blurred.
//
//   kf    - keyframe whose aLevels[3].im is the source image
//   dBlur - sigma passed to cv::GaussianBlur; also determines the kernel width
//
// Side effects: while mirSize.width is still -1 the shared static size mirSize
// is set from this keyframe; mbMadeGradients is always lowered, so gradients
// are only (re)computed by an explicit call to MakeGradients().
void SmallBlurryImage::MakeFromKF(KeyFrame &kf, double dBlur)
{
    // The SBI is half the size of pyramid level 3. The size is decided once.
    if (mirSize.width == -1) {
        mirSize = cv::Size2i(kf.aLevels[3].im.cols / 2, kf.aLevels[3].im.rows / 2);
    }

    // Gradients are only needed by the Benhimane-Malis style alignment, which
    // may never run for this SBI - so they are not computed here.
    mbMadeGradients = false;

    // Only the template needs explicit allocation.
    mimTemplate.create(mirSize);

    // Half-sample level 3 into mimSmall by simple averaging
    // (preferred by the author over cv::resize / pyrDown).
    CvUtils::halfSample(kf.aLevels[3].im, mimSmall);

    // Zero-mean copy of mimSmall into the floating-point template.
    const float meanIntensity = cv::mean(mimSmall)[0];
    for (int row = 0; row < mimTemplate.rows; ++row) {
        float *dstRow = mimTemplate.ptr<float>(row);
        uchar *srcRow = mimSmall.ptr<uchar>(row);
        for (int col = 0; col < mimTemplate.cols; ++col) {
            dstRow[col] = srcRow[col] - meanIntensity;
        }
    }

    // Kernel width: ceil(3 * dBlur) (3.0 being libCVD's default "sigmas"
    // parameter), bumped to the next odd number since cv::GaussianBlur
    // needs an odd kernel size.
    int kernelSize = (int)ceil(dBlur * 3.0);
    if (kernelSize % 2 == 0) {
        kernelSize += 1;
    }

    // Blur into a temporary (allocated by OpenCV), then copy back.
    cv::Mat_<float> blurred;
    cv::GaussianBlur(mimTemplate, blurred, cv::Size(kernelSize, kernelSize), dBlur);
    blurred.copyTo(mimTemplate);
}
// Make the jacobians (actually, no more than a gradient image)
// of the blurred template
void SmallBlurryImage::MakeGradients() {
mimImageGradients.create(mirSize);
// Fill-in the gradient image
int r, c;
float* tRowPtr00, *tRowPtr10, *tRowPtr_10; // template row pointers at rows r, r+1, r-1
cv::Vec<float, 2>* gRowPtr; // gradient row pointer
for (r = 0; r < mirSize.height; r++) {
tRowPtr00 = mimTemplate.ptr<float>(r);
tRowPtr10 = mimTemplate.ptr<float>(r + 1);
tRowPtr_10 = mimTemplate.ptr<float>(r - 1);
gRowPtr = mimImageGradients.ptr<cv::Vec<float,2 > >(r);
for (c = 0; c < mirSize.width; c++)
// probably all vectors are already zero, but let's be on the safe side
if ( (r == 0) || (c ==0) || (r == mirSize.height - 1) || (c == mirSize.width - 1) )
gRowPtr[c][0] = gRowPtr[c][1] = 0;
//otherwise simply compute and store the gradient
else {
gRowPtr[c][0] = tRowPtr00[c + 1] - tRowPtr00[c - 1];
gRowPtr[c][1] = tRowPtr10[c] - tRowPtr_10[c];
// N.b. missing 0.5 factor in above - will be added later.
}
}
mbMadeGradients = true;
}
// Zero-mean sum of squared differences between this SBI and another.
// Both templates had their mean removed when they were built, so this is
// just the plain SSD of the two mimTemplate images over a mirSize window.
//
//   other - the SBI to compare against
// Returns the accumulated sum of squared per-pixel differences.
double SmallBlurryImage::ZMSSD(SmallBlurryImage &other)
{
    double sumSquared = 0.0;

    for (int row = 0; row < mirSize.height; ++row) {
        const float *mine = mimTemplate.ptr<float>(row);
        const float *theirs = other.mimTemplate.ptr<float>(row);

        for (int col = 0; col < mirSize.width; ++col) {
            const double delta = mine[col] - theirs[col];
            sumSquared += delta * delta;
        }
    }

    return sumSquared;
}
// Find an SE2 which best aligns an SBI to a target
// Do this by ESM-tracking a la Benhimane & Malis
// George: This means formulating a cost function on the difference between image intensities.
// This is actually a standard pyramidal optical flow estimation method
// (see the revised LK tracker paper or Bouguet's pyramidal LK tracker report)
//
// ***************************** Method Overview ***********************************
//
// Optical flow estimation with rotation
// Although this method is attributed to Benhimane & Malis,
// I would refer the reader to Bouguet or the "Handbook of mathematical models for Computer Vision"
// by Paragios, Chen et al. for more comprehensive reading...
//
// OR... You can read right below...
//
//
// We formulate a cost function comprising the following terms:
//
// f(di) - ( g(R*di + t) + d )
//
// where g is the current SBI and f is the "other" (nb target). di are the CENTRALIZED coordinates of the i-th pixel in the patch,
// R is a 2D rotation, t is a 2D translation from the center of the patch and d (aka "dMeanOffset") is a DC offset unknown
// (could be omitted, but PTAM authors appear to be very fond of it; I guess they know something about it..).
// - Linearizing g at R0*di+t0 yields the linearized Gauss-Newton factor:
//
// f(di) - g(R0*di+t0) - d0 - [ G * [dR/da , I] , I] * ([t;a;d] - [t0;a0;d0] )
//
// where G is the spatial gradient of g, a is the angle of R. In practice, instead of G we use the average (G + F) / 2
// of both image gradients. The rest is G-N optimization... Note that t0, a0 and d0 are the current estimates
// of translation, rotation angle and DC offset.
//
// Now notice that the error at the current estimates is: error = -f(di) + g(R0*di+t0) + d0,
//
// which, in the code, corresponds to: error = here_ - otRowPtr[c] + dMeanOffset;
//
// the rest can be worked out based on the above.... Enjoy!
//
// Estimate the SE2 that best aligns this SBI's template with that of `other`,
// by Gauss-Newton iterations on the per-pixel intensity difference (plus a
// scalar brightness offset).
//
//   other       - target SBI. Its gradients must already have been made
//                 (other.mbMadeGradients); otherwise an error is printed and
//                 the assert fires in builds where asserts are enabled.
//   nIterations - number of Gauss-Newton iterations to run.
//
// Returns (se2CtoC, score): the estimated transformation expressed about the
// image centre, and the sum of squared residuals accumulated during the last
// iteration (i.e. evaluated before that iteration's update is applied).
pair<SE2<>,double> SmallBlurryImage::IteratePosRelToTarget(SmallBlurryImage &other, int nIterations)
{
    // The pixel-to-pixel rigid transformation is decomposed as
    //     se2WfromC * se2CtoC * se2WfromC.inverse()
    // so that the unknown (se2CtoC) is a motion about the image centre, i.e.
    // the least-squares problem is formulated in CENTRALIZED coordinates
    // (hence the centre subtractions in the rotation derivative below).

    // The unknown centre-to-centre transformation (default-constructed to start with).
    SE2<> se2CtoC;

    // FIXED translation linking the upper-left corner to the image centre.
    SE2<> se2WfromC;
    const cv::Point2i irCenter(mirSize.width / 2, mirSize.height / 2);
    se2WfromC.get_translation() = cv::Vec2f(irCenter.x, irCenter.y);

    if(!other.mbMadeGradients)
    {
        cerr << "Target SBI gradients not made!" << endl;
        assert(other.mbMadeGradients);
    }

    // 4th unknown: a "DC offset" in brightness between the two templates.
    double dMeanOffset = 0.0;
    cv::Mat_<float> imWarped(mirSize);
    double dFinalScore = 0.0;

    for(int it = 0; it<nIterations; it++) {
        const SE2<> se2XForm = se2WfromC * se2CtoC * se2WfromC.inverse();

        // Warp the current image template based on our estimate.
        // NOTE(review): -9e20f appears to be the value written for pixels that
        // fall outside the source image (see the sentinel test below) - confirm
        // against CvUtils::transform.
        cv::Vec2f v2Zero(0, 0);
        CvUtils::transform<>(mimTemplate,
                             imWarped,
                             se2XForm.get_rotation().get_matrix(),
                             se2XForm.get_translation(),
                             v2Zero,
                             -9e20f);

        dFinalScore = 0.0;

        // Accumulator for sum(J^T * error); unknowns are [tx, ty, angle, DC offset].
        cv::Vec<double, 4> v4Accum(0, 0, 0, 0);
        // UPPER triangle of sum(J^T J), cached row by row in a 1D array.
        double UT[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
        // Per-pixel (4D) Jacobian; the DC-offset derivative is always one.
        cv::Vec<float, 4> v4Jac;
        v4Jac[3] = 1.0;

        // Interior pixels only: each one needs its 4 neighbours, and stopping
        // at height-2 / width-2 means no pointer to a non-existent row
        // (r+1 == height) is ever requested from cv::Mat::ptr.
        for (int r = 1; r < mirSize.height - 1; r++) {
            const float* iwRowPtr00 = imWarped.ptr<float>(r);      // row r   of imWarped
            const float* iwRowPtr10 = imWarped.ptr<float>(r + 1);  // row r+1 of imWarped
            const float* iwRowPtr_10 = imWarped.ptr<float>(r - 1); // row r-1 of imWarped
            const float* otRowPtr = other.mimTemplate.ptr<float>(r);
            const cv::Vec<float, 2>* ogRowPtr = other.mimImageGradients.ptr<cv::Vec<float, 2> >(r);

            for (int c = 1; c < mirSize.width - 1; c++) {
                // Neighbourhood of (r, c) in the warped image.
                const float left_ = iwRowPtr00[c - 1];
                const float right_ = iwRowPtr00[c + 1];
                const float up_ = iwRowPtr_10[c];
                const float down_ = iwRowPtr10[c];
                const float here_ = iwRowPtr00[c];

                // Skip if any of these carries the out-of-image sentinel
                // (c.f. the -9e20f param to transform).
                if(left_ + right_ + up_ + down_ + here_ < -9999.9) continue;

                // Gradient of the WARPED CURRENT image at this pixel
                // (central difference without its 0.5 factor).
                cv::Vec2f v2CurrentGrad( right_ - left_,
                                         down_ - up_ );

                // Average of the warped-current and target gradients.
                // Why 0.25? One 0.5 for averaging two gradients, another 0.5
                // because each gradient is itself missing its 0.5 factor.
                cv::Vec2f v2GradAvg = 0.25 * (v2CurrentGrad + ogRowPtr[c] );

                // Jacobian:
                //  [0],[1] derivatives w.r.t. translation = averaged gradient;
                //  [2]     derivative w.r.t. the rotation angle about the centre:
                //          -(y - cy) * gx + (x - cx) * gy;
                //  [3]     derivative w.r.t. the DC offset (already 1).
                v4Jac[0] = v2GradAvg[0];
                v4Jac[1] = v2GradAvg[1];
                v4Jac[2] = -(r - irCenter.y) * v2GradAvg[0] + (c - irCenter.x) * v2GradAvg[1];

                const double error = here_ - otRowPtr[c] + dMeanOffset;
                dFinalScore += error * error;

                v4Accum += error * v4Jac;

                UT[0] += v4Jac[0] * v4Jac[0]; UT[1] += v4Jac[0] * v4Jac[1]; UT[2] += v4Jac[0] * v4Jac[2]; UT[3] += v4Jac[0] * v4Jac[3];
                UT[4] += v4Jac[1] * v4Jac[1]; UT[5] += v4Jac[1] * v4Jac[2]; UT[6] += v4Jac[1] * v4Jac[3];
                UT[7] += v4Jac[2] * v4Jac[2]; UT[8] += v4Jac[2] * v4Jac[3];
                UT[9] += v4Jac[3] * v4Jac[3];
            }
        }

        // Expand the cached upper triangle into the symmetric Gram matrix sum(J^T J).
        cv::Matx<double, 4, 4> m4;
        m4(0, 0) = UT[0];
        m4(1, 0) = m4(0, 1) = UT[1]; m4(1, 1) = UT[4]; m4(1, 2) = m4(2, 1) = UT[5]; m4(1, 3) = m4(3, 1) = UT[6];
        m4(2, 0) = m4(0, 2) = UT[2]; m4(2, 2) = UT[7]; m4(2, 3) = m4(3, 2) = UT[8];
        m4(3, 0) = m4(0, 3) = UT[3]; m4(3, 3) = UT[9];

        // m4 is PSD; solving with Cholesky preferably...
        cv::Vec<double, 4> v4Update;
        cv::solve(m4, v4Accum, v4Update, cv::DECOMP_CHOLESKY);

        // Apply the (negated) step to the SE2 estimate and to the DC offset.
        SE2<> se2Update;
        se2Update.get_translation() = cv::Vec2f(-v4Update[0], -v4Update[1]);
        se2Update.get_rotation() = SO2<>::exp(-v4Update[2]);
        se2CtoC = se2CtoC * se2Update;
        dMeanOffset -= v4Update[3];
    }

    pair<SE2<>, double> result_pair;
    result_pair.first = se2CtoC;
    result_pair.second = dFinalScore;
    return result_pair;
}
// What is the 3D camera rotation which causes an input image SE2 motion?
//
// Done by projecting two points and iterating an SO3 for a fixed 3
// Gauss-Newton steps so that the rotated points reproject onto the
// SE2-transformed pixels. It might seem odd doing this so precisely when the
// whole SE2-finding is one big hack, but hey.
//
//   se2    - 2D rigid transformation found in SBI pixel coordinates
//   camera - camera model. It is taken BY VALUE, so resizing it below only
//            affects this local copy; the caller's camera is untouched and
//            nothing has to be backed up or restored.
//
// Returns an SE3 whose rotation is the estimated SO3; its translation is left
// as default-constructed.
SE3<> SmallBlurryImage::SE3fromSE2(SE2<> se2, ATANCamera camera)
{
    // Use the SBI size for the projection and derivative computations.
    camera.SetImageSize(mirSize);

    const int nCenterX = mirSize.width / 2;
    const int nCenterY = mirSize.height / 2;

    // The two target pixels: centre + se2 * (5, 0) and centre + se2 * (-5, 0).
    cv::Matx<float, 2, 2> m2R = se2.get_rotation().get_matrix();
    cv::Vec<float, 2> v2t = se2.get_translation();
    cv::Vec<float, 2> av2Turned[2];
    av2Turned[0] = cv::Vec<float, 2>( nCenterX + m2R(0, 0) * 5 + v2t[0],
                                      nCenterY + m2R(1, 0) * 5 + v2t[1]);
    av2Turned[1] = cv::Vec<float, 2>( nCenterX + m2R(0, 0) * (-5) + v2t[0],
                                      nCenterY + m2R(1, 0) * (-5) + v2t[1]);

    // The ORIGINAL points centre + (5, 0) and centre - (5, 0), un-projected
    // and back-projected to 3D (hence no 2D transformation in these expressions).
    cv::Vec3f av3OrigPoints[2];
    av3OrigPoints[0] = CvUtils::backproject( camera.UnProject(nCenterX + 5, nCenterY) );
    av3OrigPoints[1] = CvUtils::backproject( camera.UnProject(nCenterX - 5, nCenterY) );

    SO3<> so3; // default-constructed starting estimate

    // Just 3 iterations...
    for(int it = 0; it<3; it++) {
        // Inline least squares: information matrix starts at 10 * I3
        // (acts as a prior / regularizer), information vector at zero.
        cv::Matx<float, 3, 3> m3Omega = 10.0 * cv::Matx<float, 3, 3>::eye();
        cv::Vec<float, 3> v3ksi(0, 0, 0);

        for(int i=0; i<2; i++) {
            // Project the rotated point into the image to find the error.
            cv::Vec3f v3Cam = so3 * av3OrigPoints[i];
            cv::Vec2f v2Pixels = camera.Project( CvUtils::pproject(v3Cam) );
            cv::Vec2f v2Error = av2Turned[i] - v2Pixels;

            // Projection derivatives; requested right after Project().
            cv::Matx<float, 2, 2> m2CamDerivs = camera.GetProjectionDerivs();
            cv::Matx<float, 2, 3> m23Jacobian;
            double dOneOverCameraZ = 1.0 / v3Cam[2];

            // One Jacobian column per SO3 generator.
            for(int m=0; m<3; m++) {
                const cv::Vec<float, 3> v3Motion = SO3<>::generator_field(m, v3Cam);
                // Motion of the perspective-projected point (quotient rule).
                cv::Vec<float, 2> v2CamFrameMotion( ( v3Motion[0] - v3Cam[0] * v3Motion[2] * dOneOverCameraZ ) * dOneOverCameraZ,
                                                    ( v3Motion[1] - v3Cam[1] * v3Motion[2] * dOneOverCameraZ ) * dOneOverCameraZ
                                                  );
                // column m = m2CamDerivs * v2CamFrameMotion
                m23Jacobian(0, m) = m2CamDerivs(0, 0) * v2CamFrameMotion[0] + m2CamDerivs(0, 1) * v2CamFrameMotion[1];
                m23Jacobian(1, m) = m2CamDerivs(1, 0) * v2CamFrameMotion[0] + m2CamDerivs(1, 1) * v2CamFrameMotion[1];
            }

            // 1. updating the information matrix (m3Omega)
            m3Omega += 1.0 * m23Jacobian.t() * m23Jacobian;
            // 2. updating the information vector
            v3ksi += 1.0 * m23Jacobian.t() * v2Error;
        }

        // Solve the linear system and left-multiply the update onto the estimate.
        cv::Vec<float, 3> v3Res;
        cv::solve(m3Omega, v3ksi, v3Res, cv::DECOMP_CHOLESKY);
        so3 = SO3<>::exp(v3Res) * so3;
    }

    SE3<> se3Result;
    se3Result.get_rotation() = so3;
    return se3Result;
}