// cvPerceptualHash.cpp
#include "cvPerceptualHash.h"
#include "MathUtils.cpp"
using namespace cv;

/**
 * Transforms a BGR image to grayscale by desaturating it in HSV space,
 * which is equivalent to taking the per-pixel maximum of the B, G and R channels.
 * @param inputImage  BGR input image
 * @param outputImage single-channel grayscale output
 */
void flatten(const Mat &inputImage, Mat &outputImage) {
    Mat hsv, desaturated3C;
    cvtColor(inputImage, hsv, CV_BGR2HSV); // convert the inputImage to HSV space
    cvtColor(hsv & Vec3b(255, 0, 255), desaturated3C, CV_HSV2BGR); // zero out the saturation channel
    Mat chans[3];
    split(desaturated3C, chans); // extract each individual channel
    chans[0].copyTo(outputImage); // all three channels are now equal, so keep the first one
}
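
// A minimal usage sketch for flatten(), not part of the original file: the
// synthetic BGR image below is an assumption used purely for illustration.
// Guarded by CVPHASH_EXAMPLES so it is only compiled on demand.
#ifdef CVPHASH_EXAMPLES
static void flattenExample() {
    cv::Mat color(64, 64, CV_8UC3, cv::Scalar(10, 200, 30)); // uniform BGR fill
    cv::Mat gray;
    flatten(color, gray);
    CV_Assert(gray.channels() == 1); // single-channel output
    // Every pixel now equals max(B, G, R) of the original fill, i.e. 200 here.
}
#endif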

/**
 * Downsamples an input image by iteratively halving its size until either its width matches hDim or its height
 * matches vDim.
 * @param inputImage
 * @param outputImage
 * @param hDim
 * @param vDim
 */
void downsample(const Mat &inputImage, Mat &outputImage, int hDim, int vDim) {
    int originalWidth = inputImage.size().width;
    int originalHeight = inputImage.size().height;
    // Param checking
    if (originalWidth < hDim || originalHeight < vDim) {
        throw ("Cannot downsample! Target dimensions are larger than the actual image size");
    }
    if (!isPowerOfTwo(originalWidth) || !isPowerOfTwo(originalHeight)) {
        throw ("Image dimensions are not powers of two!");
    }
    if (!isPowerOfTwo(hDim) || !isPowerOfTwo(vDim)) {
        throw ("Target dimensions are not powers of two!");
    }
    Mat aux, downsampled;
    inputImage.copyTo(aux);
    while (aux.size().width > hDim && aux.size().height > vDim) {
        pyrDown(aux, downsampled, Size(aux.size().width / 2, aux.size().height / 2));
        downsampled.copyTo(aux);
    }
    aux.copyTo(outputImage); // aux holds the latest result, even if no downsampling was needed
}
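
// A minimal usage sketch for downsample(), not part of the original file: with
// power-of-two inputs each pyrDown pass halves both dimensions, so a synthetic
// 1024x1024 image reaches 256x256 in two iterations (1024 -> 512 -> 256).
#ifdef CVPHASH_EXAMPLES
static void downsampleExample() {
    cv::Mat big(1024, 1024, CV_8UC1, cv::Scalar(128)); // synthetic grayscale image
    cv::Mat shrunk;
    downsample(big, shrunk, 256, 256);
    CV_Assert(shrunk.size() == cv::Size(256, 256));
}
#endif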

/**
 * Upsamples an input image by iteratively doubling its size until either its width matches hDim or its height
 * matches vDim.
 * @param inputImage
 * @param outputImage
 * @param hDim
 * @param vDim
 */
void upsample(const Mat &inputImage, Mat &outputImage, int hDim, int vDim) {
    int originalWidth = inputImage.size().width;
    int originalHeight = inputImage.size().height;
    // Param checking
    if (originalWidth > hDim || originalHeight > vDim) {
        throw ("Cannot upsample! Target dimensions are smaller than the actual image size");
    }
    if (!isPowerOfTwo(originalWidth) || !isPowerOfTwo(originalHeight)) {
        throw ("Image dimensions are not powers of two!");
    }
    if (!isPowerOfTwo(hDim) || !isPowerOfTwo(vDim)) {
        throw ("Target dimensions are not powers of two!");
    }
    Mat aux, upsampled;
    inputImage.copyTo(aux);
    while (aux.size().width < hDim && aux.size().height < vDim) {
        pyrUp(aux, upsampled, Size(aux.size().width * 2, aux.size().height * 2));
        upsampled.copyTo(aux);
    }
    aux.copyTo(outputImage); // aux holds the latest result, even if no upsampling was needed
}

/**
 * Transforms the input image into a 256x256 square image by resizing it to the nearest power-of-two square and
 * then downsampling or upsampling as needed.
 * @param inputImage
 * @param outputImage 256x256 image
 */
void normalize(const Mat &inputImage, Mat &outputImage) {
    Mat resizedImage;
    // Resize the inputImage to a 2^n x 2^n square image
    int width = inputImage.size().width;
    int height = inputImage.size().height;
    int xDim = nearest2nValue(width), yDim = nearest2nValue(height); // find the closest 2^n value for each dimension
    int dim = (abs(width - xDim) < abs(height - yDim)) ? xDim : yDim; // set the resize dimension to whichever side changes less
    resize(inputImage, resizedImage, Size(dim, dim)); // resize the inputImage
    if (dim == 256) {
        resizedImage.copyTo(outputImage);
    } else if (dim > 256) { // downsample
        downsample(resizedImage, outputImage, 256, 256);
    } else { // upsample
        upsample(resizedImage, outputImage, 256, 256);
    }
}

/**
 * Gets the top-left n-th portion of the input image (each dimension is divided by n).
 * @param inputImage
 * @param outputImage
 * @param n
 */
void cropTopLeftNthPortion(Mat &inputImage, Mat &outputImage, int n) {
    int x = inputImage.cols / n, y = inputImage.rows / n;
    Mat croppedImage = Mat(inputImage, Rect(Point(0, 0), Point(x, y))).clone();
    croppedImage.copyTo(outputImage);
}

/**
 * Transforms the input image from the spatial domain to the frequency domain via the DCT (Discrete Cosine Transform).
 * @param inputImage
 * @param outputMat
 */
void transformToFreqSpace(const Mat &inputImage, Mat &outputMat) {
    Mat floatMat;
    if (inputImage.type() == CV_32F) {
        inputImage.copyTo(floatMat);
    } else {
        inputImage.convertTo(floatMat, CV_32F); // dct() requires a floating-point matrix
    }
    dct(floatMat, outputMat);
}

/**
 * Takes an input Mat corresponding to a DCT transform and averages all its AC coefficients, ignoring the
 * DC coefficient (the top-left entry).
 * @param freqMat
 * @return the mean value of the AC coefficients
 */
float averageAcCoefs(const Mat &freqMat) {
    // Format matrix
    Mat _32bitMat;
    if (freqMat.type() == CV_32F) {
        freqMat.copyTo(_32bitMat);
    } else {
        freqMat.convertTo(_32bitMat, CV_32F);
    }
    // Get the total sum of all coefficients
    Mat rowSums, totalSum;
    reduce(_32bitMat, rowSums, 1, CV_REDUCE_SUM);
    reduce(rowSums, totalSum, 0, CV_REDUCE_SUM);
    float acCoefsSum = totalSum.at<float>(0, 0) - _32bitMat.at<float>(0, 0); // subtract the DC coefficient
    auto nAcCoefs = (float) (freqMat.rows * freqMat.cols - 1); // number of AC coefficients
    return acCoefsSum / nAcCoefs;
}
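
// A small sanity-check sketch for averageAcCoefs(), not part of the original
// file: for a 2x2 matrix the DC coefficient (top-left entry) is excluded, so
// the expected result is the mean of the remaining three entries.
#ifdef CVPHASH_EXAMPLES
static void averageAcCoefsExample() {
    float data[4] = {10.0f, 1.0f, 2.0f, 3.0f};
    cv::Mat freq(2, 2, CV_32F, data);
    float avg = averageAcCoefs(freq); // (1 + 2 + 3) / 3 = 2
    CV_Assert(avg > 1.999f && avg < 2.001f);
}
#endif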

// Documented in .h
float cvPerceptualHash::hash(cv::Mat image) {
    Mat grayscale, normalized, transformed, cropped;
    flatten(image, grayscale);                       // 1. reduce to a single channel
    normalize(grayscale, normalized);                // 2. bring the image to a fixed 256x256 size
    transformToFreqSpace(normalized, transformed);   // 3. move to the frequency domain via DCT
    cropTopLeftNthPortion(transformed, cropped, 16); // 4. keep the low-frequency 16x16 block
    float avg = averageAcCoefs(cropped);             // 5. average the AC coefficients
    return avg;
}

// Documented in .h
float cvPerceptualHash::compareImgs(cv::Mat image1, cv::Mat image2) {
    float h1 = cvPerceptualHash::hash(image1);
    float h2 = cvPerceptualHash::hash(image2);
    return cvPerceptualHash::compareHashes(h1, h2);
}

float cvPerceptualHash::compareHashes(float hash1, float hash2) {
    float hashLength = sizeof(float) * 8; // hash width in bits (32 for a float)
    float hammingDist = hammingDistance(hash1, hash2); // defined in MathUtils.cpp
    float similarityScore = (hashLength - hammingDist) / hashLength;
    return similarityScore;
}
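
// A minimal end-to-end sketch, not shipped with the original file: it loads two
// images from command-line paths (hypothetical), hashes them, and prints the
// similarity score in [0, 1]. It assumes OpenCV 2.x-style headers (consistent
// with the CV_BGR2HSV / CV_REDUCE_SUM constants used above); build with
// -DCVPHASH_EXAMPLE_MAIN to enable it.
#ifdef CVPHASH_EXAMPLE_MAIN
#include <iostream>
#include <opencv2/highgui/highgui.hpp> // cv::imread in the OpenCV 2.x layout

int main(int argc, char **argv) {
    if (argc < 3) {
        std::cerr << "usage: phash_example <image1> <image2>" << std::endl;
        return 1;
    }
    cv::Mat img1 = cv::imread(argv[1]); // BGR, as expected by flatten()
    cv::Mat img2 = cv::imread(argv[2]);
    if (img1.empty() || img2.empty()) {
        std::cerr << "could not read one of the input images" << std::endl;
        return 1;
    }
    float score = cvPerceptualHash::compareImgs(img1, img2);
    std::cout << "similarity: " << score << std::endl; // 1.0 means identical hashes
    return 0;
}
#endif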