-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgroup.py
executable file
·572 lines (500 loc) · 20.3 KB
/
group.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
#!/usr/bin/env python3
"""Stage 2: Use feature detection/comparison to group images of rolled dice.
Example:
%(prog)s data/myd20/
where there is a subdirectory data/myd20/crop/ containing extracted die images
from stage 1.
Send SIGHUP to render an intermediate summary image and show it.
On a 2.4GHz i5 MacBook Pro, this takes about 20 minutes for 3k images.
TODO: Use multiprocessing.
"""
import cv2
import numpy
import PIL
import PIL.Image
import PIL.ImageDraw
import argparse
import collections
import json
import os
import random
import signal
import sys
# Edge length of the square thumbnail that each image is resized to before it
# is pasted into the summary image (see _BaseImageComparison.summary_image).
SUMMARY_MEMBER_IMAGE_SIZE = 90
# Color passed to draw.text() for the per-image detail lines on the summary.
DETAIL_COLOR = (254, 0, 0)
# Upper bound used to cap how many thumbnails fit across one summary row.
IMAGE_SIZE_MAX = 65500 # hard limit imposed by PIL
# Positive infinity; used as the "no limit" / "nothing measured yet" value.
INF = float('Inf')
class _BaseImageComparison(object):
def __init__(self, in_filename):
self.basename = os.path.basename(in_filename)
self.full_image = PIL.Image.open(in_filename)
self._summary_image = None # lazily calculate a proxy res from full_image
# Was this image ever a representative? Used when drawing the summary image.
self.is_representative = False
# All the other images that match this one.
self.members = []
@property
def summary_image(self):
if self._summary_image is None:
self._summary_image = self.full_image.resize(
(SUMMARY_MEMBER_IMAGE_SIZE, SUMMARY_MEMBER_IMAGE_SIZE))
return self._summary_image
def _AddMember(self, image):
self.members.append(image)
if image.members:
self.members.extend(image.members)
image.members = []
# Three-element (x, y, 1) points that FeatureComparison._GetMatchCount pushes
# through the homography matrix: the origin and one unit step along each axis.
# The distance between a mapped unit step and the mapped origin is compared
# against 1.0 there to estimate how much the match distorts scale.
ORIGIN = numpy.array([0, 0, 1])
DX = numpy.array([1, 0, 1])
DY = numpy.array([0, 1, 1])
class FeatureComparison(_BaseImageComparison):
    """Keypoints, descriptors, and best-match bookkeeping for one die image.

    Modeled on OpenCV's find_obj.py example, as in:
      find_obj.py --feature=akaze crop/DSC_0001.JPG crop/DSC_0002.JPG
    """

    # Feature type selection:
    #   Brisk: faster, some false positive matches
    #   Orb: faster, less accurate (inlier count less precise a threshold)
    #   Akaze: slower, better threshold on inlier count v. match and not
    # Both objects are class attributes, so a single detector/matcher pair is
    # shared by every instance.
    _detector = cv2.AKAZE_create()
    _matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

    def __init__(self, in_filename):
        """Loads the image and computes its features.

        Raises:
          RuntimeError: OpenCV could not read the file.
          NoFeaturesError: the detector produced no descriptors.
        """
        super(FeatureComparison, self).__init__(in_filename)
        # Flag 0 asks OpenCV for a single-channel grayscale image.
        grayscale = cv2.imread(in_filename, 0)
        if grayscale is None:
            raise RuntimeError('OpenCV could not open %s' % in_filename)
        keypoints, descriptors = FeatureComparison._detector.detectAndCompute(
            grayscale, None)
        self._features = keypoints
        self._descriptors = descriptors
        if descriptors is None or len(descriptors) == 0:
            raise NoFeaturesError('No features in %s' % in_filename)
        # Record of the strongest comparison seen so far for this image.
        self._best_match = None
        self._best_match_count = 0
        self._best_feature_proportion = INF
        self._best_scale = INF

    def _GetMatchCount(self, other, verbose=True):
        """Counts features that agree with `other` under a single homography.

        Args:
          other: the FeatureComparison to match this image against.
          verbose: when true, print a one-line summary of the comparison.

        Returns:
          (match_count, scale_amount) as a tuple. match_count is the number of
          RANSAC inliers, i.e. features that match as a group rather than only
          individually; it is 0 when fewer than four candidate pairs exist or
          no homography is found. scale_amount is INF in those cases.

        NOTE(review): scale_amount is the summed deviation of the two mapped
        unit-axis lengths from 1.0, inverted when that sum is below 1.0. A
        near-perfect homography therefore yields a very large value. Confirm
        this is intended before relying on a finite --scale-threshold.
        """
        knn_pairs = FeatureComparison._matcher.knnMatch(
            self._descriptors, trainDescriptors=other._descriptors, k=2)
        p1, p2, _ = self._FilterMatches(
            self._features, other._features, knn_pairs)

        match_count, scale_amount = 0, INF
        homography_mat = None
        # Homography estimation needs at least four point pairs.
        if len(p1) >= 4:
            homography_mat, inlier_pt_mask = cv2.findHomography(
                p1, p2, cv2.RANSAC, 5.0)
        if homography_mat is not None:
            match_count = numpy.sum(inlier_pt_mask)
            # Sometimes matching faces are visible but the die is rotated.
            # That is, this die has 5 on top but 19 visible to the side, and
            # the other die has 19 on top but 5 visible. OpenCV may find a
            # match, but the match will not be pure translation/rotation, and
            # will distort scale.
            mapped_origin = homography_mat.dot(ORIGIN)
            axis_deviations = [
                abs(1.0 - numpy.linalg.norm(
                    homography_mat.dot(unit_step) - mapped_origin))
                for unit_step in (DX, DY)]
            scale_amount = sum(axis_deviations)
            if scale_amount < 1.0:
                if scale_amount > 0:
                    scale_amount = 1.0 / scale_amount
                else:
                    scale_amount = INF

        if verbose:
            report = (
                self.basename,
                len(self._descriptors),
                other.basename,
                len(other._descriptors),
                len(p1),
                match_count,
                scale_amount)
            print(
                '%s (%d) match %s (%d) = %d match => %s inl / %.2f sh'
                % report)
        return match_count, scale_amount

    def _GetFeatureProportion(self, other):
        """Returns the ratio of feature counts between this and another image.

        The ratio is flipped when needed so it is always >= 1, and it is
        infinity if either image has 0 features.
        """
        theirs = float(len(other._features))
        mine = float(len(self._features))
        if theirs == 0 or mine == 0:
            return INF
        ratio = theirs / mine
        return 1.0 / ratio if ratio < 1.0 else ratio

    def TakeImageIfMatch(
            self,
            image,
            match_threshold,
            scale_threshold,
            feature_threshold,
            try_members=False):
        """Adds `image` to this cluster if it matches well enough.

        Args:
          image: the FeatureComparison being placed.
          match_threshold: minimum inlier count for a match.
          scale_threshold: maximum scale amount for a match.
          feature_threshold: feature proportion must be below this value.
          try_members: also compare against up to ten of this cluster's
            members (after sorting them ascending by their recorded best
            match count) and suppress the per-comparison log line.

        Returns:
          True if image was added as a member, False otherwise. Either way,
          image's best-match record is updated whenever a comparison is
          complete or beats its previous best inlier count.
        """
        candidates = [self]
        if try_members:
            # Usually reparenting works within the first few tries if at all.
            self.members.sort(key=lambda m: m._best_match_count)
            candidates += self.members[:10]

        for candidate in candidates:
            match_count, scale_amount = image._GetMatchCount(
                candidate, verbose=not try_members)
            feature_proportion = image._GetFeatureProportion(candidate)
            is_complete = (
                match_count >= match_threshold
                and scale_amount <= scale_threshold
                and feature_proportion < feature_threshold)
            if is_complete or match_count > image._best_match_count:
                image._best_match = candidate
                image._best_match_count = match_count
                image._best_feature_proportion = feature_proportion
                image._best_scale = scale_amount
            if not is_complete:
                continue
            self._AddMember(image)
            via = '' if candidate is self else ' via ' + candidate.basename
            print('%s matches %s%s => %d inl / %.2f scale' % (
                image.basename,
                self.basename,
                via,
                match_count,
                scale_amount))
            return True
        return False

    def DrawOnSummary(self, draw, coords):
        """Writes this image's name and match statistics onto the summary.

        Args:
          draw: object exposing text(position, text[, color]).
          coords: (x, y) of this image's top-left corner in the summary.
        """
        x, y = coords
        draw.text((x, y), self.basename)
        # (vertical offset, text) for each detail line under the file name.
        detail_lines = [
            (10, 'features: %d' % len(self._features)),
            (50, 'matches: %d' % self._best_match_count),
            (60, ' sh: %.2f' % self._best_scale),
            (70, ' fp: %.2f' % self._best_feature_proportion),
        ]
        if self.is_representative and self._best_match:
            detail_lines.append((80, ' %s' % self._best_match.basename))
        for offset, text in detail_lines:
            draw.text((x, y + offset), text, DETAIL_COLOR)

    @staticmethod
    def _FilterMatches(features_a, features_b, raw_matches, ratio=0.75):
        """Keeps the k=2 matches whose best hit clearly beats the runner-up.

        Returns:
          (p1, p2, pairs): float32 arrays of the kept keypoint positions from
          each image, and a zip of the kept keypoint pairs.
        """
        kept_a, kept_b = [], []
        for candidates in raw_matches:
            if len(candidates) != 2:
                continue
            best, runner_up = candidates
            if best.distance < runner_up.distance * ratio:
                kept_a.append(features_a[best.queryIdx])
                kept_b.append(features_b[best.trainIdx])
        p1 = numpy.float32([kp.pt for kp in kept_a])
        p2 = numpy.float32([kp.pt for kp in kept_b])
        return p1, p2, zip(kept_a, kept_b)
class NoFeaturesError(RuntimeError):
    """Raised when no features are detected in an image, making it unusable."""
class LabelDetail(object):
    """Accumulates the pixel coordinates recorded for one label."""

    def __init__(self):
        # Parallel lists: the i-th recorded pixel is (_x[i], _y[i]).
        self._x = []
        self._y = []

    def Update(self, x, y):
        """Records one more pixel at (x, y)."""
        self._x.append(x)
        self._y.append(y)

    def GetCoords(self):
        """Returns the recorded pixels as a list of (x, y) tuples."""
        pairs = zip(self._x, self._y)
        return list(pairs)

    def GetBounds(self):
        """Returns (x_min, y_min, x_max, y_max) over the recorded pixels.

        An axis with nothing recorded reports INF for its minimum and -INF
        for its maximum.
        """
        xs, ys = self._x, self._y
        x_low, x_high = (min(xs), max(xs)) if xs else (INF, -INF)
        y_low, y_high = (min(ys), max(ys)) if ys else (INF, -INF)
        return (x_low, y_low, x_high, y_high)
# Offset added to the Otsu threshold value in PipCounter before the final
# thresholding pass (its sign is flipped when BLACK_PIPS is false).
PIP_THRESHOLD_ADJUST = -10 # more negative means pips shrink apart
# A connected region counts as a pip candidate only if its pixel count lies
# strictly between these two bounds.
PIP_AREA_PX_MIN = 600
PIP_AREA_PX_MAX = 1500
# True selects inverted binary thresholding (THRESH_BINARY_INV) in PipCounter;
# False selects plain THRESH_BINARY.
BLACK_PIPS = True
# When true, a candidate region must also pass PipCounter's bounding-box
# aspect-ratio and fill-proportion checks to be counted.
STRICT_PIPS = False
class PipCounter(_BaseImageComparison):
    """Counts the pips in one die image; images match on equal pip counts.

    The image is thresholded, split into connected regions, and each region
    whose pixel count falls in the pip range is counted. Counted regions are
    painted onto full_image so they show up in the summary image.
    """

    def __init__(self, in_filename):
        """Loads in_filename, counts its pips, and prints per-region details."""
        super(PipCounter, self).__init__(in_filename)
        img = cv2.imread(in_filename)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if BLACK_PIPS:
            threshold_mode = cv2.THRESH_BINARY_INV
            threshold_adjust = PIP_THRESHOLD_ADJUST
        else:
            threshold_mode = cv2.THRESH_BINARY
            threshold_adjust = -PIP_THRESHOLD_ADJUST
        # Use Otsu thresholding to find a base threshold, then adjust down to
        # favor the white die face.
        # http://docs.opencv.org/master/d7/d4d/tutorial_py_thresholding.html
        otsu_threshold_value, _ = cv2.threshold(
            gray,
            0,
            255,
            threshold_mode + cv2.THRESH_OTSU)
        _, thresh = cv2.threshold(
            gray,
            otsu_threshold_value + threshold_adjust,
            255,
            threshold_mode)
        num_labels, labels = cv2.connectedComponents(
            numpy.uint8(thresh))
        # Dictionary lookups take nontrivial time here so use indices instead.
        # (range, not xrange: xrange does not exist on Python 3.)
        label_details = [LabelDetail() for _ in range(num_labels + 1)]
        for y, row in enumerate(labels):
            for x, label in enumerate(row):
                label_details[label].Update(x, y)
        # TODO Use convex hull or ellipse fit to determine which regions are
        # pips on the front face of the die.
        # http://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html
        # Simple label counting works for regular pipped dice. Skew d6s show
        # pips on the sides as well as fronts.
        # NOTE(review): an earlier comment said labels touching the image
        # edges are excluded; nothing here does that - confirm the intent.
        self._num_pips = 0
        for label, detail in enumerate(label_details):
            coords = detail.GetCoords()
            area = len(coords)
            if area <= 100:
                continue
            x_min, y_min, x_max, y_max = detail.GetBounds()
            width = x_max - x_min
            height = y_max - y_min
            if width == 0 or height == 0:
                # A one-pixel-wide or one-pixel-tall streak would divide by
                # zero in the shape measures below; skip it instead.
                continue
            # Bounding-box aspect ratio, flipped so it is always >= 1.
            e = float(height) / width
            if e < 1.0:
                e = 1.0 / e
            fill_proportion = float(area) / (height * width)
            fill_color = None
            if PIP_AREA_PX_MIN < area < PIP_AREA_PX_MAX:
                # Right size: painted red unless it also counts as a pip.
                fill_color = (254, 0, 0)
                if not STRICT_PIPS or (e < 1.65 and fill_proportion > 0.68):
                    fill_color = (254, 254, 100)
                    self._num_pips += 1
            print('%d\tpx=%d e=%.3f fill=%.3f %s' % (
                label, area, e, fill_proportion, fill_color))
            if fill_color is not None:
                for xy in coords:
                    self.full_image.putpixel(xy, fill_color)
        print('%s = %d' % (self.basename, self._num_pips))

    def TakeImageIfMatch(
            self,
            image,
            unused_match_threshold,
            unused_scale_threshold,
            unused_feature_threshold,
            try_members=False):
        """Adds image to this cluster if it has the same pip count.

        The threshold arguments and try_members are accepted only so the
        signature lines up with FeatureComparison.TakeImageIfMatch; they are
        ignored.

        Returns:
          True if image was added as a member, False otherwise.
        """
        if self._num_pips == image._num_pips:
            self._AddMember(image)
            return True
        return False

    def DrawOnSummary(self, draw, coords):
        """Writes the file name and pip count at coords on the summary."""
        x, y = coords
        draw.text((x, y), self.basename)
        draw.text(
            (x, y + 10), str(self._num_pips), DETAIL_COLOR)
def AssignToCluster(
        in_filename,
        representatives,
        match_threshold,
        scale_threshold,
        feature_threshold,
        count_pips):
    """Loads one die image and files it under the first cluster it matches.

    Args:
      in_filename: path of the cropped die image.
      representatives: list of cluster representatives; modified in place.
        The new image joins the first representative that accepts it,
        otherwise it is appended as the representative of a new cluster.
      match_threshold: passed through to TakeImageIfMatch.
      scale_threshold: passed through to TakeImageIfMatch.
      feature_threshold: passed through to TakeImageIfMatch.
      count_pips: if true the image is analysed with PipCounter, otherwise
        with FeatureComparison.
    """
    comparison_class = PipCounter if count_pips else FeatureComparison
    image = comparison_class(in_filename)
    # any() stops at the first representative that takes the image.
    adopted = any(
        candidate.TakeImageIfMatch(
            image, match_threshold, scale_threshold, feature_threshold)
        for candidate in representatives)
    if adopted:
        return
    print('starts new cluster')
    image.is_representative = True
    representatives.append(image)
def CombineSmallClusters(
        representatives, match_threshold, scale_threshold, feature_threshold):
    """Finds small clusters and combines them with existing large clusters.

    In the previous step, the representative images for small clusters were
    only compared with the large clusters' representative images. Now, compare
    them against additional members of the large clusters, checking matches as
    before.

    Typical results have a large cluster (around a hundred members) for each
    of the faces of the die, and then a long tail of small clusters (1-10
    members) of images that didn't get a good match. As a heuristic, small
    clusters are those with less than a quarter of the members of the largest
    cluster.

    Args:
      representatives: list of cluster representatives; each has `members`,
        `basename` and TakeImageIfMatch(..., try_members=True).
      match_threshold: passed through to TakeImageIfMatch.
      scale_threshold: passed through to TakeImageIfMatch.
      feature_threshold: passed through to TakeImageIfMatch.

    Returns:
      A new list: the large clusters, largest first, followed by the small
      clusters that no large cluster accepted. With no small clusters (or
      fewer than two clusters) nothing is reparented.
    """
    if not representatives:
        return []

    # Sort on size only. Sorting (size, representative) tuples would compare
    # the representative objects themselves whenever two sizes tie, which
    # raises TypeError on Python 3. The sort is stable, so ties keep their
    # incoming order.
    by_size = sorted(
        representatives, key=lambda r: len(r.members), reverse=True)
    cluster_sizes = [len(r.members) for r in by_size]

    small_cutoff = cluster_sizes[0] / 4.0
    # Default: every cluster is large. Without this, a list with no small
    # cluster would wrongly treat its last entry as the tail, and a list of
    # one would leave the index undefined.
    first_small_index = len(by_size)
    for index in range(1, len(by_size)):
        if cluster_sizes[index] < small_cutoff:
            first_small_index = index
            break
    print('splitting: %s %s' % (
        cluster_sizes[:first_small_index], cluster_sizes[first_small_index:]))

    main_clusters = by_size[:first_small_index]
    tail_clusters = by_size[first_small_index:]
    print('reparent: %d large clusters, %d small clusters' % (
        len(main_clusters), len(tail_clusters)))

    not_reparented = []
    for tail_representative in tail_clusters:
        # any() stops at the first large cluster that takes the small one.
        reparented = any(
            main_representative.TakeImageIfMatch(
                tail_representative,
                match_threshold,
                scale_threshold,
                feature_threshold,
                try_members=True)
            for main_representative in main_clusters)
        if not reparented:
            print('failed to reparent', tail_representative.basename)
            not_reparented.append(tail_representative)
    return main_clusters + not_reparented
def BuildClusterSummaryImage(representatives, raw_max_members):
    """Draws a composite image summarizing the clusters.

    Each cluster is one row: the representative's thumbnail first, then its
    members, each annotated through DrawOnSummary.

    Args:
      representatives: list of cluster representatives.
      raw_max_members: maximum thumbnails per row, or None/0 for as many as
        fit within IMAGE_SIZE_MAX.

    Returns:
      The composed PIL image, or None if representatives is empty.
    """
    if not representatives:
        return
    large_edge = representatives[0].summary_image.size[0]
    # Floor division keeps max_members an int. With "/" it becomes a float on
    # Python 3 whenever raw_max_members is unset, and the slice and the image
    # size below then raise TypeError.
    max_members = min(raw_max_members or INF, IMAGE_SIZE_MAX // large_edge)
    h = large_edge * len(representatives)
    # Widest row, counting the representative itself, capped at max_members.
    widest_row = max(1 + len(r.members) for r in representatives)
    w = min(max_members, widest_row) * large_edge
    summary_image = PIL.Image.new('RGB', (w, h))
    draw = PIL.ImageDraw.Draw(summary_image)
    for i, representative in enumerate(representatives):
        y = i * large_edge
        all_members = (
            [representative] + representative.members[:max_members - 1])
        for j, member in enumerate(all_members):
            x = j * large_edge
            summary_image.paste(member.summary_image, (x, y))
            member.DrawOnSummary(draw, (x, y))
        # Drawn after the thumbnails so it overlays the representative's tile.
        draw.text(
            (0, y + 20),
            'members: %d' % (len(representative.members) + 1), DETAIL_COLOR)
    return summary_image
def SaveGrouping(
        representatives, summary_data, summary_image,
        summary_max_members=None):
    """Writes the groupings as JSON, then saves and shows the summary image.

    Args:
      representatives: list of cluster representatives.
      summary_data: path of the JSON file to write. Its content is a list of
        lists; each inner list is a representative's file name followed by
        its members' file names.
      summary_image: path the summary image is saved to.
      summary_max_members: passed to BuildClusterSummaryImage to limit how
        many images are drawn per cluster.
    """
    for cluster in representatives:
        print(cluster.basename, (1 + len(cluster.members)))

    print('saving summary data to', summary_data)
    data_summary = [
        [cluster.basename] + [member.basename for member in cluster.members]
        for cluster in representatives]
    with open(summary_data, 'w') as data_file:
        json.dump(data_summary, data_file)

    composite = BuildClusterSummaryImage(representatives, summary_max_members)
    composite.save(summary_image)
    print('summary image saved to', summary_image)
    composite.show()
# Set by the SIGHUP handler below; the main loop checks it after each image
# and clears it once the intermediate summary has been shown.
summary_requested = False


def RequestSummary(signal_num, stack_frame):
    """Signal handler: flags that an intermediate summary was requested.

    Both arguments are the ones the signal module passes to handlers; they
    are not used.
    """
    global summary_requested
    summary_requested = True
def BuildArgParser():
    """Builds the command-line parser for this script.

    The module docstring supplies the help text: everything before its first
    blank line becomes the description and the rest becomes the epilog. The
    data directory is not declared here; the caller takes it from the
    leftover arguments returned by parse_known_args().

    Returns:
      The configured argparse.ArgumentParser.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # empty help text instead of failing on None.partition.
    summary_line, _, main_doc = (__doc__ or '').partition('\n\n')
    parser = argparse.ArgumentParser(
        description=summary_line,
        epilog=main_doc,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '--match-count-threshold', '-m', default=32, type=int,
        dest='match_threshold',
        help='Number of matching features to consider two images a match.')
    parser.add_argument(
        '--scale-threshold', default=INF, type=float,
        dest='scale_threshold',
        help='Amount of scaling above which two images are not considered a '
             'match. Default is infinity (no threshold). Set to a lower value '
             'if adjacent sides on a die are being confused.')
    parser.add_argument(
        '--feature-threshold', default=1.2, type=float,
        dest='feature_threshold',
        help='Two images are only considered matching if they have a similar '
             'overall number of features. This is useful for dice with a '
             'side with very few features, such as a 1-pip face. It may '
             'have a high match count compared with a 6-pip face, but they '
             'will have very different overall feature counts.')
    parser.add_argument(
        '--crop-dir', default='crop', dest='crop_dir',
        help='Subdirectory within the data directory of cropped images from '
             'stage 1.')
    parser.add_argument(
        '--count-pips', action='store_true', dest='count_pips',
        # The original pieces joined into "insteadof"; the space is restored.
        help='Search for pips (count spots as on a common six-sided die) '
             'instead of matching features (as for numerals on a d20).')
    parser.add_argument(
        '--summary-image', '-s', dest='summary_image', default='summary.jpg',
        # The script always saves the image under the data directory; the
        # previous text described a "shown but not saved" mode that does not
        # exist.
        help='File path for the summary image under the data directory.')
    parser.add_argument(
        '--summary-data', '-d', dest='summary_data', default='summary.json',
        help='File path for the summary data under the data directory. The '
             'JSON is an ordered list of lists. The inner lists are each '
             'names of files which map to the same die face.')
    parser.add_argument(
        '--summary-max-members', default=35, type=int,
        dest='summary_max_members',
        help='Max number of images to show per grouping in the summary image. '
             'Set to <= 0 to allow unlimited members shown.')
    return parser
if __name__ == '__main__':
    # Stage 2 driver: cluster every cropped image, merge the small clusters
    # into the large ones, then write the JSON grouping and the summary image.
    parser = BuildArgParser()
    # The parser declares no positional arguments, so the data directory
    # arrives in the list of leftover arguments.
    args, positional = parser.parse_known_args()
    if len(positional) != 1:
        parser.error('A single argument for the data directory is required.')
    data_dir = positional[0]
    crop_dir = os.path.join(data_dir, args.crop_dir)
    summary_max_members = (
        args.summary_max_members if args.summary_max_members > 0 else None)

    signal.signal(signal.SIGHUP, RequestSummary)
    print('Send SIGHUP (kill -HUP %d) for current summary image.'
          % os.getpid())

    # List of representative images (with their member lists).
    representatives = []
    # Which image starts a cluster depends on processing order, and
    # os.listdir() returns names in arbitrary order; sort so reruns over the
    # same directory behave the same way.
    cropped_image_names = sorted(os.listdir(crop_dir))
    n = len(cropped_image_names)
    failed_files = []
    try:
        for i, cropped_image_filename in enumerate(cropped_image_names):
            if not cropped_image_filename.lower().endswith('jpg'):
                continue
            print('%d/%d ' % (i, n))
            try:
                AssignToCluster(
                    os.path.join(crop_dir, cropped_image_filename),
                    representatives,
                    args.match_threshold,
                    args.scale_threshold,
                    args.feature_threshold,
                    args.count_pips)
            except (NoFeaturesError, cv2.error) as e:
                print(e)
                failed_files.append(cropped_image_filename)
            if summary_requested:
                print('Rendering intermediate summary.')
                summary_requested = False
                intermediate = BuildClusterSummaryImage(
                    representatives, summary_max_members)
                # None means no cluster exists yet, so there is nothing to
                # show.
                if intermediate is not None:
                    intermediate.show()
    except KeyboardInterrupt:
        print('got ^C, early stop for categorization')

    # Only combine when something was clustered; the "No data!" exit below
    # handles the empty case.
    if representatives:
        try:
            representatives = CombineSmallClusters(
                representatives,
                args.match_threshold,
                args.scale_threshold,
                args.feature_threshold)
        except KeyboardInterrupt:
            print('got ^C, cancelling combining clusters')

    print(len(failed_files), 'failed files:', failed_files)
    if not representatives:
        print('No data!')
        sys.exit(1)
    SaveGrouping(
        representatives,
        os.path.join(data_dir, args.summary_data),
        os.path.join(data_dir, args.summary_image),
        summary_max_members)