-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
385 lines (349 loc) · 15.3 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
import cv2
import numpy as np
from math import log2, ceil
from config import *
def make_gaussian_pyramid(base, levels):
    """Return a Gaussian pyramid [base, down1, down2, ...] with `levels` downsamples.

    The returned list has `levels + 1` entries; entry 0 is a copy of `base`,
    and each subsequent entry is the previous one run through cv2.pyrDown.
    """
    pyramid = [base.copy()]
    for _ in range(levels):
        pyramid.append(cv2.pyrDown(pyramid[-1]))
    return pyramid
def make_laplacian_from_gaussian(gaussian):
    """Build a Laplacian pyramid from a Gaussian pyramid.

    The result is ordered smallest-first: element 0 is the coarsest Gaussian
    level verbatim, and each following element is the difference between a
    finer Gaussian level and the pyrUp of the level below it.
    """
    laplacian = [gaussian[-1]]
    for idx in reversed(range(1, len(gaussian))):
        upsampled = cv2.pyrUp(gaussian[idx])
        laplacian.append(cv2.subtract(gaussian[idx - 1], upsampled))
    return laplacian
def square_image(img, pad=0):
    """Center a (grayscale) image on a square canvas sized to the next power of 2.

    :param img: 2-D uint8-compatible image
    :param pad: fill value for the border area
    :return: (square canvas, (x_offset, y_offset)) where the offsets locate
             the original image's top-left corner inside the canvas
    """
    h, w = img.shape[0], img.shape[1]
    # side length: smallest power of two that holds the larger dimension
    side = 2 ** ceil(log2(max(h, w)))
    canvas = np.full((side, side), pad, dtype=np.uint8)
    # offsets that center the (possibly non-square) image on the canvas
    x_off = (side - w) // 2
    y_off = (side - h) // 2
    canvas[y_off:y_off + h, x_off:x_off + w] = img
    return canvas, (x_off, y_off)
def composite_gaussian_pyramid(pyramid):
    """Render a Gaussian pyramid into one image for visualization.

    The full-resolution level sits on the left; the successively smaller
    levels are stacked top-to-bottom in a column to its right.
    """
    base_rows, base_cols = pyramid[0].shape
    # canvas tall enough for either the base or the stacked smaller levels
    stacked_rows = sum(level.shape[0] for level in pyramid[1:])
    out = np.zeros(
        (max(base_rows, stacked_rows), base_cols + pyramid[1].shape[1]),
        dtype=np.uint8,
    )
    # base level on the left
    out[:base_rows, :base_cols] = pyramid[0]
    # remaining levels stacked in a column on the right
    row = 0
    for level in pyramid[1:]:
        n_rows, n_cols = level.shape[:2]
        out[row:row + n_rows, base_cols:base_cols + n_cols] = level
        row += n_rows
    return out
def composite_laplacian_pyramid(pyramid):
    """Render a Laplacian pyramid (smallest-first ordering) into one image.

    The largest level (last element) sits on the left; the smaller levels
    are stacked top-to-bottom, largest first, in a column to its right.
    """
    base_rows, base_cols = pyramid[-1].shape
    # canvas tall enough for either the base or the stacked smaller levels
    stacked_rows = sum(level.shape[0] for level in pyramid[:-1])
    out = np.zeros(
        (max(base_rows, stacked_rows), base_cols + pyramid[-2].shape[1]),
        dtype=np.uint8,
    )
    # largest level on the left
    out[:base_rows, :base_cols] = pyramid[-1]
    # remaining levels stacked in a column on the right, largest first
    row = 0
    for level in reversed(pyramid[:-1]):
        n_rows, n_cols = level.shape[:2]
        out[row:row + n_rows, base_cols:base_cols + n_cols] = level
        row += n_rows
    return out
# https://stackoverflow.com/questions/43391205/add-padding-to-images-to-get-them-into-the-same-shape
def pad_images_to_same_size(images):
    """
    :param images: sequence of images
    :return: list of images padded so that all images have same width and height (max width and height are used)
    """
    # target dimensions are the maxima over all inputs
    target_h = max((im.shape[0] for im in images), default=0)
    target_w = max((im.shape[1] for im in images), default=0)
    padded = []
    for im in images:
        h, w = im.shape[:2]
        # split the slack evenly; any odd pixel goes to the bottom/right
        top = (target_h - h) // 2
        left = (target_w - w) // 2
        pad_spec = [(top, target_h - h - top), (left, target_w - w - left)]
        # leave any channel axes untouched
        pad_spec += [(0, 0)] * (im.ndim - 2)
        out = np.pad(im, pad_spec, mode="constant", constant_values=0)
        assert out.shape[:2] == (target_h, target_w)
        padded.append(out)
    return padded
def safe_image_broadcast(img, canvas, x, y, result_mask=None, scale=THUMB_SCALE):
    """Clip `img` against `canvas` and copy (or average) it in at (x, y).

    `img` is assumed to already be scaled to `scale` (thumbnail or full-res);
    `canvas` is the destination (overview or full-res). `x`, `y` are the
    top-left destination coordinates in UNSCALED canvas units (they may be
    negative or otherwise out of range); the `scale` transform is applied
    inside this function. As much of `img` as fits is copied, clipped where
    it would fall outside the canvas; negative offsets shift the source
    window instead of the destination.

    `result_mask`, when given, must match `canvas` in height and width. It
    tracks which canvas pixels hold valid data: non-zero means the pixel has
    NOT been updated yet, zero means it holds image data. With a mask:
      1. regions already holding data are averaged 50/50 with the incoming img
      2. not-yet-updated regions are overwritten with the incoming img
      3. the mask is zeroed where pixels were written

    Returns (canvas, result_mask). Returns None (bare) when the destination
    does not overlap the canvas at all.

    NOTE: an experimental in-house Laplacian-pyramid blending path used to
    live here; it was superseded by cv2.detail_MultiBandBlender and the dead
    `if False:` branch has been removed.
    """
    # promote grayscale source to RGB when the canvas is color
    if len(canvas.shape) == 3 and len(img.shape) != 3:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    w = img.shape[1]
    h = img.shape[0]
    # bring destination coordinates into (possibly thumbnailed) canvas pixels
    x = int(x * scale)
    y = int(y * scale)
    if y > canvas.shape[0] or x > canvas.shape[1]:
        # destination doesn't even overlap the canvas
        return
    # clip against the left/top edges: a negative offset means we start
    # copying from an interior point of the source image instead
    if x < 0:
        w = w + x
        x_src = -x
        x = 0
    else:
        x_src = 0
    if y < 0:
        h = h + y
        y_src = -y
        y = 0
    else:
        y_src = 0
    # clip against the right/bottom edges
    if y + h > canvas.shape[0]:
        h = canvas.shape[0] - y
    if x + w > canvas.shape[1]:
        w = canvas.shape[1] - x
    if result_mask is None:
        # no bookkeeping requested: plain overwrite of the destination region
        canvas[
            y : y + h,
            x : x + w
        ] = img[
            y_src : y_src + h,
            x_src : x_src + w
        ]
    else:
        assert canvas.shape[0] == result_mask.shape[0] and canvas.shape[1] == result_mask.shape[1], "canvas and result_mask should have identical sizes"
        # averages everything, including areas that didn't have an image before
        avg = cv2.addWeighted(
            canvas[
                y : y + h,
                x : x + w
            ],
            0.5,
            img[
                y_src : y_src + h,
                x_src : x_src + w
            ],
            0.5,
            0.0
        )
        canvas[
            y : y + h,
            x : x + w
        ] = avg
        # fixup the areas that were averaged with black by copying over the
        # source pixels: cv2.copyTo copies where the mask is non-zero, i.e.
        # exactly the not-yet-updated regions
        cv2.copyTo(
            img[
                y_src : y_src + h,
                x_src : x_src + w
            ],
            result_mask[
                y : y + h,
                x : x + w,
            ],
            canvas[
                y : y + h,
                x : x + w
            ]
        )
        # record that this region now holds valid image data (0 == updated)
        result_mask[
            y : y + h,
            x : x + w
        ] = np.zeros((h, w), dtype=np.uint8)
    return canvas, result_mask
# move `img` by `x`, `y` and return the portion of `img` that remains within
# the original dimensions of `img`
def translate_and_crop(img, x, y):
    """Shift `img` by (x, y) and return the sub-image still inside the frame.

    :param img: 2-D (or higher) array; only the first two axes are cropped
    :param x: horizontal shift in pixels (may be negative)
    :param y: vertical shift in pixels (may be negative)
    :return: the surviving region of `img`, or None if the shift pushes the
             image entirely outside its original bounds
    """
    x_max = img.shape[1]
    y_max = img.shape[0]
    if x >= 0:
        if x < x_max:
            x_min = x
        else:
            return None
    else:
        if x + x_max > 0:
            x_min = 0
            x_max = x + x_max
        else:
            return None
    if y >= 0:
        if y < y_max:
            y_min = y
        else:
            return None
    else:
        if y + y_max > 0:
            y_min = 0
            y_max = y + y_max
        else:
            # bug fix: the original fell through here with y_min unbound,
            # raising UnboundLocalError when the image is shifted fully
            # off-frame vertically; mirror the x-axis handling instead
            return None
    return img[
        y_min : y_max,
        x_min : x_max
    ]