# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA


def precalculate_patch_colors(descriptors):
"""Use PCA to reduce the dimensionality of the patch descriptors to 3 and normalize these for RGB color space.
Parameters:
- descriptors: The high-dimensional descriptors for each patch.
Returns:
- normalized_colors: An array of RGB colors for each patch.
"""
# Apply PCA to reduce descriptors to 3 dimensions
    pca = PCA(n_components=3)
reduced_descriptors = pca.fit_transform(descriptors)
# Normalize the reduced descriptors to [0, 255] for RGB values
min_val, max_val = np.min(reduced_descriptors, axis=0), np.max(reduced_descriptors, axis=0)
normalized_colors = 255 * (reduced_descriptors - min_val) / (max_val - min_val)
return normalized_colors
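
# Hedged usage sketch (added for illustration, not part of the original module): the
# descriptor matrix below is random placeholder data, and the 384-dim size is an
# arbitrary assumption; any (num_patches x dim) float array would do.
def _example_patch_colors():
    rng = np.random.default_rng(0)
    descriptors = rng.random((256, 384))  # e.g. a 16x16 grid of hypothetical patch descriptors
    colors = precalculate_patch_colors(descriptors)
    assert colors.shape == (256, 3)  # one RGB triplet per patch, each channel in [0, 255]
    return colors
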
def overlay_colors_on_image(image, colors, patch_height, patch_width, gap_size, alpha=0.6):
"""Overlay precalculated colors on their corresponding patches in the image with alpha blending.
Parameters:
- image: The original image on which to overlay colors.
- colors: The array of RGB colors for each patch.
- patch_height, patch_width: Dimensions of each patch.
- gap_size: The size of the gap between patches.
- alpha: The alpha value for blending the color overlay and the original image.
Returns:
- processed_image: The image with colors overlaid on patches.
"""
processed_image = np.copy(image)
num_patches_x = image.shape[1] // (patch_width + gap_size)
num_patches_y = image.shape[0] // (patch_height + gap_size)
patch_index = 0
for i in range(num_patches_y):
for j in range(num_patches_x):
if patch_index >= len(colors): # Safety check
break
# Define patch coordinates
x_start = j * (patch_width + gap_size) + gap_size
y_start = i * (patch_height + gap_size) + gap_size
x_end = x_start + patch_width
y_end = y_start + patch_height
# Assign color with alpha blending
color = colors[patch_index]
for c in range(3): # RGB channels
processed_image[y_start:y_end, x_start:x_end, c] = (
alpha * color[c] + (1 - alpha) * processed_image[y_start:y_end, x_start:x_end, c]
)
patch_index += 1
return processed_image
def process_image_into_patches(image, patch_height, patch_width, gap_size):
"""Divides an image into patches with specified height and width, including gaps around them, returning a new
image."""
rows, cols, _ = image.shape
num_patches_x = cols // patch_width
num_patches_y = rows // patch_height
# Calculating new dimensions
new_cols = num_patches_x * (patch_width + gap_size) + gap_size
new_rows = num_patches_y * (patch_height + gap_size) + gap_size
# Creating a new image with gaps
new_image = np.full((new_rows, new_cols, 3), 255, dtype=np.uint8) # Start with a white image
for i in range(num_patches_y):
for j in range(num_patches_x):
patch = image[i*patch_height:(i+1)*patch_height, j*patch_width:(j+1)*patch_width]
y = i * (patch_height + gap_size) + gap_size
x = j * (patch_width + gap_size) + gap_size
new_image[y:y+patch_height, x:x+patch_width] = patch
return new_image
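
# Hedged end-to-end sketch (illustrative, not from the original repo): tile a dummy
# image into gapped patches, colorize them via PCA, and blend the colors back on top.
# The image size, patch size, gap, and descriptor dimension are arbitrary assumptions.
def _example_patch_overlay():
    rng = np.random.default_rng(0)
    image = rng.integers(0, 256, size=(128, 128, 3), dtype=np.uint8)
    patch_h, patch_w, gap = 16, 16, 2
    tiled = process_image_into_patches(image, patch_h, patch_w, gap)
    descriptors = rng.random((64, 384))  # one placeholder descriptor per 16x16 patch
    colors = precalculate_patch_colors(descriptors)
    return overlay_colors_on_image(tiled, colors, patch_h, patch_w, gap)
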
def create_composite_image(image1, image2, gap_between_images, orientation='vertical'):
"""Combines two images with a specified gap between them into a single composite image, either horizontally or
vertically based on the orientation parameter.
Parameters:
- image1, image2: The two images to combine.
- gap_between_images: The number of pixels for the gap between the two images.
- orientation: 'horizontal' or 'vertical' - the orientation for combining the images.
Returns:
- composite_image: The combined image with a gap between image1 and image2.
"""
if orientation == 'horizontal':
total_height = max(image1.shape[0], image2.shape[0])
total_width = image1.shape[1] + gap_between_images + image2.shape[1]
composite_image = np.full((total_height, total_width, 3), 255, dtype=np.uint8) # Start with a white image
composite_image[:image1.shape[0], :image1.shape[1]] = image1
composite_image[:image2.shape[0], image1.shape[1] + gap_between_images:] = image2
elif orientation == 'vertical':
total_width = max(image1.shape[1], image2.shape[1])
total_height = image1.shape[0] + gap_between_images + image2.shape[0]
composite_image = np.full((total_height, total_width, 3), 255, dtype=np.uint8) # Start with a white image
composite_image[:image1.shape[0], :image1.shape[1]] = image1
composite_image[image1.shape[0] + gap_between_images:image1.shape[0] + gap_between_images + image2.shape[0], :image2.shape[1]] = image2
else:
raise ValueError("Orientation must be 'horizontal' or 'vertical'")
return composite_image
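
# Hedged usage sketch (illustrative): stack two dummy images side by side. The
# shapes and the 10-pixel gap are arbitrary assumptions for the example.
def _example_composite():
    left = np.zeros((100, 80, 3), dtype=np.uint8)
    right = np.full((120, 60, 3), 128, dtype=np.uint8)
    combo = create_composite_image(left, right, gap_between_images=10, orientation='horizontal')
    assert combo.shape == (120, 80 + 10 + 60, 3)  # shorter image is padded with white
    return combo
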
def draw_connections_with_numbers(composite_image, matches, patch_height, patch_width, gap_size, gap_between_images, fontsize=18, orientation='horizontal'):
"""Places a number in the middle of corresponding patches across two processed images within a composite image,
based on a list of matches in row-major order. Supports both horizontal and vertical orientations.
Parameters:
- composite_image: The composite image containing both processed images and a gap.
- matches: A list of tuples, where each tuple contains the indices [idx1, idx2] of matching patches.
- patch_height, patch_width: The dimensions of each patch.
- gap_size: The size of the gap around each patch.
- gap_between_images: The gap between the two processed images within the composite image.
- fontsize: The font size of the numbers to be placed.
- orientation: 'horizontal' or 'vertical' - the orientation for combining the images.
"""
plt.figure(figsize=(12, 8))
plt.imshow(composite_image)
coordinates = []
if orientation == 'horizontal':
num_patches_x_per_image = (composite_image.shape[1] - gap_between_images) // (patch_width + gap_size) // 2
num_patches_y = composite_image.shape[0] // (patch_height + gap_size)
for match_number, [idx1, idx2] in enumerate(matches, start=1):
# Calculate positions in row-major order
row1, col1 = divmod(idx1, num_patches_x_per_image)
row2, col2 = divmod(idx2, num_patches_x_per_image)
y1 = row1 * (patch_height + gap_size) + gap_size + patch_height // 2
x1 = col1 * (patch_width + gap_size) + gap_size + patch_width // 2
y2 = row2 * (patch_height + gap_size) + gap_size + patch_height // 2
x2 = (col2 + num_patches_x_per_image) * (patch_width + gap_size) + gap_size + patch_width // 2 + gap_between_images
coordinates.append((x1, y1, x2, y2, match_number))
elif orientation == 'vertical':
num_patches_x = composite_image.shape[1] // (patch_width + gap_size)
num_patches_y_per_image = (composite_image.shape[0] - gap_between_images) // (patch_height + gap_size) // 2
for match_number, [idx1, idx2] in enumerate(matches, start=1):
# Calculate positions in column-major order for vertical orientation
col1, row1 = divmod(idx1, num_patches_y_per_image)
col2, row2 = divmod(idx2, num_patches_y_per_image)
            x1 = row1 * (patch_width + gap_size) + gap_size + patch_width // 2
            y1 = col1 * (patch_height + gap_size) + gap_size + patch_height // 2
            x2 = row2 * (patch_width + gap_size) + gap_size + patch_width // 2
            y2 = (col2 + num_patches_y_per_image) * (patch_height + gap_size) + gap_size + patch_height // 2 + gap_between_images + 2 * fontsize
coordinates.append((x1, y1, x2, y2, match_number))
else:
raise ValueError("Orientation must be 'horizontal' or 'vertical'")
# Place the connection number in the middle of each corresponding patch for both orientations
for (x1, y1, x2, y2, match_number) in coordinates:
plt.text(x1, y1, str(match_number), ha='center', va='center', color='white', fontsize=fontsize, weight='bold')
plt.text(x2, y2, str(match_number), ha='center', va='center', color='white', fontsize=fontsize, weight='bold')
    plt.axis('off')
    Path("demo").mkdir(exist_ok=True)  # savefig fails if the output directory is missing
    plt.savefig(f"demo/demo_{patch_height}.pdf")  # save before plt.show(), which clears the figure
    plt.show()
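
# Hedged usage sketch (illustrative): annotate two copies of a tiled image with three
# dummy matches. Indices are row-major patch indices within each image; all sizes here
# are arbitrary assumptions.
def _example_numbered_matches():
    rng = np.random.default_rng(0)
    image = rng.integers(0, 256, size=(128, 128, 3), dtype=np.uint8)
    tiled = process_image_into_patches(image, 16, 16, 2)
    combo = create_composite_image(tiled, tiled, gap_between_images=10, orientation='horizontal')
    matches = [[0, 0], [9, 10], [27, 27]]  # placeholder patch correspondences
    draw_connections_with_numbers(combo, matches, 16, 16, 2, 10, fontsize=12)
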
def draw_connections(composite_image, patch_height, patch_width, gap_size, gap_between_images, num_connections):
"""Draws lines connecting randomly selected patches across two processed images within a composite image."""
rows, cols, _ = composite_image.shape
num_patches_x = (cols - gap_between_images) // (patch_width + gap_size)
num_patches_y = rows // (patch_height + gap_size)
plt.figure(figsize=(12, 8))
plt.imshow(composite_image)
for _ in range(num_connections):
patch_y = random.randint(0, num_patches_y - 1)
patch_x1 = random.randint(0, num_patches_x // 2 - 2)
patch_x2 = random.randint(num_patches_x // 2, num_patches_x - 2)
y = patch_y * (patch_height + gap_size) + gap_size + patch_height // 2
x1 = patch_x1 * (patch_width + gap_size) + gap_size + patch_width // 2
x2 = patch_x2 * (patch_width + gap_size) + gap_size + patch_width // 2 + gap_between_images
plt.plot([x1, x2], [y, y], 'r-')
plt.axis('off')
plt.show()
def downsample_descriptors(descriptors, original_grid_size, new_grid_size, operation='mean'):
    """Downsample patch descriptors by pooling them over a coarser grid.
    Parameters:
    - descriptors: A (num_patches x dim) array where each row is a descriptor for a patch, in row-major grid order.
    - original_grid_size: The grid size of the original descriptors (e.g., (16, 16) for 16x16 patches).
    - new_grid_size: The desired grid size after downsampling (e.g., (8, 8)).
    - operation: 'mean' or 'max' - how the descriptors inside each new patch are pooled.
    Returns:
    - A new array of downsampled descriptors.
    """
# Calculate the number of original patches that fit into a new patch
patches_per_new_patch = (original_grid_size[0] // new_grid_size[0]) * (original_grid_size[1] // new_grid_size[1])
# Initialize an array to hold the downsampled descriptors
downsampled_descriptors = np.zeros((new_grid_size[0] * new_grid_size[1], descriptors.shape[1]))
# Helper function to calculate the index in the flattened grid
def calc_index(row, col, cols):
return row * cols + col
# Iterate over the new grid
for new_row in range(new_grid_size[0]):
for new_col in range(new_grid_size[1]):
# Find the original patches that correspond to the current new patch
start_row = new_row * (original_grid_size[0] // new_grid_size[0])
start_col = new_col * (original_grid_size[1] // new_grid_size[1])
patch_descriptors = []
# Aggregate descriptors of the original patches
for i in range(start_row, start_row + (original_grid_size[0] // new_grid_size[0])):
for j in range(start_col, start_col + (original_grid_size[1] // new_grid_size[1])):
idx = calc_index(i, j, original_grid_size[1])
patch_descriptors.append(descriptors[idx])
# Average the descriptors and assign to the downsampled descriptor
            if operation == 'mean':
                downsampled_descriptors[calc_index(new_row, new_col, new_grid_size[1])] = np.mean(patch_descriptors, axis=0)
            elif operation == 'max':
                downsampled_descriptors[calc_index(new_row, new_col, new_grid_size[1])] = np.max(patch_descriptors, axis=0)
            else:
                raise ValueError("operation must be 'mean' or 'max'")
    return downsampled_descriptors
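
# Hedged usage sketch (illustrative): mean-pool a 16x16 descriptor grid down to 8x8,
# so each new descriptor averages a 2x2 block of originals. The 384-dim size is an
# arbitrary assumption.
def _example_downsample():
    rng = np.random.default_rng(0)
    descriptors = rng.random((256, 384))  # 16x16 grid, row-major
    coarse = downsample_descriptors(descriptors, (16, 16), (8, 8), operation='mean')
    assert coarse.shape == (64, 384)
    # The first coarse descriptor averages original patches 0, 1, 16, and 17.
    expected = descriptors[[0, 1, 16, 17]].mean(axis=0)
    assert np.allclose(coarse[0], expected)
    return coarse
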
def update_matches_for_downsampled_grid(matches, original_grid_size, new_grid_size):
"""Update the match indices for a downsampled grid ensuring each patch appears only once.
Parameters:
- matches: List of tuples/lists with the original matching indices.
- original_grid_size: Tuple indicating the size of the original grid (rows, cols).
- new_grid_size: Tuple indicating the size of the new (downsampled) grid (rows, cols).
Returns:
- Updated list of matches with indices corresponding to the downsampled grid, ensuring uniqueness.
"""
# Calculate the factor by which the number of patches per row/column is reduced
row_reduction_factor = original_grid_size[0] // new_grid_size[0]
col_reduction_factor = original_grid_size[1] // new_grid_size[1]
# Function to map original index to new index
def map_index_to_downsampled(idx, original_grid_size, new_grid_size, reduction_factors):
original_row, original_col = divmod(idx, original_grid_size[1])
new_row = original_row // reduction_factors[0]
new_col = original_col // reduction_factors[1]
return new_row * new_grid_size[1] + new_col
used_indices = set() # Keep track of used indices to ensure uniqueness
updated_matches = []
for match in matches:
# Update match indices for the downsampled grid
new_idx1 = map_index_to_downsampled(match[0], original_grid_size, new_grid_size, (row_reduction_factor, col_reduction_factor))
new_idx2 = map_index_to_downsampled(match[1], original_grid_size, new_grid_size, (row_reduction_factor, col_reduction_factor))
# Ensure uniqueness by checking if either index has already been used
if new_idx1 not in used_indices and new_idx2 not in used_indices:
updated_matches.append([new_idx1, new_idx2])
# Mark these indices as used
used_indices.add(new_idx1)
used_indices.add(new_idx2)
return updated_matches
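
# Hedged usage sketch (illustrative): map matches from a 4x4 grid onto a 2x2 grid.
# Original indices 0 and 5 both land in coarse cell 0, so only the first such pair
# survives the uniqueness filter.
def _example_update_matches():
    matches = [[0, 0], [5, 5], [10, 15]]
    updated = update_matches_for_downsampled_grid(matches, (4, 4), (2, 2))
    assert updated == [[0, 0], [3, 3]]
    return updated
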
def quaternion_to_rotation_matrix(qvec):
    """Convert a quaternion in (w, x, y, z) order to a 3x3 rotation matrix."""
    qvec = qvec / np.linalg.norm(qvec)  # normalize to guard against non-unit input
w, x, y, z = qvec
R = np.array([[1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * z * w, 2 * x * z + 2 * y * w],
[2 * x * y + 2 * z * w, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * x * w],
[2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x * x - 2 * y * y]])
return R
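
# Quick sanity checks (illustrative): the identity quaternion maps to the identity
# matrix, and a 90-degree rotation about z sends x to y. Quaternions here use the
# (w, x, y, z) convention expected by the function above.
def _example_quaternion():
    assert np.allclose(quaternion_to_rotation_matrix(np.array([1.0, 0, 0, 0])), np.eye(3))
    half = np.sqrt(0.5)
    R = quaternion_to_rotation_matrix(np.array([half, 0, 0, half]))  # 90 deg about z
    assert np.allclose(R @ np.array([1.0, 0, 0]), np.array([0, 1.0, 0]))
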
def camera_center_to_translation(c, qvec):
    """Convert a camera center c to a COLMAP-style translation vector, t = -R @ c."""
    R = quaternion_to_rotation_matrix(qvec)
    return -np.matmul(R, c)
# Simple COLMAP camera class.
class Camera:
def __init__(self):
self.camera_model = None
self.intrinsics = None
self.qvec = None
self.t = None
def set_intrinsics(self, camera_model, intrinsics):
self.camera_model = camera_model
self.intrinsics = intrinsics
def set_pose(self, qvec, t):
self.qvec = qvec
self.t = t
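
# Hedged usage sketch (illustrative): populate a Camera the way COLMAP-style data
# would, then derive the translation from a known camera center. The pinhole
# intrinsics below are arbitrary placeholder values.
def _example_camera():
    cam = Camera()
    cam.set_intrinsics('PINHOLE', [500.0, 500.0, 320.0, 240.0])  # fx, fy, cx, cy
    qvec = np.array([1.0, 0.0, 0.0, 0.0])  # identity rotation, (w, x, y, z)
    center = np.array([1.0, 2.0, 3.0])
    cam.set_pose(qvec, camera_center_to_translation(center, qvec))
    assert np.allclose(cam.t, -center)  # with R = I, t = -c
    return cam
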
def loadh5(dump_file_full_name):
"""Loads a h5 file as dictionary."""
try:
with h5py.File(dump_file_full_name, 'r') as h5file:
dict_from_file = readh5(h5file)
except Exception as e:
print("Error while loading {}".format(dump_file_full_name))
raise e
return dict_from_file
def readh5(h5node):
"""Recursive function to read h5 nodes as dictionary."""
dict_from_file = {}
for _key in h5node.keys():
        if isinstance(h5node[_key], h5py.Group):  # use the public Group alias rather than h5py._hl
dict_from_file[_key] = readh5(h5node[_key])
else:
dict_from_file[_key] = h5node[_key][:]
return dict_from_file
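
# Hedged usage sketch (illustrative): loadh5 returns nested groups as nested dicts
# and datasets as numpy arrays. The file written here is a throwaway example in the
# working directory.
def _example_loadh5(path="example.h5"):
    with h5py.File(path, 'w') as f:
        f.create_dataset('descriptors', data=np.zeros((4, 8)))
        f.create_group('meta').create_dataset('ids', data=np.arange(4))
    data = loadh5(path)
    assert data['descriptors'].shape == (4, 8)
    assert data['meta']['ids'].tolist() == [0, 1, 2, 3]
    return data
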
def download_best(model_name):
import requests
print(f"No available checkpoint, start downloading...")
url = "https://cmp.felk.cvut.cz/~weitong/vop/checkpoint_best.tar"
local_file_path = f"outputs/training/{model_name}/checkpoint_best.tar"
output_dir = Path(f"outputs/training/{model_name}")
output_dir.mkdir(exist_ok=True, parents=True)
# Download the file
response = requests.get(url, stream=True)
if response.status_code == 200:
with open(local_file_path, 'wb') as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
print(f"Checkpoint downloaded successfully to {local_file_path}")
else:
print(f"Failed to download checkpoint. HTTP Status Code: {response.status_code}")