-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimg_to_pointcloud.py
132 lines (100 loc) · 4.04 KB
/
img_to_pointcloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import cv2
import torch
import time
import numpy as np
import open3d as o3d
# Load a MiDas model for depth estimation
# model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
# model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
midas = torch.hub.load("intel-isl/MiDaS", model_type)
# Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()
# Load transforms to resize and normalize the image
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
transform = midas_transforms.dpt_transform
else:
transform = midas_transforms.small_transform
# Open up the video capture from a webcam
cap = cv2.VideoCapture(0)
# Create an Open3D visualization window
vis = o3d.visualization.Visualizer()
vis.create_window("Open3D Visualization")
# Initialize rotation angle
rotation_angle = 0.0
def get_rotation_matrix_y(angle_degrees):
angle_radians = np.deg2rad(angle_degrees)
cos_angle = np.cos(angle_radians)
sin_angle = np.sin(angle_radians)
# Rotation matrix around the y-axis
rotation_matrix = np.array([
[cos_angle, 0, sin_angle],
[0, 1, 0],
[-sin_angle, 0, cos_angle]
])
return rotation_matrix
while cap.isOpened():
success, img = cap.read()
start = time.time()
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Apply input transforms
input_batch = transform(img).to(device)
# Prediction and resize to original resolution
with torch.no_grad():
prediction = midas(input_batch)
prediction = torch.nn.functional.interpolate(
prediction.unsqueeze(1),
size=img.shape[:2],
mode="bicubic",
align_corners=False,
).squeeze()
depth_map = prediction.cpu().numpy()
depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_64F)
end = time.time()
totalTime = end - start
fps = 1 / totalTime
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
depth_map = (depth_map*255).astype(np.uint8)
depth_map = cv2.applyColorMap(depth_map, cv2.COLORMAP_MAGMA)
depth_map_2 = depth_map.copy();
cv2.putText(depth_map, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
cv2.imshow('Image', img)
cv2.imshow('Depth Map', depth_map)
# Convert numpy arrays to Open3D images
color_image = o3d.geometry.Image(img)
depth_image_2 = o3d.geometry.Image(depth_map)
# Create RGBDImage
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
color_image,
depth_image_2,
depth_scale=255.0, # Adjust depth scale as needed
depth_trunc=1.0, # Adjust depth truncation as needed
convert_rgb_to_intensity=True
)
# cv2.imshow('open3d',rgbd_image.depth)
# Create a point cloud from the RGBD image
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, o3d.camera.PinholeCameraIntrinsic(
o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
# Create a triangular mesh from the point cloud using Poisson surface reconstruction
# mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=9)
# Flip the point cloud to match the orientation
pcd.transform([[1, 0, 0, 0],
[0, -1, 0, 0],
[0, 0, -1, 0],
[0, 0, 0, 1]])
# Apply rotation to the point cloud
rotation_matrix = get_rotation_matrix_y(rotation_angle)
pcd.rotate(rotation_matrix, center=(0, 0, 0))
rotation_angle += 10 # Increase the angle for the next iteration
vis.clear_geometries()
vis.add_geometry(pcd)
vis.poll_events()
vis.update_renderer()
if cv2.waitKey(5) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
vis.destroy_window()