diff --git a/pytorch3d/ops/cameras_alignment.py b/pytorch3d/ops/cameras_alignment.py
index babbd733a..667c5b964 100644
--- a/pytorch3d/ops/cameras_alignment.py
+++ b/pytorch3d/ops/cameras_alignment.py
@@ -39,20 +39,20 @@ def corresponding_cameras_alignment(
     such that the following holds:
 
         Under the change of coordinates using a similarity transform
-        (R_A, T_A, s_A) a 3D point X' is mapped to X with:
-        ```
+        (R_A, T_A, s_A) a 3D point X' is mapped to X with: ::
+
             X = (X' R_A + T_A) / s_A
-        ```
-        Then, for all cameras `i`, we assume that the following holds:
-        ```
+
+        Then, for all cameras `i`, we assume that the following holds: ::
+
             X R_i + T_i = s' (X' R_i' + T_i'),
-        ```
+
         i.e. an adjusted point X' is mapped by a camera (R_i', T_i')
         to the same point as imaged from camera (R_i, T_i) after resolving
         the scale ambiguity with a global scalar factor s'.
 
-        Substituting for X above gives rise to the following:
-        ```
+        Substituting for X above gives rise to the following: ::
+
             (X' R_A + T_A) / s_A R_i + T_i = s' (X' R_i' + T_i')   // · s_A
             (X' R_A + T_A) R_i + T_i s_A = (s' s_A) (X' R_i' + T_i')
             s' := 1 / s_A  # without loss of generality
@@ -60,10 +60,11 @@ def corresponding_cameras_alignment(
             X' R_A R_i + T_A R_i + T_i s_A = X' R_i' + T_i'
                ^^^^^^^   ^^^^^^^^^^^^^^^^^
                ~= R_i'        ~= T_i'
-        ```
+
         i.e. after estimating R_A, T_A, s_A, the aligned source cameras have
-        extrinsics:
-        `cameras_src_align = (R_A R_i, T_A R_i + T_i s_A) ~= (R_i', T_i')`
+        extrinsics: ::
+
+            cameras_src_align = (R_A R_i, T_A R_i + T_i s_A) ~= (R_i', T_i')
 
     We support two ways `R_A, T_A, s_A` can be estimated:
         1) `mode=='centers'`
@@ -73,12 +74,12 @@ def corresponding_cameras_alignment(
 
         2) `mode=='extrinsics'`
             Defines the alignment problem as a system
-            of the following equations:
-            ```
+            of the following equations: ::
+
                 for all i:
                 [ R_A   0 ] x [ R_i         0 ] = [ R_i' 0 ]
                 [ T_A^T 1 ]   [ (s_A T_i^T) 1 ]   [ T_i' 1 ]
-            ```
+
             `R_A, T_A` and `s_A` are then obtained by solving the
             system in the least squares sense.
 
diff --git a/pytorch3d/renderer/cameras.py b/pytorch3d/renderer/cameras.py
index 7b96609f3..4a4405eaa 100644
--- a/pytorch3d/renderer/cameras.py
+++ b/pytorch3d/renderer/cameras.py
@@ -36,15 +36,15 @@ class CamerasBase(TensorProperties):
 
     For cameras, there are four different coordinate systems (or spaces)
     - World coordinate system: This is the system the object lives - the world.
-    - Camera view coordinate system: This is the system that has its origin on the camera
-        and the and the Z-axis perpendicular to the image plane.
+    - Camera view coordinate system: This is the system that has its origin on
+        the camera and the Z-axis perpendicular to the image plane.
         In PyTorch3D, we assume that +X points left, and +Y points up and
         +Z points out from the image plane.
         The transformation from world --> view happens after applying a rotation (R)
         and translation (T)
     - NDC coordinate system: This is the normalized coordinate system that confines
-        in a volume the rendered part of the object or scene. Also known as view volume.
-        For square images, given the PyTorch3D convention, (+1, +1, znear)
+        the rendered part of the object or scene in a volume, also known as the
+        view volume. For square images, given the PyTorch3D convention, (+1, +1, znear)
         is the top left near corner, and (-1, -1, zfar) is the bottom right far
         corner of the volume.
         The transformation from view --> NDC happens after applying the camera
@@ -54,10 +54,9 @@ class CamerasBase(TensorProperties):
     - Screen coordinate system: This is another representation of the view volume with
         the XY coordinates defined in image space instead of a normalized space.
 
-    A better illustration of the coordinate systems can be found in
-    pytorch3d/docs/notes/cameras.md.
+    An illustration of the coordinate systems can be found in pytorch3d/docs/notes/cameras.md.
 
-    It defines methods that are common to all camera models:
+    CamerasBase defines methods that are common to all camera models:
     - `get_camera_center` that returns the optical center of the camera in world
         coordinates
     - `get_world_to_view_transform` which returns a 3D transform from
@@ -167,8 +166,8 @@ def get_camera_center(self, **kwargs) -> torch.Tensor:
 
                 as keyword arguments to override the default values set in __init__.
 
-        Setting T here will update the values set in init as this
-        value may be needed later on in the rendering pipeline e.g. for
+        Setting R or T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
         lighting calculations.
 
         Returns:
@@ -237,8 +236,9 @@ def transform_points(
         self, points, eps: Optional[float] = None, **kwargs
     ) -> torch.Tensor:
         """
-        Transform input points from world to camera space with the
-        projection matrix defined by the camera.
+        Transform input points from world to camera space.
+        If the camera is defined in NDC space, the projected points are in NDC space.
+        If the camera is defined in screen space, the projected points are in screen space.
 
         For `CamerasBase.transform_points`, setting `eps > 0`
         stabilizes gradients since it leads to avoiding division
@@ -492,7 +492,7 @@ class FoVPerspectiveCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
    projection matrices by specifying the field of view.
-    The definition of the parameters follow the OpenGL perspective camera.
+    The definitions of the parameters follow the OpenGL perspective camera.
 
     The extrinsics of the camera (R and T matrices) can also be set in the
     initializer or passed in to `get_full_projection_transform` to get
@@ -780,7 +780,7 @@ class FoVOrthographicCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     projection matrices by specifying the field of view.
-    The definition of the parameters follow the OpenGL orthographic camera.
+    The definitions of the parameters follow the OpenGL orthographic camera.
     """
 
     # For __getitem__
diff --git a/pytorch3d/transforms/transform3d.py b/pytorch3d/transforms/transform3d.py
index 24a5663d7..620380318 100644
--- a/pytorch3d/transforms/transform3d.py
+++ b/pytorch3d/transforms/transform3d.py
@@ -165,7 +165,7 @@ def __init__(
                 raise ValueError('"matrix" has to be a 2- or a 3-dimensional tensor.')
             if matrix.shape[-2] != 4 or matrix.shape[-1] != 4:
                 raise ValueError(
-                    '"matrix" has to be a tensor of shape (minibatch, 4, 4)'
+                    '"matrix" has to be a tensor of shape (minibatch, 4, 4) or (4, 4).'
                 )
             # set dtype and device from matrix
             dtype = matrix.dtype
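Notes (not part of the patch):

The derivation reformatted in the first hunk is easy to sanity-check numerically.
The sketch below uses plain PyTorch and the row-vector convention PyTorch3D uses
(points multiply matrices from the left); all names are local to the sketch. It
verifies that with the aligned extrinsics R_i' = R_A R_i and T_i' = T_A R_i + s_A T_i,
the relation X R_i + T_i = s' (X' R_i' + T_i') holds with s' = 1 / s_A: ::

    import torch

    torch.manual_seed(0)

    # A random similarity transform (R_A, T_A, s_A); the QR factor is
    # orthonormal and stands in for a rotation here.
    R_A, _ = torch.linalg.qr(torch.randn(3, 3))
    T_A = torch.randn(1, 3)
    s_A = torch.rand(1) + 0.5

    # Random source camera extrinsics (R_i, T_i).
    R_i, _ = torch.linalg.qr(torch.randn(3, 3))
    T_i = torch.randn(1, 3)

    # Aligned extrinsics, exactly as derived in the docstring.
    R_i_new = R_A @ R_i
    T_i_new = T_A @ R_i + s_A * T_i

    # A batch of points X' and their images X under the similarity transform.
    X_new = torch.randn(5, 3)
    X = (X_new @ R_A + T_A) / s_A

    # X R_i + T_i  ==  s' (X' R_i' + T_i')  with  s' = 1 / s_A.
    lhs = X @ R_i + T_i
    rhs = (X_new @ R_i_new + T_i_new) / s_A
    assert torch.allclose(lhs, rhs, atol=1e-4)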
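A hedged end-to-end sketch of the function whose docstring the first file patches.
The synthetic targets mirror the derivation above; the camera model, batch size,
and scale are arbitrary choices for illustration, not anything the patch prescribes: ::

    import torch
    from pytorch3d.ops import corresponding_cameras_alignment
    from pytorch3d.renderer import PerspectiveCameras
    from pytorch3d.transforms import random_rotations

    N = 10
    R = random_rotations(N)
    T = torch.randn(N, 3)
    cameras_src = PerspectiveCameras(R=R, T=T)

    # Targets: the same rig under a known similarity transform, i.e. the
    # (R_A R_i, T_A R_i + s_A T_i) extrinsics from the docstring derivation.
    R_A = random_rotations(1)[0]
    T_A = torch.randn(1, 3)
    s_A = 1.5
    cameras_tgt = PerspectiveCameras(
        R=R_A @ R,
        T=(T_A @ R)[:, 0] + s_A * T,
    )

    cameras_aligned = corresponding_cameras_alignment(
        cameras_src, cameras_tgt, estimate_scale=True, mode="extrinsics"
    )
    # In this noise-free setup, cameras_aligned.R and cameras_aligned.T should
    # match the target extrinsics up to numerical error.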
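The new `transform_points` docstring distinguishes NDC-space from screen-space
cameras. Assuming the installed PyTorch3D is recent enough to expose the `in_ndc`
and `image_size` constructor arguments (an assumption about the version, not
something this patch adds), the difference can be seen directly: ::

    import torch
    from pytorch3d.renderer import PerspectiveCameras

    verts = torch.tensor([[[0.5, 0.0, 3.0]]])  # one point, 3 units along +Z

    # NDC-space camera (the default): projected XY land in NDC units.
    cam_ndc = PerspectiveCameras(focal_length=1.0)
    print(cam_ndc.transform_points(verts))

    # Screen-space camera: intrinsics in pixels, projected XY land in pixels.
    cam_scr = PerspectiveCameras(
        focal_length=100.0,
        principal_point=((64.0, 64.0),),
        image_size=((128, 128),),
        in_ndc=False,
    )
    print(cam_scr.transform_points(verts))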
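Finally, the behavior that the amended ValueError message in transform3d.py now
spells out; a minimal sketch using only the public `Transform3d` constructor: ::

    import torch
    from pytorch3d.transforms import Transform3d

    # A single (4, 4) matrix is accepted and treated as a minibatch of one.
    t = Transform3d(matrix=torch.eye(4))

    # Any other trailing shape raises the (now fully descriptive) error.
    try:
        Transform3d(matrix=torch.eye(3))
    except ValueError as err:
        print(err)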