diff --git a/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu b/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
index c18475def..58aeb20fc 100644
--- a/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
+++ b/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
@@ -266,6 +266,8 @@ at::Tensor FaceAreasNormalsBackwardCuda(
       grad_normals_t{grad_normals, "grad_normals", 4};
   at::CheckedFrom c = "FaceAreasNormalsBackwardCuda";
   at::checkAllSameGPU(c, {verts_t, faces_t, grad_areas_t, grad_normals_t});
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("FaceAreasNormalsBackwardCuda");
 
   // Set the device for the kernel launch based on the device of verts
   at::cuda::CUDAGuard device_guard(verts.device());
diff --git a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
index adeceedff..6bd2a80d9 100644
--- a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
+++ b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
@@ -130,6 +130,9 @@ std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackwardCuda(
   at::checkAllSameType(
       c, {barycentric_coords_t, face_attrs_t, grad_pix_attrs_t});
 
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("InterpFaceAttrsBackwardCuda");
+
   // Set the device for the kernel launch based on the input
   at::cuda::CUDAGuard device_guard(pix_to_face.device());
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
diff --git a/pytorch3d/csrc/knn/knn.cu b/pytorch3d/csrc/knn/knn.cu
index 191934057..779fcbf18 100644
--- a/pytorch3d/csrc/knn/knn.cu
+++ b/pytorch3d/csrc/knn/knn.cu
@@ -534,6 +534,9 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda(
       c, {p1_t, p2_t, lengths1_t, lengths2_t, idxs_t, grad_dists_t});
   at::checkAllSameType(c, {p1_t, p2_t, grad_dists_t});
 
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("KNearestNeighborBackwardCuda");
+
   // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(p1.device());
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
diff --git a/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu b/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
index 382d06d5c..3788d4055 100644
--- a/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
@@ -305,6 +305,8 @@ std::tuple<at::Tensor, at::Tensor> DistanceBackwardCuda(
   at::CheckedFrom c = "DistanceBackwardCuda";
   at::checkAllSameGPU(c, {objects_t, targets_t, idx_objects_t, grad_dists_t});
   at::checkAllSameType(c, {objects_t, targets_t, grad_dists_t});
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("DistanceBackwardCuda");
 
   // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(objects.device());
@@ -624,6 +626,9 @@ std::tuple<at::Tensor, at::Tensor> PointFaceArrayDistanceBackwardCuda(
   at::CheckedFrom c = "PointFaceArrayDistanceBackwardCuda";
   at::checkAllSameGPU(c, {points_t, tris_t, grad_dists_t});
   at::checkAllSameType(c, {points_t, tris_t, grad_dists_t});
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic(
+      "PointFaceArrayDistanceBackwardCuda");
 
   // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(points.device());
@@ -787,6 +792,9 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeArrayDistanceBackwardCuda(
   at::CheckedFrom c = "PointEdgeArrayDistanceBackwardCuda";
   at::checkAllSameGPU(c, {points_t, segms_t, grad_dists_t});
   at::checkAllSameType(c, {points_t, segms_t, grad_dists_t});
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic(
+      "PointEdgeArrayDistanceBackwardCuda");
 
   // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(points.device());
diff --git a/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu b/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu
index 9d80dc64a..43d4ed55a 100644
--- a/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu
+++ b/pytorch3d/csrc/points_to_volumes/points_to_volumes.cu
@@ -141,6 +141,9 @@ void PointsToVolumesForwardCuda(
        grid_sizes_t,
        mask_t});
 
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("PointsToVolumesForwardCuda");
+
   // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(points_3d.device());
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
diff --git a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
index 87348ee05..21ff7e504 100644
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
@@ -583,6 +583,9 @@ at::Tensor RasterizeMeshesBackwardCuda(
   at::checkAllSameType(
       c, {face_verts_t, grad_zbuf_t, grad_bary_t, grad_dists_t});
 
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("RasterizeMeshesBackwardCuda");
+
   // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(face_verts.device());
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
diff --git a/pytorch3d/csrc/rasterize_points/rasterize_points.cu b/pytorch3d/csrc/rasterize_points/rasterize_points.cu
index dafc810da..5b18d8334 100644
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.cu
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.cu
@@ -423,7 +423,8 @@ at::Tensor RasterizePointsBackwardCuda(
   at::CheckedFrom c = "RasterizePointsBackwardCuda";
   at::checkAllSameGPU(c, {points_t, idxs_t, grad_zbuf_t, grad_dists_t});
   at::checkAllSameType(c, {points_t, grad_zbuf_t, grad_dists_t});
-
+  // This is nondeterministic because of atomicAdd
+  at::globalContext().alertNotDeterministic("RasterizePointsBackwardCuda");
  // Set the device for the kernel launch based on the device of the input
   at::cuda::CUDAGuard device_guard(points.device());
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
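
Every hunk above applies the same guard: a CUDA pass whose gradient (or density) scatter relies on atomicAdd reports itself to the ATen global context before launching its kernel, so users who opt in via torch.use_deterministic_algorithms(True) get an error, or a warning in warn-only mode, instead of silently nonreproducible results. A minimal sketch of the pattern, not part of this patch; MyOpBackwardCuda is a hypothetical op name, not one of the functions changed here:

#include <ATen/Context.h>

// Report a nondeterministic CUDA op to the global context. When
// deterministic algorithms are requested this throws a c10::Error
// (surfacing as a RuntimeError in Python); in warn-only mode it emits
// a warning; otherwise it is a no-op, so the alert is free by default.
void MyOpBackwardCuda(/* tensor arguments elided */) {
  at::globalContext().alertNotDeterministic("MyOpBackwardCuda");
  // ... launch the atomicAdd-based backward kernel here ...
}

Placing the alert after the tensor checks but before the CUDAGuard and kernel launch, as the hunks do, means the op fails fast under determinism mode without touching the device.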