Skip to content

Commit

Permalink
Avoid image copies in Parallelproj
Browse files Browse the repository at this point in the history
if data is contiguous, we don't need an extra copy.
  • Loading branch information
KrisThielemans committed May 17, 2024
1 parent 74579af commit 8752e45
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,19 @@ TOF_transpose(std::vector<float>& mem_for_PP_back,
void
BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density) const
{
std::vector<float> image_vec;
float* image_ptr;
if (_density_sptr->is_contiguous())
{
image_ptr = _density_sptr->get_full_data_ptr();
}
else
{
image_vec.resize(density.size_all());
std::copy(_density_sptr->begin_all(), _density_sptr->end_all(), image_vec.begin());
image_ptr = image_vec.data();
}

std::vector<float> image_vec(density.size_all());
// create an alias for the projection data
const ProjDataInMemory& p(*_proj_data_to_backproject_sptr);

Expand All @@ -149,7 +160,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
long long offset = 0;

// send image to all visible CUDA devices
float** image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), _helper->num_image_voxel);
float** image_on_cuda_devices = copy_float_array_to_all_devices(image_ptr, _helper->num_image_voxel);

// do (chunk-wise) back projection on the CUDA devices
for (int chunk_num = 0; chunk_num < _num_gpu_chunks; chunk_num++)
Expand Down Expand Up @@ -211,7 +222,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
sum_float_arrays_on_first_device(image_on_cuda_devices, _helper->num_image_voxel);

// copy summed image back to host
get_float_array_from_device(image_on_cuda_devices, _helper->num_image_voxel, 0, image_vec.data());
get_float_array_from_device(image_on_cuda_devices, _helper->num_image_voxel, 0, image_ptr);

// free image array from CUDA devices
free_float_array_on_all_devices(image_on_cuda_devices);
Expand All @@ -226,7 +237,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density

joseph3d_back_tof_sino(_helper->xend.data(),
_helper->xstart.data(),
image_vec.data(),
image_ptr,
_helper->origin.data(),
_helper->voxsize.data(),
mem_for_PP_back.data(),
Expand All @@ -245,7 +256,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
{
joseph3d_back(_helper->xstart.data(),
_helper->xend.data(),
image_vec.data(),
image_ptr,
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr(),
Expand All @@ -260,7 +271,14 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
// --------------------------------------------------------------- //
// Parallelproj -> STIR image conversion
// --------------------------------------------------------------- //
std::copy(image_vec.begin(), image_vec.end(), density.begin_all());
if (_density_sptr->is_contiguous())
{
_density_sptr->release_full_data_ptr();
}
else
{
std::copy(image_vec.begin(), image_vec.end(), density.begin_all());
}

// After the back projection, we enforce a truncation outside of the FOV.
// This is because the parallelproj projector seems to have some trouble at the edges and this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,18 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
truncate_rim(*_density_sptr, static_cast<int>(std::max((image_radius - radius) / _helper->voxsize[2], 0.F)));
}

std::vector<float> image_vec(density.size_all());
std::copy(_density_sptr->begin_all(), _density_sptr->end_all(), image_vec.begin());
std::vector<float> image_vec;
float* image_ptr;
if (_density_sptr->is_contiguous())
{
image_ptr = _density_sptr->get_full_data_ptr();
}
else
{
image_vec.resize(density.size_all());
std::copy(_density_sptr->begin_all(), _density_sptr->end_all(), image_vec.begin());
image_ptr = image_vec.data();
}

#if 0
// needed to set output to zero as parallelproj accumulates but is no longer the case
Expand All @@ -174,7 +184,7 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
long long offset = 0;

// send image to all visible CUDA devices
float** image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), _helper->num_image_voxel);
float** image_on_cuda_devices = copy_float_array_to_all_devices(image_ptr, _helper->num_image_voxel);

// do (chunk-wise) projection on the CUDA devices
for (int chunk_num = 0; chunk_num < _num_gpu_chunks; chunk_num++)
Expand Down Expand Up @@ -246,7 +256,7 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
std::vector<float> mem_for_PP(_helper->num_lors * _helper->num_tof_bins);
joseph3d_fwd_tof_sino(_helper->xend.data(),
_helper->xstart.data(),
image_vec.data(),
image_ptr,
_helper->origin.data(),
_helper->voxsize.data(),
mem_for_PP.data(),
Expand All @@ -268,7 +278,7 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
{
joseph3d_fwd(_helper->xstart.data(),
_helper->xend.data(),
image_vec.data(),
image_ptr,
_helper->origin.data(),
_helper->voxsize.data(),
_projected_data_sptr->get_data_ptr(),
Expand All @@ -278,6 +288,10 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
#endif
info("done", 2);

if (_density_sptr->is_contiguous())
{
_density_sptr->release_full_data_ptr();
}
_projected_data_sptr->release_data_ptr();
}

Expand Down

0 comments on commit 8752e45

Please sign in to comment.