Skip to content

Commit

Permalink
Parallelproj tof (#1356)
Browse files Browse the repository at this point in the history
- add TOF functionality to the Parallelproj projector
- require Parallelproj 1.3.4 as older versions had a bug
- [GHA] update parallelproj to 1.7.3

Co-authored-by: robbietuk <robbietuk@gmail.com>
Co-authored-by: Kris Thielemans <KrisThielemans@users.noreply.github.com>
  • Loading branch information
3 people authored Feb 12, 2024
1 parent 7df3780 commit 250da05
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ jobs:
python -m pip install pytest
if test "${{matrix.parallelproj}}XX" == "ONXX"; then
git clone --depth 1 --branch v1.3.7 https://github.com/gschramm/parallelproj
git clone --depth 1 --branch v1.7.3 https://github.com/gschramm/parallelproj
mkdir parallelproj/build
cd parallelproj/build
if test "${{matrix.cuda_version}}" == "0"; then
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ endif()

# Parallelproj
if(NOT DISABLE_Parallelproj_PROJECTOR)
find_package(parallelproj 1.0.1 CONFIG)
find_package(parallelproj 1.3.4 CONFIG)
if (parallelproj_FOUND)
set(STIR_WITH_Parallelproj_PROJECTOR ON)
if (parallelproj_built_with_CUDA)
Expand Down
1 change: 1 addition & 0 deletions documentation/release_6.1.htm
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ <h4>Python (and MATLAB)</h4>
<h3>New functionality</h3>
<ul>
<li>
Add TOF capability to the parallelproj projector (see <a href=https://github.com/UCL/STIR/pull/1356>PR #1356</a>)
</li>
</ul>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ class ParallelprojHelper
std::array<float, 3> origin;
std::vector<float> xstart;
std::vector<float> xend;

// total number of voxels in the image (used to size device image arrays)
long long num_image_voxel;
// total number of LORs in the projection data (used for chunking the GPU work)
long long num_lors;

// single TOF resolution (sigma) shared by all LORs; passed with
// lor_dependent_sigma_tof == 0 to the parallelproj TOF functions
// (units presumably match voxsize — TODO confirm against parallelproj docs)
float sigma_tof;
// single TOF-center offset shared by all LORs (lor_dependent_tofcenter_offset == 0)
float tofcenter_offset;
// width of one TOF bin, in the same spatial units as sigma_tof
float tofbin_width;
// number of TOF bins in the projection data
short num_tof_bins;
};

} // namespace detail
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
\author Richard Brown
\author Kris Thielemans
\author Nicole Jurjew
*/
/*
Copyright (C) 2019, 2021 University College London
Copyright (C) 2019, 2021, 2024 University College London
This file is part of STIR.
SPDX-License-Identifier: Apache-2.0
Expand All @@ -25,6 +24,7 @@
#include "stir/RelatedViewgrams.h"
#include "stir/recon_buildblock/TrivialDataSymmetriesForBins.h"
#include "stir/ProjDataInfo.h"
#include "stir/TOF_conversions.h"
#include "stir/VoxelsOnCartesianGrid.h"
#include "stir/recon_array_functions.h"
#include "stir/ProjDataInMemory.h"
Expand Down Expand Up @@ -116,6 +116,20 @@ back_project(const ProjData& proj_data, int subset_num, int num_subsets)
}
#endif

/// Rearrange TOF projection data from STIR's memory order into the order
/// expected by parallelproj.
///
/// STIR stores the data TOF-bin major (all LORs for bin 0, then all LORs for
/// bin 1, ...: index = tof_idx * num_lors + lor_idx), whereas parallelproj
/// expects LOR major (all TOF bins of LOR 0, then LOR 1, ...:
/// index = lor_idx * num_tof_bins + tof_idx).
///
/// \param mem_for_PP_back destination buffer in parallelproj order
/// \param STIR_mem        source buffer in STIR order
/// \param _helper         provides num_lors and num_tof_bins
/// \param offset          element offset into STIR_mem (LOR offset of the current chunk)
///
/// NOTE(review): the inner loop runs over all _helper->num_lors, but the
/// chunked-GPU caller sizes the destination buffer per chunk
/// (num_lors_per_chunk * num_tof_bins) — confirm this is only reached with a
/// single chunk, otherwise the write overruns the buffer.
static void
TOF_transpose(std::vector<float>& mem_for_PP_back,
              const float* STIR_mem,
              // pass shared_ptr by const reference: no ownership is taken here,
              // avoids an atomic refcount increment/decrement per call
              const shared_ptr<const detail::ParallelprojHelper>& _helper,
              const long long offset)
{
  const auto num_tof_bins = static_cast<unsigned>(_helper->num_tof_bins);
  for (unsigned tof_idx = 0; tof_idx < num_tof_bins; ++tof_idx)
    for (long long lor_idx = 0; lor_idx < _helper->num_lors; ++lor_idx)
      {
        mem_for_PP_back[lor_idx * num_tof_bins + tof_idx] = STIR_mem[offset + tof_idx * _helper->num_lors + lor_idx];
      }
}

void
BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density) const
{
Expand All @@ -128,18 +142,14 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density

#ifdef parallelproj_built_with_CUDA

long long num_image_voxel = static_cast<long long>(image_vec.size());
long long num_lors = static_cast<long long>(p.get_proj_data_info_sptr()->size_all());

long long num_lors_per_chunk_floor = num_lors / _num_gpu_chunks;
long long remainder = num_lors % _num_gpu_chunks;
long long num_lors_per_chunk_floor = _helper->num_lors / _num_gpu_chunks;
long long remainder = _helper->num_lors % _num_gpu_chunks;

long long num_lors_per_chunk;
long long offset = 0;

// send image to all visible CUDA devices
float** image_on_cuda_devices;
image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), num_image_voxel);
float** image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), _helper->num_image_voxel);

// do (chunk-wise) back projection on the CUDA devices
for (int chunk_num = 0; chunk_num < _num_gpu_chunks; chunk_num++)
Expand All @@ -152,38 +162,96 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
{
num_lors_per_chunk = num_lors_per_chunk_floor;
}

joseph3d_back_cuda(_helper->xstart.data() + 3 * offset,
_helper->xend.data() + 3 * offset,
image_on_cuda_devices,
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr() + offset,
num_lors_per_chunk,
_helper->imgdim.data(),
/*threadsperblock*/ 64);

if (p.get_proj_data_info_sptr()->is_tof_data())
{
info("running the CUDA version of parallelproj, about to call function joseph3d_back_tof_sino_cuda", 2);

std::vector<float> mem_for_PP_back(num_lors_per_chunk * _helper->num_tof_bins);
const float* STIR_mem = p.get_const_data_ptr();

TOF_transpose(mem_for_PP_back, STIR_mem, _helper, offset);

// info("created object mem_for_PP_img", 2);
joseph3d_back_tof_sino_cuda(_helper->xend.data() + 3 * offset,
_helper->xstart.data() + 3 * offset,
image_on_cuda_devices,
_helper->origin.data(),
_helper->voxsize.data(),
mem_for_PP_back.data(), // p.get_const_data_ptr() + offset* num_tof_bins,
num_lors_per_chunk,
_helper->imgdim.data(),
_helper->tofbin_width,
&_helper->sigma_tof,
&_helper->tofcenter_offset,
4, // float n_sigmas
_helper->num_tof_bins,
0, // unsigned char lor_dependent_sigma_tof
0, // unsigned char lor_dependent_tofcenter_offset
64 // threadsperblock
);
if (chunk_num != _num_gpu_chunks - 1)
p.release_const_data_ptr();
}
else
{
joseph3d_back_cuda(_helper->xstart.data() + 3 * offset,
_helper->xend.data() + 3 * offset,
image_on_cuda_devices,
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr() + offset,
num_lors_per_chunk,
_helper->imgdim.data(),
/*threadsperblock*/ 64);
}
offset += num_lors_per_chunk;
}

// sum backprojected images on the first CUDA device
sum_float_arrays_on_first_device(image_on_cuda_devices, num_image_voxel);
sum_float_arrays_on_first_device(image_on_cuda_devices, _helper->num_image_voxel);

// copy summed image back to host
get_float_array_from_device(image_on_cuda_devices, num_image_voxel, 0, image_vec.data());
get_float_array_from_device(image_on_cuda_devices, _helper->num_image_voxel, 0, image_vec.data());

// free image array from CUDA devices
free_float_array_on_all_devices(image_on_cuda_devices);

#else
joseph3d_back(_helper->xstart.data(),
_helper->xend.data(),
image_vec.data(),
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr(),
static_cast<long long>(p.get_proj_data_info_sptr()->size_all()),
_helper->imgdim.data());
if (this->_proj_data_info_sptr->is_tof_data() == 1)
{
std::vector<float> mem_for_PP_back(_helper->num_lors * _helper->num_tof_bins);
const float* STIR_mem = p.get_const_data_ptr();

TOF_transpose(mem_for_PP_back, STIR_mem, _helper, 0);

joseph3d_back_tof_sino(_helper->xend.data(),
_helper->xstart.data(),
image_vec.data(),
_helper->origin.data(),
_helper->voxsize.data(),
mem_for_PP_back.data(),
_helper->num_lors,
_helper->imgdim.data(),
_helper->tofbin_width,
&_helper->sigma_tof,
&_helper->tofcenter_offset,
4, // float n_sigmas,
_helper->num_tof_bins,
0, // unsigned char lor_dependent_sigma_tof
0 // unsigned char lor_dependent_tofcenter_offset
);
}
else
{
joseph3d_back(_helper->xstart.data(),
_helper->xend.data(),
image_vec.data(),
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr(),
static_cast<long long>(p.get_proj_data_info_sptr()->size_all()),
_helper->imgdim.data());
}
#endif
info("done", 2);

Expand Down
Loading

0 comments on commit 250da05

Please sign in to comment.