diff --git a/documentation/STIR-UsersGuide.tex b/documentation/STIR-UsersGuide.tex
index 6c32c1ada2..80a91a9aed 100644
--- a/documentation/STIR-UsersGuide.tex
+++ b/documentation/STIR-UsersGuide.tex
@@ -3699,6 +3699,13 @@ \subsubsection{
a popular Monte Carlo simulator for PET and SPECT. This
is all preliminary. Check out the \texttt{README.txt} in the \textbf{STIR/SimSET} directory.
+\subsubsection{
+Performing timings of certain operations}
+The \textbf{stir\_timings} utility is mostly useful for developers,
+but you could use it to optimise the number of OpenMP threads to use for your data.
+
+Run the utility without any arguments to get a help message.
+If you want to know what is actually timed, you will have to look at the source code.
\subsection{
User-selectable components}
diff --git a/documentation/release_5.2.htm b/documentation/release_5.2.htm
index 68e716b7f2..7d0b468a5d 100644
--- a/documentation/release_5.2.htm
+++ b/documentation/release_5.2.htm
@@ -55,6 +55,11 @@
New functionality
(still somewhat preliminary).
PR #1182.
+
+ The new stir_timings utility is mostly useful for developers,
+ but you could use it to optimise the number of OpenMP threads to use for your data.
+
PR #1237.
+
New examples
diff --git a/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx b/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx
index cb165f6722..2164c4cb64 100644
--- a/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx
+++ b/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx
@@ -396,7 +396,6 @@ actual_reconstruct(shared_ptr > const& target_image_
alpha_fit = 1.0F;
beta_fit = 0.0F;
- start_timers();
{
//char file[max_filename_length];
//sprintf(file,"%s.full_log",output_filename_prefix.c_str());
@@ -564,7 +563,6 @@ actual_reconstruct(shared_ptr > const& target_image_
if(display_level>0)
display(image, image.find_max(), "Final image");
- stop_timers();
do_log_file(image);
full_log.close();
diff --git a/src/include/stir/DataProcessor.inl b/src/include/stir/DataProcessor.inl
index 9c523a9a93..b0f03617bd 100644
--- a/src/include/stir/DataProcessor.inl
+++ b/src/include/stir/DataProcessor.inl
@@ -74,7 +74,6 @@ Succeeded
DataProcessor::
apply(DataT& data)
{
- start_timers();
//assert(consistency_check(data) == Succeeded::yes);
if (!is_set_up_already )
if (set_up(data) == Succeeded::no)
@@ -82,6 +81,7 @@ apply(DataT& data)
warning("DataProcessor::apply: Building was unsuccesfull. No processing done.\n");
return Succeeded::no;
}
+ start_timers();
virtual_apply(data);
stop_timers();
return Succeeded::yes;
@@ -94,7 +94,6 @@ DataProcessor::
apply(DataT& data,
const DataT& in_data)
{
- start_timers();
//assert(consistency_check(in_data) == Succeeded::yes);
if (!is_set_up_already )
if (set_up(in_data) == Succeeded::no)
@@ -102,6 +101,7 @@ apply(DataT& data,
warning("DataProcessor::apply: Building was unsuccesfull. No processing done.\n");
return Succeeded::no;
}
+ start_timers();
virtual_apply(data, in_data);
stop_timers();
return Succeeded::yes;
diff --git a/src/include/stir/HighResWallClockTimer.h b/src/include/stir/HighResWallClockTimer.h
index 8b1fcdee30..d12859aad7 100644
--- a/src/include/stir/HighResWallClockTimer.h
+++ b/src/include/stir/HighResWallClockTimer.h
@@ -153,11 +153,10 @@ namespace stir {
/*!
\param bReset indicates whether the elapsed time should be reset before the timer is started.
- The timer must not be running.
+ The timer must not be running if asking to reset.
*/
inline void HighResWallClockTimer::start(bool bReset /* = false */)
{
- assert(!m_bRunning);
if (bReset) reset();
m_bRunning = true;
#if defined(_AIX)
diff --git a/src/include/stir/TimedObject.h b/src/include/stir/TimedObject.h
index a7588c079a..a5d6c4eb95 100644
--- a/src/include/stir/TimedObject.h
+++ b/src/include/stir/TimedObject.h
@@ -21,6 +21,7 @@
*/
#include "stir/CPUTimer.h"
+#include "stir/HighResWallClockTimer.h"
START_NAMESPACE_STIR
/*!
@@ -48,19 +49,25 @@ class TimedObject
/*! Note: function is const such that it can be called in a
function of a derived class that is const.
*/
- inline void start_timers() const;
+ inline void start_timers(bool do_reset = false) const;
- //! get current value of the timer (since first use or last reset)
+ //! get current value of the CPU timer (since first use or last reset)
inline double get_CPU_timer_value() const;
+ //! get current value of the wall-clock timer (since first use or last reset)
+ inline double get_wall_clock_timer_value() const;
+
private:
- //! A timer that measured CPU time.
+ //! A timer that measures CPU time.
/*! Note: member is mutable such that it can be modified in a const function.
*/
mutable CPUTimer cpu_timer;
- // TODO include other times (such as wall-clock)
+ //! A timer that measures wall clock time.
+ /*! Note: member is mutable such that it can be modified in a const function.
+ */
+ mutable HighResWallClockTimer wall_clock_timer;
};
diff --git a/src/include/stir/TimedObject.inl b/src/include/stir/TimedObject.inl
index 068b5d848c..29e0f610bf 100644
--- a/src/include/stir/TimedObject.inl
+++ b/src/include/stir/TimedObject.inl
@@ -25,17 +25,20 @@ START_NAMESPACE_STIR
void TimedObject::reset_timers()
{
cpu_timer.reset();
+ wall_clock_timer.reset();
}
-void TimedObject::start_timers() const
+void TimedObject::start_timers(bool do_reset) const
{
- cpu_timer.start();
+ cpu_timer.start(do_reset);
+ wall_clock_timer.start(do_reset);
}
void TimedObject::stop_timers() const
{
cpu_timer.stop();
+ wall_clock_timer.stop();
}
@@ -44,4 +47,9 @@ double TimedObject::get_CPU_timer_value() const
return cpu_timer.value();
}
+double TimedObject::get_wall_clock_timer_value() const
+{
+ return wall_clock_timer.value();
+}
+
END_NAMESPACE_STIR
diff --git a/src/include/stir/Timer.h b/src/include/stir/Timer.h
index 56904a1ba1..84ff82a6c4 100644
--- a/src/include/stir/Timer.h
+++ b/src/include/stir/Timer.h
@@ -3,6 +3,7 @@
/*
Copyright (C) 2000 PARAPET partners
Copyright (C) 2000- 2009, Hammersmith Imanet Ltd
+ Copyright (C) 2023, University College London
This file is part of STIR.
SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -54,8 +55,13 @@ class Timer
public:
inline Timer();
inline virtual ~Timer();
- inline void start();
+ //! start stopwatch, optionally resetting first
+ /*! the stopwatch should not be running already if asking to reset */
+ inline void start(bool do_reset = false);
+ //! stop stopwatch
inline void stop();
+ //! reset stopwatch
+ /*! the stopwatch should not be running */
inline void reset();
//! return value is undefined when start() is not called first.
inline double value() const;
diff --git a/src/include/stir/Timer.inl b/src/include/stir/Timer.inl
index f4390234a3..b4c733d516 100644
--- a/src/include/stir/Timer.inl
+++ b/src/include/stir/Timer.inl
@@ -14,12 +14,15 @@
/*
Copyright (C) 2000 PARAPET partners
Copyright (C) 2000- 2009, Hammersmith Imanet Ltd
+ Copyright (C) 2023, University College London
This file is part of STIR.
SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
See STIR/LICENSE.txt for details
*/
+#include "stir/error.h"
+
START_NAMESPACE_STIR
Timer::Timer()
@@ -32,13 +35,17 @@ Timer::Timer()
Timer::~Timer()
{}
-void Timer::start()
+void Timer::start(bool do_reset)
{
if (!running)
{
+ if (do_reset)
+ reset();
running = true;
previous_value = get_current_value();
}
+ else if (do_reset)
+ error("Timer::start called requesting reset, but the timer is running");
}
void Timer::stop()
diff --git a/src/recon_buildblock/BackProjectorByBin.cxx b/src/recon_buildblock/BackProjectorByBin.cxx
index 61a9616bf5..d993e7afac 100644
--- a/src/recon_buildblock/BackProjectorByBin.cxx
+++ b/src/recon_buildblock/BackProjectorByBin.cxx
@@ -152,8 +152,6 @@ back_project(DiscretisedDensity<3,float>& density,
check(*viewgrams.get_proj_data_info_sptr(), density);
- start_timers();
-
// first check symmetries
{
const ViewSegmentNumbers basic_vs = viewgrams.get_basic_view_segment_num();
@@ -178,7 +176,6 @@ back_project(DiscretisedDensity<3,float>& density,
max_axial_pos_num,
min_tangential_pos_num,
max_tangential_pos_num);
- stop_timers();
}
#endif
// -------------------------------------------------------------------------------------------------------------------- //
@@ -270,8 +267,6 @@ back_project(const RelatedViewgrams& viewgrams,
check(*viewgrams.get_proj_data_info_sptr(), *_density_sptr);
- start_timers();
-
#ifdef STIR_OPENMP
const int thread_num=omp_get_thread_num();
if(is_null_ptr(_local_output_image_sptrs[thread_num]))
@@ -303,7 +298,6 @@ back_project(const RelatedViewgrams& viewgrams,
max_axial_pos_num,
min_tangential_pos_num,
max_tangential_pos_num);
- stop_timers();
}
void
diff --git a/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx b/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx
index a97a5a4477..c5a9f91032 100644
--- a/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx
+++ b/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx
@@ -454,8 +454,6 @@ BackProjectorByBinUsingInterpolation::
back_project_2D_all_symmetries(const Sinogram &sino, PETPlane &image, int view,
const int min_tang_pos, const int max_tang_pos)
{
- start_timers();
-
assert(sino.get_min_bin() == - sino.get_max_bin());
assert(min_tang_pos == -max_tang_pos);
assert(image.get_min_x() == - image.get_max_x());
@@ -508,7 +506,6 @@ BackProjectorByBinUsingInterpolation::
backproj2D_Cho_view_viewplus90_180minview_90minview(image, projs, cphi, sphi, s);
}
- stop_timers();
}
void
@@ -516,8 +513,6 @@ BackProjectorByBinUsingInterpolation::
back_project_2D_view_plus_90(const Sinogram &sino, PETPlane &image, int view,
const int min_tang_pos, const int max_tang_pos)
{
- start_timers();
-
assert(sino.get_min_bin() == - sino.get_max_bin());
assert(min_tang_pos == -max_tang_pos);
assert(image.get_min_x() == - image.get_max_x());
@@ -560,7 +555,6 @@ BackProjectorByBinUsingInterpolation::
backproj2D_Cho_view_viewplus90(image, projs, cphi, sphi, s);
}
- stop_timers();
}
#endif
@@ -664,8 +658,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n")
: 0 );
- start_timers();
-
const JacobianForIntBP jacobian(proj_data_info_cyl_sptr, use_exact_Jacobian_now);
Array<4, float > Proj2424(IndexRange4D(0, 1, 0, 3, 0, 1, 1, 4));
@@ -803,7 +795,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n")
axial_pos_to_z_offset);
}
}
- stop_timers();
}
/*
@@ -867,8 +858,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n")
// KTXXX not necessary anymore
//assert(image.get_min_z() == 0);
- start_timers();
-
const JacobianForIntBP jacobian(proj_data_info_cyl_sptr, use_exact_Jacobian_now);
Array<4, float > Proj2424(IndexRange4D(0, 1, 0, 3, 0, 1, 1, 4));
@@ -1001,7 +990,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n")
axial_pos_to_z_offset);
}
}
- stop_timers();
}
diff --git a/src/recon_buildblock/ForwardProjectorByBin.cxx b/src/recon_buildblock/ForwardProjectorByBin.cxx
index 9758d925de..db1eae7401 100644
--- a/src/recon_buildblock/ForwardProjectorByBin.cxx
+++ b/src/recon_buildblock/ForwardProjectorByBin.cxx
@@ -135,7 +135,6 @@ forward_project(RelatedViewgrams& viewgrams,
if (viewgrams.get_num_viewgrams()==0)
return;
check(*viewgrams.get_proj_data_info_sptr(), density);
- start_timers();
// first check symmetries
{
@@ -256,7 +255,6 @@ forward_project(RelatedViewgrams& viewgrams,
error("You need to call set_input() forward_project()");
check(*viewgrams.get_proj_data_info_sptr(), *_density_sptr);
- start_timers();
// first check symmetries
{
@@ -281,7 +279,6 @@ forward_project(RelatedViewgrams& viewgrams,
max_axial_pos_num,
min_tangential_pos_num,
max_tangential_pos_num);
- stop_timers();
}
void
diff --git a/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx b/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx
index 9bcf8834b3..53f21dc397 100644
--- a/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx
+++ b/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx
@@ -483,9 +483,6 @@ forward_project_all_symmetries(
const int min_tang_pos_num_in_loop =
min_abs_tangential_pos_num==0 ? 1 : min_abs_tangential_pos_num;
- start_timers();
-
-
Array <4,float> Projall(IndexRange4D(min_ax_pos_num, max_ax_pos_num, 0, 1, 0, 1, 0, 3));
// KT 21/05/98 removed as now automatically zero
// Projall.fill(0);
@@ -653,8 +650,6 @@ forward_project_all_symmetries(
}// end of } else {
}// end of test for offset loop
- stop_timers();
-
}
@@ -905,8 +900,6 @@ void ForwardProjectorByBinUsingRayTracing::forward_project_2D(Sinogram &s
const int projrad = (int) (sino.get_num_tangential_poss() / 2) - 1;
- start_timers();
-
//TODO for the moment, just handle 1 plane and use some 3D variables
const int min_ax_pos = 0;
const int max_ax_pos = 0;
@@ -1023,8 +1016,6 @@ void ForwardProjectorByBinUsingRayTracing::forward_project_2D(Sinogram &s
}// end of } else {
}// end of test for offset loop
- stop_timers();
-
}
#endif // old 2D versions
@@ -1114,7 +1105,6 @@ forward_project_all_symmetries_2D(Viewgram & pos_view,
const int min_axial_pos_num, const int max_axial_pos_num,
const int min_tangential_pos_num, const int max_tangential_pos_num) const
{
- start_timers();
// KT 20/06/2001 should now work for non-arccorrected data as well
const shared_ptr proj_data_info_sptr =
@@ -1444,7 +1434,6 @@ forward_project_all_symmetries_2D(Viewgram & pos_view,
}// end loop over D
}// end of else
- stop_timers();
}
diff --git a/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx b/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx
index 133a4a33b3..adb030122c 100644
--- a/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx
+++ b/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx
@@ -10,7 +10,7 @@
*/
/*
- Copyright (C) 2021 University College London
+ Copyright (C) 2021, 2023 University College London
This file is part of STIR.
SPDX-License-Identifier: Apache-2.0
@@ -28,7 +28,7 @@
#include "stir/info.h"
#include "stir/stream.h"
#include
-
+#include "stir/num_threads.h"
START_NAMESPACE_STIR
@@ -70,45 +70,96 @@ detail::ParallelprojHelper::ParallelprojHelper(const ProjDataInfo& p_info, const
copy_to_array(coord_first_voxel*rescale, origin);
// loop over all LORs in the projdata
- Bin bin;
- LORInAxialAndNoArcCorrSinogramCoordinates lor;
- LORAs2Points lor_points;
const float radius = p_info.get_scanner_sptr()->get_max_FOV_radius();
// warning: next loop needs to be the same as how ProjDataInMemory stores its data. There is no guarantee that this will remain the case in the future.
const auto segment_sequence = ProjData::standard_segment_sequence(p_info);
std::size_t index(0);
+
+#ifdef STIR_OPENMP
+ // Using too many threads is counterproductive according to my timings, so I limited to 8 (not necessarily optimal!).
+ const auto num_threads_to_use = std::min(8,get_max_num_threads());
+#endif
for (int seg : segment_sequence)
{
- bin.segment_num() = seg;
- for (bin.axial_pos_num() = p_info.get_min_axial_pos_num(bin.segment_num()); bin.axial_pos_num() <= p_info.get_max_axial_pos_num(bin.segment_num()); ++bin.axial_pos_num())
+ for (int axial_pos_num = p_info.get_min_axial_pos_num(seg); axial_pos_num <= p_info.get_max_axial_pos_num(seg); ++axial_pos_num)
{
- for (bin.view_num() = p_info.get_min_view_num(); bin.view_num() <= p_info.get_max_view_num(); ++bin.view_num())
+ for (int view_num = p_info.get_min_view_num(); view_num <= p_info.get_max_view_num(); ++view_num)
{
- for (bin.tangential_pos_num() = p_info.get_min_tangential_pos_num(); bin.tangential_pos_num() <= p_info.get_max_tangential_pos_num(); ++bin.tangential_pos_num())
+#ifdef STIR_OPENMP
+ #pragma omp parallel for num_threads(num_threads_to_use)
+#endif
+ for (int tangential_pos_num = p_info.get_min_tangential_pos_num(); tangential_pos_num <= p_info.get_max_tangential_pos_num(); ++tangential_pos_num)
{
+ Bin bin;
+ bin.segment_num() = seg;
+ bin.axial_pos_num() = axial_pos_num;
+ bin.view_num() = view_num;
+ bin.tangential_pos_num() = tangential_pos_num;
+ // compute index for this bin (independent of multi-threading)
+ std::size_t this_index = index + (bin.tangential_pos_num() - p_info.get_min_tangential_pos_num())*3;
+ LORInAxialAndNoArcCorrSinogramCoordinates lor;
+ LORAs2Points lor_points;
+
p_info.get_LOR(lor, bin);
if (lor.get_intersections_with_cylinder(lor_points, radius) == Succeeded::no)
- { // memory is already allocated, so just passing in points that will produce nothing
- xstart[index] = 0;
- xend[index++] = 0;
- xstart[index] = 0;
- xend[index++] = 0;
- xstart[index] = 0;
- xend[index++] = 0;
+ { // memory is already allocated, so just passing in points that will produce nothing
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xstart[this_index] = 0;
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xend[this_index] = 0;
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xstart[this_index+1] = 0;
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xend[this_index+1] = 0;
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xstart[this_index+2] = 0;
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xend[this_index+2] = 0;
}
else
{
const CartesianCoordinate3D p1 = lor_points.p1()*rescale;
const CartesianCoordinate3D p2 = lor_points.p2()*rescale;
- xstart[index] = p1[1];
- xend[index++] = p2[1];
- xstart[index] = p1[2];
- xend[index++] = p2[2];
- xstart[index] = p1[3];
- xend[index++] = p2[3];
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xstart[this_index] = p1[1];
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xend[this_index] = p2[1];
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xstart[this_index+1] = p1[2];
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xend[this_index+1] = p2[2];
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xstart[this_index+2] = p1[3];
+#ifdef STIR_OPENMP
+#pragma omp atomic write
+#endif
+ xend[this_index+2] = p2[3];
}
}
+ index += p_info.get_num_tangential_poss()*3;
}
}
}
diff --git a/src/utilities/CMakeLists.txt b/src/utilities/CMakeLists.txt
index eb0c5c1820..3ab181099e 100644
--- a/src/utilities/CMakeLists.txt
+++ b/src/utilities/CMakeLists.txt
@@ -73,6 +73,7 @@ set(${dir_EXE_SOURCES}
write_sinogram_to_txt.cxx
find_sum_projection_of_viewgram_and_sinogram.cxx
separate_true_from_random_scatter_for_necr.cxx
+ stir_timings.cxx
)
if (HAVE_ITK)
diff --git a/src/utilities/stir_timings.cxx b/src/utilities/stir_timings.cxx
new file mode 100644
index 0000000000..0b4c1639c8
--- /dev/null
+++ b/src/utilities/stir_timings.cxx
@@ -0,0 +1,357 @@
+/*
+ Copyright (C) 2023 University College London
+ This file is part of STIR.
+
+ SPDX-License-Identifier: Apache-2.0
+
+ See STIR/LICENSE.txt for details
+
+*/
+
+/*!
+ \file
+ \ingroup utilities
+ \author Kris Thielemans
+
+ \brief Perform timings
+
+ This utility performs timings of various operations. This is mostly useful for developers,
+ but you could use it to optimise the number of OpenMP threads to use for your data.
+
+ Run the utility without any arguments to get a help message.
+ If you want to know what is actually timed, you will have to look at the source code.
+*/
+
+#include "stir/ProjDataInterfile.h"
+#include "stir/ProjDataInMemory.h"
+#include "stir/DiscretisedDensity.h"
+#include "stir/VoxelsOnCartesianGrid.h"
+#include "stir/IO/read_from_file.h"
+#include "stir/IO/write_to_file.h"
+#include "stir/recon_buildblock/ProjectorByBinPairUsingProjMatrixByBin.h"
+#ifdef STIR_WITH_Parallelproj_PROJECTOR
+# include "stir/recon_buildblock/Parallelproj_projector/ProjectorByBinPairUsingParallelproj.h"
+#endif
+#include "stir/recon_buildblock/ProjMatrixByBinUsingRayTracing.h"
+#include "stir/recon_buildblock/PoissonLogLikelihoodWithLinearModelForMeanAndProjData.h"
+//#include "stir/OSMAPOSL/OSMAPOSLReconstruction.h"
+#include "stir/recon_buildblock/distributable_main.h"
+#include "stir/warning.h"
+#include "stir/error.h"
+#include "stir/num_threads.h"
+#include "stir/Verbosity.h"
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <chrono>
+#include <thread>
+
+[[noreturn]] static void
+print_usage_and_exit()
+{ // Writes the command-line synopsis to stderr and terminates with EXIT_FAILURE; control never returns to the caller.
+  std::cerr << "\nUsage:\nstir_timings [--name some_string] [--threads num_threads] [--runs num_runs]\\\n"
+            << "\t[--skip-PP 1] [--skip-PMRT 1]\\\n"
+            << "\t[--image image_filename]\\\n"
+            << "\t--template-projdata template_proj_data_filename\n\n"
+            << "Timings are reported to stdout as:\n"
+            << "name\ttiming_name\tCPU_time_in_ms\twall-clock_time_in_ms\n";
+  std::exit(EXIT_FAILURE);
+}
+
+START_NAMESPACE_STIR
+
+class Timings : public TimedObject
+{ // Bundles the image, projection data, projectors and objective function that are timed; timers come from TimedObject.
+  //! Pointer to a parameterless member function; this is what run_it() calls repeatedly
+  typedef void (Timings::*TimedFunction)();
+public:
+  //! Use as prefix for all output
+  std::string name;
+  // variables that select timings (nothing is skipped unless the caller sets these)
+  bool skip_PMRT = false;
+  bool skip_PP = false;
+
+  // variables used for running timings
+  shared_ptr<DiscretisedDensity<3, float>> image_sptr;
+  shared_ptr<ProjData> output_proj_data_sptr;
+  shared_ptr<ProjData> mem_proj_data_sptr;
+  shared_ptr<ProjectorByBinPair> projectors_sptr;
+  shared_ptr<ProjectorByBinPair> pmrt_projectors_sptr;
+#ifdef STIR_WITH_Parallelproj_PROJECTOR
+  shared_ptr<ProjectorByBinPair> parallelproj_projectors_sptr;
+#endif
+  shared_ptr<ProjData> template_proj_data_sptr;
+  shared_ptr<ExamInfo> exam_info_sptr;
+  shared_ptr<PoissonLogLikelihoodWithLinearModelForMeanAndProjData<DiscretisedDensity<3, float>>> objective_function_sptr;
+
+  // basic methods (an empty filename leaves the corresponding pointer unset; init() deals with that)
+  Timings(const std::string& image_filename, const std::string& template_proj_data_filename)
+  {
+    if (!image_filename.empty())
+      this->image_sptr = read_from_file<DiscretisedDensity<3, float>>(image_filename);
+
+    if (!template_proj_data_filename.empty())
+      this->template_proj_data_sptr = ProjData::read_from_file(template_proj_data_filename);
+  }
+
+  void run_it(TimedFunction f, const std::string& item, const unsigned runs = 1);
+  void run_all(const unsigned runs = 1);
+  void init();
+
+  // functions that are timed
+
+  //! Test function that could be used to see if reported timings are correct
+  /*! CPU time should be close to zero, wall-clock time close to 1123ms */
+  void sleep() { std::this_thread::sleep_for(std::chrono::milliseconds(1123)); }
+
+  void copy_image()
+  {
+    // clone() returns a raw owning pointer; the unique_ptr frees it at scope exit, so the release is still timed
+    const std::unique_ptr<DiscretisedDensity<3, float>> im(this->image_sptr->clone());
+  }
+
+  //! copy from output_proj_data_sptr to new Interfile file
+  void copy_proj_data_file_to_file()
+  {
+    ProjDataInterfile tmp(this->template_proj_data_sptr->get_exam_info_sptr(),
+                          this->template_proj_data_sptr->get_proj_data_info_sptr(), "my_timings_copy.hs");
+    tmp.fill(*this->output_proj_data_sptr);
+  }
+
+  //! copy from output_proj_data_sptr to memory object
+  void copy_proj_data_file_to_mem()
+  {
+    ProjDataInMemory tmp(this->template_proj_data_sptr->get_exam_info_sptr(),
+                         this->template_proj_data_sptr->get_proj_data_info_sptr(),
+                         /* initialise*/ false);
+    tmp.fill(*this->output_proj_data_sptr);
+  }
+
+  //! copy from mem_proj_data_sptr to new Interfile file
+  void copy_proj_data_mem_to_file()
+  {
+    ProjDataInterfile tmp(this->template_proj_data_sptr->get_exam_info_sptr(),
+                          this->template_proj_data_sptr->get_proj_data_info_sptr(), "my_timings_copy.hs");
+    tmp.fill(*this->mem_proj_data_sptr);
+  }
+
+  //! copy from mem_proj_data_sptr to memory object
+  void copy_proj_data_mem_to_mem()
+  {
+    ProjDataInMemory tmp(this->template_proj_data_sptr->get_exam_info_sptr(),
+                         this->template_proj_data_sptr->get_proj_data_info_sptr(),
+                         /* initialise*/ false);
+    tmp.fill(*this->mem_proj_data_sptr);
+  }
+
+  void projector_setup()
+  {
+    this->projectors_sptr->set_up(this->template_proj_data_sptr->get_proj_data_info_sptr(), this->image_sptr);
+  }
+
+  void forward_file()
+  {
+    this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->output_proj_data_sptr, *this->image_sptr);
+  }
+  void forward_memory()
+  {
+    this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->mem_proj_data_sptr, *this->image_sptr);
+  }
+  void back_file()
+  {
+    this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->output_proj_data_sptr);
+  }
+  void back_memory()
+  {
+    this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->mem_proj_data_sptr);
+  }
+
+  void obj_func_set_up() { this->objective_function_sptr->set_up(this->image_sptr); }
+
+  void obj_func_grad_no_sens()
+  {
+    // scratch clone handed to the gradient computation; unique_ptr ownership frees it even if that call throws
+    const std::unique_ptr<DiscretisedDensity<3, float>> gradient(this->image_sptr->clone());
+    this->objective_function_sptr->compute_sub_gradient_without_penalty_plus_sensitivity(*gradient, *this->image_sptr, 0);
+  }
+};
+
+void
+Timings::run_it(TimedFunction f, const std::string& item, const unsigned runs)
+{ // Times `runs` back-to-back calls of f and prints one row: name, item, CPU and wall-clock timer value / runs * 1000.
+  this->start_timers(/* do_reset = */ true);
+  for (unsigned call = 0; call < runs; ++call)
+    (this->*f)();
+  this->stop_timers();
+  std::cout << this->name << '\t' << std::setw(32) << std::left << item << '\t';
+  std::cout << std::fixed << std::setprecision(3) << std::setw(24) << std::right << this->get_CPU_timer_value() / runs * 1000 << '\t';
+  std::cout << std::setw(24) << std::right << this->get_wall_clock_timer_value() / runs * 1000 << std::endl; // fixed/precision persist
+}
+
+void
+Timings::run_all(const unsigned runs)
+{ // Calls init(), then runs the selected benchmarks in a fixed order; every run_it() call prints one result row.
+  this->init();
+  // this->run_it(&Timings::sleep, "sleep", runs*1);
+  this->run_it(&Timings::copy_image, "copy_image", runs * 20);
+  this->output_proj_data_sptr->fill(1.F);
+  this->run_it(&Timings::copy_proj_data_mem_to_mem, "copy_proj_data_mem_to_mem", runs * 2);
+  this->run_it(&Timings::copy_proj_data_mem_to_file, "copy_proj_data_mem_to_file", runs * 2);
+  this->run_it(&Timings::copy_proj_data_file_to_mem, "copy_proj_data_file_to_mem", runs * 2);
+  this->run_it(&Timings::copy_proj_data_file_to_file, "copy_proj_data_file_to_file", runs * 2);
+  this->objective_function_sptr.reset(new PoissonLogLikelihoodWithLinearModelForMeanAndProjData<DiscretisedDensity<3, float>>);
+  this->objective_function_sptr->set_proj_data_sptr(this->mem_proj_data_sptr);
+  // this->objective_function.set_num_subsets(proj_data_sptr->get_num_views()/2);
+  if (!this->skip_PMRT)
+    {
+      this->projectors_sptr = this->pmrt_projectors_sptr;
+      this->run_it(&Timings::projector_setup, "PMRT_projector_setup", runs * 10);
+      this->run_it(&Timings::forward_file, "PMRT_forward_file_first", 1);
+      this->run_it(&Timings::forward_file, "PMRT_forward_file", 1);
+      this->run_it(&Timings::forward_memory, "PMRT_forward_memory", 1);
+      this->run_it(&Timings::back_file, "PMRT_back_file_first", 1);
+      this->run_it(&Timings::back_file, "PMRT_back_file", 1);
+      this->run_it(&Timings::back_memory, "PMRT_back_memory", 1);
+      this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr);
+      this->run_it(&Timings::obj_func_set_up, "PMRT_LogLik set_up", 1);
+      this->run_it(&Timings::obj_func_grad_no_sens, "PMRT_LogLik grad_no_sens", 1);
+    }
+#ifdef STIR_WITH_Parallelproj_PROJECTOR
+  if (!this->skip_PP)
+    {
+      this->projectors_sptr = this->parallelproj_projectors_sptr;
+      this->run_it(&Timings::projector_setup, "PP_projector_setup", 1);
+      this->run_it(&Timings::forward_file, "PP_forward_file_first", 1);
+      this->run_it(&Timings::forward_file, "PP_forward_file", runs);
+      this->run_it(&Timings::forward_memory, "PP_forward_memory", runs);
+      this->run_it(&Timings::back_file, "PP_back_file_first", 1);
+      this->run_it(&Timings::back_file, "PP_back_file", runs);
+      this->run_it(&Timings::back_memory, "PP_back_memory", runs);
+      this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr);
+      this->run_it(&Timings::obj_func_set_up, "PP_LogLik set_up", 1);
+      this->run_it(&Timings::obj_func_grad_no_sens, "PP_LogLik grad_no_sens", 1);
+    }
+#endif
+  // write_to_file("my_timings_backproj.hv", *this->image_sptr);
+}
+
+void
+Timings::init()
+{
+  // Prepares everything run_all() needs: image, exam info, file-backed and in-memory projection data, projector pairs.
+  if (!this->template_proj_data_sptr)
+    print_usage_and_exit();
+
+  if (!this->image_sptr)
+    {
+      this->exam_info_sptr = this->template_proj_data_sptr->get_exam_info().create_shared_clone();
+      this->image_sptr = std::make_shared<VoxelsOnCartesianGrid<float>>(
+          this->exam_info_sptr, *this->template_proj_data_sptr->get_proj_data_info_sptr());
+      this->image_sptr->fill(1.F);
+    }
+  else
+    {
+      this->image_sptr->fill(1.F); // the voxel values that were read are overwritten here
+      this->exam_info_sptr = this->image_sptr->get_exam_info().create_shared_clone();
+
+      if (this->image_sptr->get_exam_info().imaging_modality.is_unknown()
+          && this->template_proj_data_sptr->get_exam_info().imaging_modality.is_known())
+        {
+          this->exam_info_sptr->imaging_modality = this->template_proj_data_sptr->get_exam_info().imaging_modality;
+        }
+      else if (this->image_sptr->get_exam_info().imaging_modality
+               != this->template_proj_data_sptr->get_exam_info().imaging_modality)
+        error("stir_timings: Imaging modality should be the same for the image and the projection data");
+
+      if (this->template_proj_data_sptr->get_exam_info().has_energy_information())
+        {
+          if (this->image_sptr->get_exam_info().has_energy_information())
+            warning("Both image and template have energy information. Using the latter.");
+
+          this->exam_info_sptr->set_energy_information_from(this->template_proj_data_sptr->get_exam_info());
+        }
+    }
+
+  // projection data set-up: one Interfile-backed object (truncating "my_timings.hs") and one in-memory object
+  {
+    std::string output_filename = "my_timings.hs";
+    this->output_proj_data_sptr
+        = std::make_shared<ProjDataInterfile>(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr(),
+                                              output_filename, std::ios::in | std::ios::out | std::ios::trunc);
+    this->mem_proj_data_sptr
+        = std::make_shared<ProjDataInMemory>(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr());
+  }
+
+  // projector set-up (the projectors are only constructed here; set_up() is one of the timed operations)
+  {
+    auto PM_sptr = std::make_shared<ProjMatrixByBinUsingRayTracing>();
+    PM_sptr->set_num_tangential_LORs(5);
+    this->pmrt_projectors_sptr = std::make_shared<ProjectorByBinPairUsingProjMatrixByBin>(PM_sptr);
+
+#ifdef STIR_WITH_Parallelproj_PROJECTOR
+    this->parallelproj_projectors_sptr = std::make_shared<ProjectorByBinPairUsingParallelproj>();
+#endif
+  }
+}
+
+END_NAMESPACE_STIR
+
+#ifdef STIR_MPI
+int
+stir::distributable_main(int argc, char** argv)
+#else
+int
+main(int argc, char** argv)
+#endif
+{
+  using namespace stir;
+  Verbosity::set(0);
+
+  std::string image_filename;
+  std::string template_proj_data_filename;
+  // signed on purpose: std::atoi() yields 0 for non-numeric text and may yield negatives; both are rejected below
+  int num_runs = 3;
+  int num_threads = get_default_num_threads();
+  bool skip_PMRT = false;
+  bool skip_PP = false;
+  // prefix output with this string
+  std::string name;
+
+  ++argv;
+  --argc;
+  while (argc > 1)
+    {
+      if (!strcmp(argv[0], "--name"))
+        name = argv[1];
+      else if (!strcmp(argv[0], "--image"))
+        image_filename = argv[1];
+      else if (!strcmp(argv[0], "--template-projdata"))
+        template_proj_data_filename = argv[1];
+      else if (!strcmp(argv[0], "--runs"))
+        num_runs = std::atoi(argv[1]);
+      else if (!strcmp(argv[0], "--threads"))
+        num_threads = std::atoi(argv[1]);
+      else if (!strcmp(argv[0], "--skip-PMRT"))
+        skip_PMRT = std::atoi(argv[1]) != 0;
+      else if (!strcmp(argv[0], "--skip-PP"))
+        skip_PP = std::atoi(argv[1]) != 0;
+      else
+        print_usage_and_exit();
+      argv += 2;
+      argc -= 2;
+    }
+
+  if (argc > 0 || num_runs < 1) // dangling option without value, or a run count that would divide the timings by zero
+    print_usage_and_exit();
+
+  set_num_threads(num_threads);
+  std::cerr << "Using " << num_threads << " threads.\n";
+
+  Timings timings(image_filename, template_proj_data_filename);
+  timings.name = name;
+  timings.skip_PMRT = skip_PMRT;
+  timings.skip_PP = skip_PP;
+
+  timings.run_all(static_cast<unsigned>(num_runs));
+  return EXIT_SUCCESS;
+}