From aee2bd1075f005a346eae63ea582b351b9d98702 Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 09:28:05 +0100 Subject: [PATCH 1/9] add optional do_reset argument to Timer This is convenient but also means it is the same as HighResWallClockTimer --- src/include/stir/Timer.h | 8 +++++++- src/include/stir/Timer.inl | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/include/stir/Timer.h b/src/include/stir/Timer.h index 56904a1ba1..84ff82a6c4 100644 --- a/src/include/stir/Timer.h +++ b/src/include/stir/Timer.h @@ -3,6 +3,7 @@ /* Copyright (C) 2000 PARAPET partners Copyright (C) 2000- 2009, Hammersmith Imanet Ltd + Copyright (C) 2023, University College London This file is part of STIR. SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license @@ -54,8 +55,13 @@ class Timer public: inline Timer(); inline virtual ~Timer(); - inline void start(); + //! start stopwatch, optionally resetting first + /*! the stopwatch should not be running already if asking to reset */ + inline void start(bool do_reset = false); + //! stop stopwatch inline void stop(); + //! reset stopwatch + /*! the stopwatch should not be running */ inline void reset(); //! return value is undefined when start() is not called first. inline double value() const; diff --git a/src/include/stir/Timer.inl b/src/include/stir/Timer.inl index f4390234a3..b4c733d516 100644 --- a/src/include/stir/Timer.inl +++ b/src/include/stir/Timer.inl @@ -14,12 +14,15 @@ /* Copyright (C) 2000 PARAPET partners Copyright (C) 2000- 2009, Hammersmith Imanet Ltd + Copyright (C) 2023, University College London This file is part of STIR. 
SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license See STIR/LICENSE.txt for details */ +#include "stir/error.h" + START_NAMESPACE_STIR Timer::Timer() @@ -32,13 +35,17 @@ Timer::Timer() Timer::~Timer() {} -void Timer::start() +void Timer::start(bool do_reset) { if (!running) { + if (do_reset) + reset(); running = true; previous_value = get_current_value(); } + else if (do_reset) + error("Timer::start called requesting reset, but the timer is running"); } void Timer::stop() From 9098576a5b3d822fe6a7467305435f7c2e4d3c27 Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 09:29:40 +0100 Subject: [PATCH 2/9] add HighResWallClockTimer to TimedObject --- src/include/stir/TimedObject.h | 15 +++++++++++---- src/include/stir/TimedObject.inl | 12 ++++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/include/stir/TimedObject.h b/src/include/stir/TimedObject.h index a7588c079a..a5d6c4eb95 100644 --- a/src/include/stir/TimedObject.h +++ b/src/include/stir/TimedObject.h @@ -21,6 +21,7 @@ */ #include "stir/CPUTimer.h" +#include "stir/HighResWallClockTimer.h" START_NAMESPACE_STIR /*! @@ -48,19 +49,25 @@ class TimedObject /*! Note: function is const such that it can be called in a function of a derived class that is const. */ - inline void start_timers() const; + inline void start_timers(bool do_reset = false) const; - //! get current value of the timer (since first use or last reset) + //! get current value of the CPU timer (since first use or last reset) inline double get_CPU_timer_value() const; + //! get current value of the wall-clock timer (since first use or last reset) + inline double get_wall_clock_timer_value() const; + private: - //! A timer that measured CPU time. + //! A timer that measures CPU time. /*! Note: member is mutable such that it can be modified in a const function. */ mutable CPUTimer cpu_timer; - // TODO include other times (such as wall-clock) + //! A timer that measures wall clock time. + /*! 
Note: member is mutable such that it can be modified in a const function. + */ + mutable HighResWallClockTimer wall_clock_timer; }; diff --git a/src/include/stir/TimedObject.inl b/src/include/stir/TimedObject.inl index 068b5d848c..29e0f610bf 100644 --- a/src/include/stir/TimedObject.inl +++ b/src/include/stir/TimedObject.inl @@ -25,17 +25,20 @@ START_NAMESPACE_STIR void TimedObject::reset_timers() { cpu_timer.reset(); + wall_clock_timer.reset(); } -void TimedObject::start_timers() const +void TimedObject::start_timers(bool do_reset) const { - cpu_timer.start(); + cpu_timer.start(do_reset); + wall_clock_timer.start(do_reset); } void TimedObject::stop_timers() const { cpu_timer.stop(); + wall_clock_timer.stop(); } @@ -44,4 +47,9 @@ double TimedObject::get_CPU_timer_value() const return cpu_timer.value(); } +double TimedObject::get_wall_clock_timer_value() const +{ + return wall_clock_timer.value(); +} + END_NAMESPACE_STIR From 0afdea506346fdc457801ab7538203ac53d759f2 Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 09:45:43 +0100 Subject: [PATCH 3/9] add basic utility to perform timings --- src/utilities/CMakeLists.txt | 1 + src/utilities/stir_timings.cxx | 278 +++++++++++++++++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 src/utilities/stir_timings.cxx diff --git a/src/utilities/CMakeLists.txt b/src/utilities/CMakeLists.txt index eb0c5c1820..3ab181099e 100644 --- a/src/utilities/CMakeLists.txt +++ b/src/utilities/CMakeLists.txt @@ -73,6 +73,7 @@ set(${dir_EXE_SOURCES} write_sinogram_to_txt.cxx find_sum_projection_of_viewgram_and_sinogram.cxx separate_true_from_random_scatter_for_necr.cxx + stir_timings.cxx ) if (HAVE_ITK) diff --git a/src/utilities/stir_timings.cxx b/src/utilities/stir_timings.cxx new file mode 100644 index 0000000000..af42f0ca28 --- /dev/null +++ b/src/utilities/stir_timings.cxx @@ -0,0 +1,278 @@ +/* + Copyright (C) 2023 University College London + This file is part of STIR. 
+ + SPDX-License-Identifier: Apache-2.0 + + See STIR/LICENSE.txt for details + +*/ + +/*! + \file + \ingroup utilities + \author Kris Thielemans + + \brief Perform timings +*/ + +#include "stir/ProjDataInterfile.h" +#include "stir/ProjDataInMemory.h" +#include "stir/DiscretisedDensity.h" +#include "stir/VoxelsOnCartesianGrid.h" +#include "stir/IO/read_from_file.h" +#include "stir/IO/write_to_file.h" +#include "stir/recon_buildblock/ProjectorByBinPairUsingProjMatrixByBin.h" +#ifdef STIR_WITH_Parallelproj_PROJECTOR +# include "stir/recon_buildblock/Parallelproj_projector/ProjectorByBinPairUsingParallelproj.h" +#endif +#include "stir/recon_buildblock/ProjMatrixByBinUsingRayTracing.h" +#include "stir/warning.h" +#include "stir/error.h" +#include "stir/Verbosity.h" +#include +#include +#include +#include +#include + +static void +print_usage_and_exit() +{ + std::cerr << "\nUsage:\nstir_timings [--runs num_runs]\\\n" + << "\t[--skip-PP 1] [--skip-PMRT 1]\\\n" + << "\t[--image image_filename]\\\n" + << "\t--template-projdata template_proj_data_filename\n"; +#if 0 + std::cerr<<"The default projector uses the ray-tracing matrix.\n\n"; + std::cerr<<"Example parameter file:\n\n" + <<"Forward Projector parameters:=\n" + <<" type := Matrix\n" + <<" Forward projector Using Matrix Parameters :=\n" + <<" Matrix type := Ray Tracing\n" + <<" Ray tracing matrix parameters :=\n" + <<" End Ray tracing matrix parameters :=\n" + <<" End Forward Projector Using Matrix Parameters :=\n" + <<"End:=\n"; +#endif + exit(EXIT_FAILURE); +} + +START_NAMESPACE_STIR + +class Timings : public TimedObject +{ + typedef void (Timings::*TimedFunction)(); + +public: + bool skip_PMRT; + bool skip_PP; + + Timings(const std::string& image_filename, const std::string& template_proj_data_filename) + { + if (!image_filename.empty()) + this->image_sptr = read_from_file>(image_filename); + + if (!template_proj_data_filename.empty()) + this->template_proj_data_sptr = 
ProjData::read_from_file(template_proj_data_filename); + } + + void sleep() { std::this_thread::sleep_for(std::chrono::milliseconds(1123)); } + + void copy_image() + { + auto im = this->image_sptr->clone(); + delete im; + } + + void projector_setup() + { + this->projectors_sptr->set_up(this->template_proj_data_sptr->get_proj_data_info_sptr(), this->image_sptr); + } + + void forward_file() + { + this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->output_projdata_sptr, *this->image_sptr); + } + void forward_memory() + { + this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->mem_projdata_sptr, *this->image_sptr); + } + void back_file() + { + this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->output_projdata_sptr); + } + void back_memory() + { + this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->mem_projdata_sptr); + } + + void run_it(TimedFunction f, const std::string& item, const unsigned runs = 1) + { + this->start_timers(true); + for (unsigned r = runs; r != 0; --r) + (this->*f)(); + this->stop_timers(); + std::cout << std::setw(32) << std::left << item << '\t' << std::fixed << std::setprecision(3) << std::setw(24) << std::right + << this->get_CPU_timer_value() / runs * 1000 << '\t' << std::fixed << std::setprecision(3) << std::setw(24) + << std::right << this->get_wall_clock_timer_value() / runs * 1000 << std::endl; + } + + void run_all(const unsigned runs = 1) + { + init(); + // this->run_it(&Timings::sleep, "sleep", runs*1); + this->run_it(&Timings::copy_image, "copy_image", runs * 20); + this->projectors_sptr = this->pmrt_projectors_sptr; + if (!this->skip_PMRT) + { + this->run_it(&Timings::projector_setup, "PMRT_projector_setup", runs * 10); + this->run_it(&Timings::forward_file, "PMRT_forward_file_first", 1); + this->run_it(&Timings::forward_file, "PMRT_forward_file", 1); + this->run_it(&Timings::forward_memory, "PMRT_forward_memory", 1); 
+ this->run_it(&Timings::back_file, "PMRT_back_file_first", 1); + this->run_it(&Timings::back_file, "PMRT_back_file", 1); + this->run_it(&Timings::back_memory, "PMRT_back_memory", 1); + } +#ifdef STIR_WITH_Parallelproj_PROJECTOR + if (!skip_PP) + { + this->projectors_sptr = this->parallelproj_projectors_sptr; + this->run_it(&Timings::projector_setup, "PP_projector_setup", 1); + this->run_it(&Timings::forward_file, "PP_forward_file_first", 1); + this->run_it(&Timings::forward_file, "PP_forward_file", runs); + this->run_it(&Timings::forward_memory, "PP_forward_memory", runs); + this->run_it(&Timings::back_file, "PP_back_file_first", 1); + this->run_it(&Timings::back_file, "PP_back_file", runs); + this->run_it(&Timings::back_memory, "PP_back_memory", runs); + } +#endif + write_to_file("my_timings_backproj.hv", *this->image_sptr); + } + + void init() + { + + if (!this->template_proj_data_sptr) + print_usage_and_exit(); + + if (!image_sptr) + { + this->exam_info_sptr = this->template_proj_data_sptr->get_exam_info().create_shared_clone(); + this->image_sptr = std::make_shared>( + this->exam_info_sptr, *this->template_proj_data_sptr->get_proj_data_info_sptr()); + this->image_sptr->fill(1.F); + } + else + { + this->image_sptr->fill(1.F); + this->exam_info_sptr = this->image_sptr->get_exam_info().create_shared_clone(); + + if (this->image_sptr->get_exam_info().imaging_modality.is_unknown() + && this->template_proj_data_sptr->get_exam_info().imaging_modality.is_known()) + { + this->exam_info_sptr->imaging_modality = this->template_proj_data_sptr->get_exam_info().imaging_modality; + } + else if (this->image_sptr->get_exam_info().imaging_modality + != this->template_proj_data_sptr->get_exam_info().imaging_modality) + error("forward_project: Imaging modality should be the same for the image and the projection data"); + + if (this->template_proj_data_sptr->get_exam_info().has_energy_information()) + { + if (this->image_sptr->get_exam_info().has_energy_information()) + 
warning("Both image and template have energy information. Using the latter."); + + this->exam_info_sptr->set_energy_information_from(this->template_proj_data_sptr->get_exam_info()); + } + } + std::string output_filename = "my_timings.hs"; + this->output_projdata_sptr + = std::make_shared(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr(), + output_filename, std::ios::in | std::ios::out | std::ios::trunc); + this->mem_projdata_sptr + = std::make_shared(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr()); + +#ifdef STIR_WITH_Parallelproj_PROJECTOR + this->parallelproj_projectors_sptr = std::make_shared(); +#endif + } + + // protected: + shared_ptr> image_sptr; + shared_ptr output_projdata_sptr; + shared_ptr mem_projdata_sptr; + shared_ptr projectors_sptr; + shared_ptr pmrt_projectors_sptr; +#ifdef STIR_WITH_Parallelproj_PROJECTOR + shared_ptr parallelproj_projectors_sptr; +#endif + shared_ptr template_proj_data_sptr; + shared_ptr exam_info_sptr; +}; + +END_NAMESPACE_STIR + +int +main(int argc, char* argv[]) +{ + using namespace stir; + Verbosity::set(0); + + std::string image_filename; + std::string template_proj_data_filename; + std::string prog_name = argv[0]; + unsigned num_runs = 3; + bool skip_PMRT = false; + bool skip_PP = false; + + ++argv; + --argc; + while (argc > 1) + { + if (!strcmp(argv[0], "--image")) + image_filename = argv[1]; + else if (!strcmp(argv[0], "--template-projdata")) + template_proj_data_filename = argv[1]; + else if (!strcmp(argv[0], "--runs")) + num_runs = std::atoi(argv[1]); + else if (!strcmp(argv[0], "--skip-PMRT")) + skip_PMRT = std::atoi(argv[1]) != 0; + else if (!strcmp(argv[0], "--skip-PP")) + skip_PP = std::atoi(argv[1]) != 0; + else + print_usage_and_exit(); + argv += 2; + argc -= 2; + } + + if (argc > 0) + print_usage_and_exit(); + + Timings timings(image_filename, template_proj_data_filename); + timings.skip_PMRT = skip_PMRT; + timings.skip_PP = skip_PP; + + if (0) + { +#if 
0 + KeyParser parser; + parser.add_start_key("Forward Projector parameters"); + parser.add_parsing_key("type", &forw_projector_sptr); + parser.add_stop_key("END"); + parser.parse(argv[3]); + if (!timings.projectors_sptr) + { + std::cerr << "Failure parsing\n"; + return EXIT_FAILURE; + } +#endif + } + else + { + shared_ptr PM(new ProjMatrixByBinUsingRayTracing()); + timings.pmrt_projectors_sptr = std::make_shared(PM); + } + + timings.run_all(num_runs); + return EXIT_SUCCESS; +} From e0139cd4c13099b105ebdb838827db984f95e66a Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 13:38:17 +0100 Subject: [PATCH 4/9] remove assert on timer already running --- src/include/stir/HighResWallClockTimer.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/include/stir/HighResWallClockTimer.h b/src/include/stir/HighResWallClockTimer.h index 8b1fcdee30..d12859aad7 100644 --- a/src/include/stir/HighResWallClockTimer.h +++ b/src/include/stir/HighResWallClockTimer.h @@ -153,11 +153,10 @@ namespace stir { /*! \param bReset indicates whether the elapsed time should be reset before the timer is started. - The timer must not be running. + The timer must not be running if asking to reset. */ inline void HighResWallClockTimer::start(bool bReset /* = false */) { - assert(!m_bRunning); if (bReset) reset(); m_bRunning = true; #if defined(_AIX) From 9c5dabcc491c9ac5c6ef3d95ccef66652ecef4fd Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 13:39:35 +0100 Subject: [PATCH 5/9] remove start/stop timers in functions that are called multi-threaded TimedObject is not thread-safe, and timing results were incorrect. Currently just remove the calls. 
work-around https://github.com/UCL/STIR/issues/1238 --- src/recon_buildblock/BackProjectorByBin.cxx | 6 ------ .../BackProjectorByBinUsingInterpolation.cxx | 12 ------------ src/recon_buildblock/ForwardProjectorByBin.cxx | 3 --- .../ForwardProjectorByBinUsingRayTracing.cxx | 11 ----------- 4 files changed, 32 deletions(-) diff --git a/src/recon_buildblock/BackProjectorByBin.cxx b/src/recon_buildblock/BackProjectorByBin.cxx index 61a9616bf5..d993e7afac 100644 --- a/src/recon_buildblock/BackProjectorByBin.cxx +++ b/src/recon_buildblock/BackProjectorByBin.cxx @@ -152,8 +152,6 @@ back_project(DiscretisedDensity<3,float>& density, check(*viewgrams.get_proj_data_info_sptr(), density); - start_timers(); - // first check symmetries { const ViewSegmentNumbers basic_vs = viewgrams.get_basic_view_segment_num(); @@ -178,7 +176,6 @@ back_project(DiscretisedDensity<3,float>& density, max_axial_pos_num, min_tangential_pos_num, max_tangential_pos_num); - stop_timers(); } #endif // -------------------------------------------------------------------------------------------------------------------- // @@ -270,8 +267,6 @@ back_project(const RelatedViewgrams& viewgrams, check(*viewgrams.get_proj_data_info_sptr(), *_density_sptr); - start_timers(); - #ifdef STIR_OPENMP const int thread_num=omp_get_thread_num(); if(is_null_ptr(_local_output_image_sptrs[thread_num])) @@ -303,7 +298,6 @@ back_project(const RelatedViewgrams& viewgrams, max_axial_pos_num, min_tangential_pos_num, max_tangential_pos_num); - stop_timers(); } void diff --git a/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx b/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx index a97a5a4477..c5a9f91032 100644 --- a/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx +++ b/src/recon_buildblock/BackProjectorByBinUsingInterpolation.cxx @@ -454,8 +454,6 @@ BackProjectorByBinUsingInterpolation:: back_project_2D_all_symmetries(const Sinogram &sino, PETPlane &image, int view, const int min_tang_pos, 
const int max_tang_pos) { - start_timers(); - assert(sino.get_min_bin() == - sino.get_max_bin()); assert(min_tang_pos == -max_tang_pos); assert(image.get_min_x() == - image.get_max_x()); @@ -508,7 +506,6 @@ BackProjectorByBinUsingInterpolation:: backproj2D_Cho_view_viewplus90_180minview_90minview(image, projs, cphi, sphi, s); } - stop_timers(); } void @@ -516,8 +513,6 @@ BackProjectorByBinUsingInterpolation:: back_project_2D_view_plus_90(const Sinogram &sino, PETPlane &image, int view, const int min_tang_pos, const int max_tang_pos) { - start_timers(); - assert(sino.get_min_bin() == - sino.get_max_bin()); assert(min_tang_pos == -max_tang_pos); assert(image.get_min_x() == - image.get_max_x()); @@ -560,7 +555,6 @@ BackProjectorByBinUsingInterpolation:: backproj2D_Cho_view_viewplus90(image, projs, cphi, sphi, s); } - stop_timers(); } #endif @@ -664,8 +658,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n") : 0 ); - start_timers(); - const JacobianForIntBP jacobian(proj_data_info_cyl_sptr, use_exact_Jacobian_now); Array<4, float > Proj2424(IndexRange4D(0, 1, 0, 3, 0, 1, 1, 4)); @@ -803,7 +795,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n") axial_pos_to_z_offset); } } - stop_timers(); } /* @@ -867,8 +858,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n") // KTXXX not necessary anymore //assert(image.get_min_z() == 0); - start_timers(); - const JacobianForIntBP jacobian(proj_data_info_cyl_sptr, use_exact_Jacobian_now); Array<4, float > Proj2424(IndexRange4D(0, 1, 0, 3, 0, 1, 1, 4)); @@ -1001,7 +990,6 @@ can only handle arc-corrected data (cast to ProjDataInfoCylindricalArcCorr)!\n") axial_pos_to_z_offset); } } - stop_timers(); } diff --git a/src/recon_buildblock/ForwardProjectorByBin.cxx b/src/recon_buildblock/ForwardProjectorByBin.cxx index 9758d925de..db1eae7401 100644 --- a/src/recon_buildblock/ForwardProjectorByBin.cxx +++ 
b/src/recon_buildblock/ForwardProjectorByBin.cxx @@ -135,7 +135,6 @@ forward_project(RelatedViewgrams& viewgrams, if (viewgrams.get_num_viewgrams()==0) return; check(*viewgrams.get_proj_data_info_sptr(), density); - start_timers(); // first check symmetries { @@ -256,7 +255,6 @@ forward_project(RelatedViewgrams& viewgrams, error("You need to call set_input() forward_project()"); check(*viewgrams.get_proj_data_info_sptr(), *_density_sptr); - start_timers(); // first check symmetries { @@ -281,7 +279,6 @@ forward_project(RelatedViewgrams& viewgrams, max_axial_pos_num, min_tangential_pos_num, max_tangential_pos_num); - stop_timers(); } void diff --git a/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx b/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx index 9bcf8834b3..53f21dc397 100644 --- a/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx +++ b/src/recon_buildblock/ForwardProjectorByBinUsingRayTracing.cxx @@ -483,9 +483,6 @@ forward_project_all_symmetries( const int min_tang_pos_num_in_loop = min_abs_tangential_pos_num==0 ? 
1 : min_abs_tangential_pos_num; - start_timers(); - - Array <4,float> Projall(IndexRange4D(min_ax_pos_num, max_ax_pos_num, 0, 1, 0, 1, 0, 3)); // KT 21/05/98 removed as now automatically zero // Projall.fill(0); @@ -653,8 +650,6 @@ forward_project_all_symmetries( }// end of } else { }// end of test for offset loop - stop_timers(); - } @@ -905,8 +900,6 @@ void ForwardProjectorByBinUsingRayTracing::forward_project_2D(Sinogram &s const int projrad = (int) (sino.get_num_tangential_poss() / 2) - 1; - start_timers(); - //TODO for the moment, just handle 1 plane and use some 3D variables const int min_ax_pos = 0; const int max_ax_pos = 0; @@ -1023,8 +1016,6 @@ void ForwardProjectorByBinUsingRayTracing::forward_project_2D(Sinogram &s }// end of } else { }// end of test for offset loop - stop_timers(); - } #endif // old 2D versions @@ -1114,7 +1105,6 @@ forward_project_all_symmetries_2D(Viewgram & pos_view, const int min_axial_pos_num, const int max_axial_pos_num, const int min_tangential_pos_num, const int max_tangential_pos_num) const { - start_timers(); // KT 20/06/2001 should now work for non-arccorrected data as well const shared_ptr proj_data_info_sptr = @@ -1444,7 +1434,6 @@ forward_project_all_symmetries_2D(Viewgram & pos_view, }// end loop over D }// end of else - stop_timers(); } From 8d69631b47d8bf738feb028c09bd72c215ae35bd Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 14:16:31 +0100 Subject: [PATCH 6/9] multi-thread ParallelprojHelper The loop to construct xstart/end etc is now multi-threaded (although a little bit uglier!). Testing shows a speed-up of about 2-3. Using too many threads is counterproductive, so I limited to 8 (not necessarily optimal!). 
--- .../ParallelprojHelper.cxx | 95 ++++++++++++++----- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx b/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx index 133a4a33b3..adb030122c 100644 --- a/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx +++ b/src/recon_buildblock/Parallelproj_projector/ParallelprojHelper.cxx @@ -10,7 +10,7 @@ */ /* - Copyright (C) 2021 University College London + Copyright (C) 2021, 2023 University College London This file is part of STIR. SPDX-License-Identifier: Apache-2.0 @@ -28,7 +28,7 @@ #include "stir/info.h" #include "stir/stream.h" #include - +#include "stir/num_threads.h" START_NAMESPACE_STIR @@ -70,45 +70,96 @@ detail::ParallelprojHelper::ParallelprojHelper(const ProjDataInfo& p_info, const copy_to_array(coord_first_voxel*rescale, origin); // loop over all LORs in the projdata - Bin bin; - LORInAxialAndNoArcCorrSinogramCoordinates lor; - LORAs2Points lor_points; const float radius = p_info.get_scanner_sptr()->get_max_FOV_radius(); // warning: next loop needs to be the same as how ProjDataInMemory stores its data. There is no guarantee that this will remain the case in the future. const auto segment_sequence = ProjData::standard_segment_sequence(p_info); std::size_t index(0); + +#ifdef STIR_OPENMP + // Using too many threads is counterproductive according to my timings, so I limited to 8 (not necessarily optimal!). 
+ const auto num_threads_to_use = std::min(8,get_max_num_threads()); +#endif for (int seg : segment_sequence) { - bin.segment_num() = seg; - for (bin.axial_pos_num() = p_info.get_min_axial_pos_num(bin.segment_num()); bin.axial_pos_num() <= p_info.get_max_axial_pos_num(bin.segment_num()); ++bin.axial_pos_num()) + for (int axial_pos_num = p_info.get_min_axial_pos_num(seg); axial_pos_num <= p_info.get_max_axial_pos_num(seg); ++axial_pos_num) { - for (bin.view_num() = p_info.get_min_view_num(); bin.view_num() <= p_info.get_max_view_num(); ++bin.view_num()) + for (int view_num = p_info.get_min_view_num(); view_num <= p_info.get_max_view_num(); ++view_num) { - for (bin.tangential_pos_num() = p_info.get_min_tangential_pos_num(); bin.tangential_pos_num() <= p_info.get_max_tangential_pos_num(); ++bin.tangential_pos_num()) +#ifdef STIR_OPENMP + #pragma omp parallel for num_threads(num_threads_to_use) +#endif + for (int tangential_pos_num = p_info.get_min_tangential_pos_num(); tangential_pos_num <= p_info.get_max_tangential_pos_num(); ++tangential_pos_num) { + Bin bin; + bin.segment_num() = seg; + bin.axial_pos_num() = axial_pos_num; + bin.view_num() = view_num; + bin.tangential_pos_num() = tangential_pos_num; + // compute index for this bin (independent of multi-threading) + std::size_t this_index = index + (bin.tangential_pos_num() - p_info.get_min_tangential_pos_num())*3; + LORInAxialAndNoArcCorrSinogramCoordinates lor; + LORAs2Points lor_points; + p_info.get_LOR(lor, bin); if (lor.get_intersections_with_cylinder(lor_points, radius) == Succeeded::no) - { // memory is already allocated, so just passing in points that will produce nothing - xstart[index] = 0; - xend[index++] = 0; - xstart[index] = 0; - xend[index++] = 0; - xstart[index] = 0; - xend[index++] = 0; + { // memory is already allocated, so just passing in points that will produce nothing +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xstart[this_index] = 0; +#ifdef STIR_OPENMP +#pragma omp atomic write 
+#endif + xend[this_index] = 0; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xstart[this_index+1] = 0; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xend[this_index+1] = 0; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xstart[this_index+2] = 0; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xend[this_index+2] = 0; } else { const CartesianCoordinate3D p1 = lor_points.p1()*rescale; const CartesianCoordinate3D p2 = lor_points.p2()*rescale; - xstart[index] = p1[1]; - xend[index++] = p2[1]; - xstart[index] = p1[2]; - xend[index++] = p2[2]; - xstart[index] = p1[3]; - xend[index++] = p2[3]; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xstart[this_index] = p1[1]; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xend[this_index] = p2[1]; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xstart[this_index+1] = p1[2]; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xend[this_index+1] = p2[2]; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xstart[this_index+2] = p1[3]; +#ifdef STIR_OPENMP +#pragma omp atomic write +#endif + xend[this_index+2] = p2[3]; } } + index += p_info.get_num_tangential_poss()*3; } } } From 7e6c9b465e2770e295c34da3140f077f54b0df63 Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Tue, 29 Aug 2023 14:42:45 +0100 Subject: [PATCH 7/9] avoid some nested calls of start/stop_timers Timers were stopped too early due to nested calls. This is now checked by asserts (by adding HighResWallClockTimer), allowing me to catch these problems. 
--- src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx | 2 -- src/include/stir/DataProcessor.inl | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx b/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx index cb165f6722..2164c4cb64 100644 --- a/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx +++ b/src/analytic/FBP3DRP/FBP3DRPReconstruction.cxx @@ -396,7 +396,6 @@ actual_reconstruct(shared_ptr > const& target_image_ alpha_fit = 1.0F; beta_fit = 0.0F; - start_timers(); { //char file[max_filename_length]; //sprintf(file,"%s.full_log",output_filename_prefix.c_str()); @@ -564,7 +563,6 @@ actual_reconstruct(shared_ptr > const& target_image_ if(display_level>0) display(image, image.find_max(), "Final image"); - stop_timers(); do_log_file(image); full_log.close(); diff --git a/src/include/stir/DataProcessor.inl b/src/include/stir/DataProcessor.inl index 9c523a9a93..b0f03617bd 100644 --- a/src/include/stir/DataProcessor.inl +++ b/src/include/stir/DataProcessor.inl @@ -74,7 +74,6 @@ Succeeded DataProcessor:: apply(DataT& data) { - start_timers(); //assert(consistency_check(data) == Succeeded::yes); if (!is_set_up_already ) if (set_up(data) == Succeeded::no) @@ -82,6 +81,7 @@ apply(DataT& data) warning("DataProcessor::apply: Building was unsuccesfull. No processing done.\n"); return Succeeded::no; } + start_timers(); virtual_apply(data); stop_timers(); return Succeeded::yes; @@ -94,7 +94,6 @@ DataProcessor:: apply(DataT& data, const DataT& in_data) { - start_timers(); //assert(consistency_check(in_data) == Succeeded::yes); if (!is_set_up_already ) if (set_up(in_data) == Succeeded::no) @@ -102,6 +101,7 @@ apply(DataT& data, warning("DataProcessor::apply: Building was unsuccesfull. 
No processing done.\n"); return Succeeded::no; } + start_timers(); virtual_apply(data, in_data); stop_timers(); return Succeeded::yes; From 1cd960984a9489d737da95c519a0e268497ced54 Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Sat, 9 Sep 2023 11:49:43 +0100 Subject: [PATCH 8/9] added timings for ProjData and Poisson log-likelihoood --- src/utilities/stir_timings.cxx | 146 ++++++++++++++++++++++----------- 1 file changed, 98 insertions(+), 48 deletions(-) diff --git a/src/utilities/stir_timings.cxx b/src/utilities/stir_timings.cxx index af42f0ca28..bb0ac9bc2d 100644 --- a/src/utilities/stir_timings.cxx +++ b/src/utilities/stir_timings.cxx @@ -27,6 +27,9 @@ # include "stir/recon_buildblock/Parallelproj_projector/ProjectorByBinPairUsingParallelproj.h" #endif #include "stir/recon_buildblock/ProjMatrixByBinUsingRayTracing.h" +#include "stir/recon_buildblock/PoissonLogLikelihoodWithLinearModelForMeanAndProjData.h" +//#include "stir/OSMAPOSL/OSMAPOSLReconstruction.h" +#include "stir/recon_buildblock/distributable_main.h" #include "stir/warning.h" #include "stir/error.h" #include "stir/Verbosity.h" @@ -42,19 +45,9 @@ print_usage_and_exit() std::cerr << "\nUsage:\nstir_timings [--runs num_runs]\\\n" << "\t[--skip-PP 1] [--skip-PMRT 1]\\\n" << "\t[--image image_filename]\\\n" - << "\t--template-projdata template_proj_data_filename\n"; -#if 0 - std::cerr<<"The default projector uses the ray-tracing matrix.\n\n"; - std::cerr<<"Example parameter file:\n\n" - <<"Forward Projector parameters:=\n" - <<" type := Matrix\n" - <<" Forward projector Using Matrix Parameters :=\n" - <<" Matrix type := Ray Tracing\n" - <<" Ray tracing matrix parameters :=\n" - <<" End Ray tracing matrix parameters :=\n" - <<" End Forward Projector Using Matrix Parameters :=\n" - <<"End:=\n"; -#endif + << "\t--template-projdata template_proj_data_filename\n\n" + << "Timings are reported as:\n" + << "name\tCPU_time_in_ms\twall-clock_time_in_ms\n"; exit(EXIT_FAILURE); } @@ -85,6 +78,42 @@ class 
Timings : public TimedObject delete im; } + //! copy from output_proj_data_sptr to new Interfile file + void copy_proj_data_file_to_file() + { + ProjDataInterfile tmp(this->template_proj_data_sptr->get_exam_info_sptr(), + this->template_proj_data_sptr->get_proj_data_info_sptr(), + "my_timings_copy.hs"); + tmp.fill(*this->output_proj_data_sptr); + } + + //! copy from output_proj_data_sptr to memory object + void copy_proj_data_file_to_mem() + { + ProjDataInMemory tmp(this->template_proj_data_sptr->get_exam_info_sptr(), + this->template_proj_data_sptr->get_proj_data_info_sptr(), + /* initialise*/ false); + tmp.fill(*this->output_proj_data_sptr); + } + + //! copy from mem_proj_data_sptr to new Interfile file + void copy_proj_data_mem_to_file() + { + ProjDataInterfile tmp(this->template_proj_data_sptr->get_exam_info_sptr(), + this->template_proj_data_sptr->get_proj_data_info_sptr(), + "my_timings_copy.hs"); + tmp.fill(*this->mem_proj_data_sptr); + } + + //! copy from output_proj_data_sptr to memory object + void copy_proj_data_mem_to_mem() + { + ProjDataInMemory tmp(this->template_proj_data_sptr->get_exam_info_sptr(), + this->template_proj_data_sptr->get_proj_data_info_sptr(), + /* initialise*/ false); + tmp.fill(*this->mem_proj_data_sptr); + } + void projector_setup() { this->projectors_sptr->set_up(this->template_proj_data_sptr->get_proj_data_info_sptr(), this->image_sptr); @@ -92,19 +121,31 @@ class Timings : public TimedObject void forward_file() { - this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->output_projdata_sptr, *this->image_sptr); + this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->output_proj_data_sptr, *this->image_sptr); } void forward_memory() { - this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->mem_projdata_sptr, *this->image_sptr); + this->projectors_sptr->get_forward_projector_sptr()->forward_project(*this->mem_proj_data_sptr, *this->image_sptr); } void back_file() { - 
this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->output_projdata_sptr); + this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->output_proj_data_sptr); } void back_memory() { - this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->mem_projdata_sptr); + this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->mem_proj_data_sptr); + } + + void obj_func_set_up() + { + this->objective_function_sptr->set_up(this->image_sptr); + } + + void obj_func_grad_no_sens() + { + auto im = this->image_sptr->clone(); + this->objective_function_sptr->compute_sub_gradient_without_penalty_plus_sensitivity(*im, *this->image_sptr, 0); + delete im; } void run_it(TimedFunction f, const std::string& item, const unsigned runs = 1) @@ -123,9 +164,17 @@ class Timings : public TimedObject init(); // this->run_it(&Timings::sleep, "sleep", runs*1); this->run_it(&Timings::copy_image, "copy_image", runs * 20); - this->projectors_sptr = this->pmrt_projectors_sptr; + this->output_proj_data_sptr->fill(1.F); + this->run_it(&Timings::copy_proj_data_mem_to_mem, "copy_proj_data_mem_to_mem", runs * 2); + this->run_it(&Timings::copy_proj_data_mem_to_file, "copy_proj_data_mem_to_file", runs * 2); + this->run_it(&Timings::copy_proj_data_file_to_mem, "copy_proj_data_file_to_mem", runs * 2); + this->run_it(&Timings::copy_proj_data_file_to_file, "copy_proj_data_file_to_file", runs * 2); + this->objective_function_sptr.reset(new PoissonLogLikelihoodWithLinearModelForMeanAndProjData>); + this->objective_function_sptr->set_proj_data_sptr(this->mem_proj_data_sptr); + // this->objective_function.set_num_subsets(proj_data_sptr->get_num_views()/2); if (!this->skip_PMRT) { + this->projectors_sptr = this->pmrt_projectors_sptr; this->run_it(&Timings::projector_setup, "PMRT_projector_setup", runs * 10); this->run_it(&Timings::forward_file, "PMRT_forward_file_first", 1); 
this->run_it(&Timings::forward_file, "PMRT_forward_file", 1); @@ -133,6 +182,10 @@ class Timings : public TimedObject this->run_it(&Timings::back_file, "PMRT_back_file_first", 1); this->run_it(&Timings::back_file, "PMRT_back_file", 1); this->run_it(&Timings::back_memory, "PMRT_back_memory", 1); + this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr); + this->run_it(&Timings::obj_func_set_up, "PMRT LogLik set_up", 1); + this->run_it(&Timings::obj_func_grad_no_sens, "PMRT LogLik grad_no_sens", 1); + } #ifdef STIR_WITH_Parallelproj_PROJECTOR if (!skip_PP) @@ -145,9 +198,12 @@ class Timings : public TimedObject this->run_it(&Timings::back_file, "PP_back_file_first", 1); this->run_it(&Timings::back_file, "PP_back_file", runs); this->run_it(&Timings::back_memory, "PP_back_memory", runs); + this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr); + this->run_it(&Timings::obj_func_set_up, "PP LogLik set_up", 1); + this->run_it(&Timings::obj_func_grad_no_sens, "PP LogLik grad_no_sens", 1); } #endif - write_to_file("my_timings_backproj.hv", *this->image_sptr); + // write_to_file("my_timings_backproj.hv", *this->image_sptr); } void init() @@ -185,22 +241,33 @@ class Timings : public TimedObject this->exam_info_sptr->set_energy_information_from(this->template_proj_data_sptr->get_exam_info()); } } - std::string output_filename = "my_timings.hs"; - this->output_projdata_sptr + + // projection data set-up + { + std::string output_filename = "my_timings.hs"; + this->output_proj_data_sptr = std::make_shared(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr(), output_filename, std::ios::in | std::ios::out | std::ios::trunc); - this->mem_projdata_sptr + this->mem_proj_data_sptr = std::make_shared(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr()); + } + + // projector set-up + { + auto PM_sptr = std::make_shared(); + PM_sptr->set_num_tangential_LORs(5); + this->pmrt_projectors_sptr = 
std::make_shared(PM_sptr); #ifdef STIR_WITH_Parallelproj_PROJECTOR - this->parallelproj_projectors_sptr = std::make_shared(); + this->parallelproj_projectors_sptr = std::make_shared(); #endif + } } // protected: shared_ptr> image_sptr; - shared_ptr output_projdata_sptr; - shared_ptr mem_projdata_sptr; + shared_ptr output_proj_data_sptr; + shared_ptr mem_proj_data_sptr; shared_ptr projectors_sptr; shared_ptr pmrt_projectors_sptr; #ifdef STIR_WITH_Parallelproj_PROJECTOR @@ -208,12 +275,16 @@ class Timings : public TimedObject #endif shared_ptr template_proj_data_sptr; shared_ptr exam_info_sptr; + shared_ptr>> objective_function_sptr; }; END_NAMESPACE_STIR -int -main(int argc, char* argv[]) +#ifdef STIR_MPI +int stir::distributable_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { using namespace stir; Verbosity::set(0); @@ -252,27 +323,6 @@ main(int argc, char* argv[]) timings.skip_PMRT = skip_PMRT; timings.skip_PP = skip_PP; - if (0) - { -#if 0 - KeyParser parser; - parser.add_start_key("Forward Projector parameters"); - parser.add_parsing_key("type", &forw_projector_sptr); - parser.add_stop_key("END"); - parser.parse(argv[3]); - if (!timings.projectors_sptr) - { - std::cerr << "Failure parsing\n"; - return EXIT_FAILURE; - } -#endif - } - else - { - shared_ptr PM(new ProjMatrixByBinUsingRayTracing()); - timings.pmrt_projectors_sptr = std::make_shared(PM); - } - timings.run_all(num_runs); return EXIT_SUCCESS; } From 7cbd82dc2a149b4b5146d0b681aa61f63dc24a98 Mon Sep 17 00:00:00 2001 From: Kris Thielemans Date: Sat, 9 Sep 2023 13:56:08 +0100 Subject: [PATCH 9/9] clean-up and doc of stir_timings also added extra options for friendlier usage --- documentation/STIR-UsersGuide.tex | 7 + documentation/release_5.2.htm | 5 + src/utilities/stir_timings.cxx | 293 ++++++++++++++++-------------- 3 files changed, 173 insertions(+), 132 deletions(-) diff --git a/documentation/STIR-UsersGuide.tex b/documentation/STIR-UsersGuide.tex index 
6c32c1ada2..80a91a9aed 100644 --- a/documentation/STIR-UsersGuide.tex +++ b/documentation/STIR-UsersGuide.tex @@ -3699,6 +3699,13 @@ \subsubsection{ a popular Monte Carlo simulator for PET and SPECT. This is all preliminary. Check out the \texttt{README.txt} in the \textbf{STIR/SimSET} directory. +\subsubsection{ +Performing timings of certain operations} +The \textbf{stir\_timings} utility is mostly useful for developers, +but you could use it to optimise the number of OpenMP threads to use for your data. + +Run the utility without any arguments to get a help message. +If you want to know what is actually timed, you will have to look at the source code. \subsection{ User-selectable components} diff --git a/documentation/release_5.2.htm b/documentation/release_5.2.htm index e3d005fb89..3fdbffa1f9 100644 --- a/documentation/release_5.2.htm +++ b/documentation/release_5.2.htm @@ -55,6 +55,11 @@

New functionality

(still somewhat preliminary).
PR #1182. +
  • + The new stir_timings utility is mostly useful for developers, + but you could use it to optimise the number of OpenMP threads to use for your data. +
    PR #1237. +
  • New examples

    diff --git a/src/utilities/stir_timings.cxx b/src/utilities/stir_timings.cxx index bb0ac9bc2d..0b4c1639c8 100644 --- a/src/utilities/stir_timings.cxx +++ b/src/utilities/stir_timings.cxx @@ -8,12 +8,18 @@ */ -/*! +/*! \file \ingroup utilities \author Kris Thielemans \brief Perform timings + + This utility performs timings of various operations. This is mostly useful for developers, + but you could use it to optimise the number of OpenMP threads to use for your data. + + Run the utility without any arguments to get a help message. + If you want to know what is actually timed, you will have to look at the source code. */ #include "stir/ProjDataInterfile.h" @@ -32,6 +38,7 @@ #include "stir/recon_buildblock/distributable_main.h" #include "stir/warning.h" #include "stir/error.h" +#include "stir/num_threads.h" #include "stir/Verbosity.h" #include #include @@ -42,13 +49,13 @@ static void print_usage_and_exit() { - std::cerr << "\nUsage:\nstir_timings [--runs num_runs]\\\n" + std::cerr << "\nUsage:\nstir_timings [--name some_string] [--threads num_threads] [--runs num_runs]\\\n" << "\t[--skip-PP 1] [--skip-PMRT 1]\\\n" << "\t[--image image_filename]\\\n" << "\t--template-projdata template_proj_data_filename\n\n" - << "Timings are reported as:\n" - << "name\tCPU_time_in_ms\twall-clock_time_in_ms\n"; - exit(EXIT_FAILURE); + << "Timings are reported to stdout as:\n" + << "name\ttiming_name\tCPU_time_in_ms\twall-clock_time_in_ms\n"; + std::exit(EXIT_FAILURE); } START_NAMESPACE_STIR @@ -58,9 +65,26 @@ class Timings : public TimedObject typedef void (Timings::*TimedFunction)(); public: + //!
Use as prefix for all output + std::string name; + // variables that select timings bool skip_PMRT; bool skip_PP; + // variables used for running timings + shared_ptr> image_sptr; + shared_ptr output_proj_data_sptr; + shared_ptr mem_proj_data_sptr; + shared_ptr projectors_sptr; + shared_ptr pmrt_projectors_sptr; +#ifdef STIR_WITH_Parallelproj_PROJECTOR + shared_ptr parallelproj_projectors_sptr; +#endif + shared_ptr template_proj_data_sptr; + shared_ptr exam_info_sptr; + shared_ptr>> objective_function_sptr; + + // basic methods Timings(const std::string& image_filename, const std::string& template_proj_data_filename) { if (!image_filename.empty()) @@ -70,6 +94,14 @@ class Timings : public TimedObject this->template_proj_data_sptr = ProjData::read_from_file(template_proj_data_filename); } + void run_it(TimedFunction f, const std::string& item, const unsigned runs = 1); + void run_all(const unsigned runs = 1); + void init(); + + // functions that are timed + + //! Test function that could be used to see if reported timings are correct + /*! 
CPU time should be close to zero, wall-clock time close to 1123ms */ void sleep() { std::this_thread::sleep_for(std::chrono::milliseconds(1123)); } void copy_image() @@ -82,8 +114,7 @@ class Timings : public TimedObject void copy_proj_data_file_to_file() { ProjDataInterfile tmp(this->template_proj_data_sptr->get_exam_info_sptr(), - this->template_proj_data_sptr->get_proj_data_info_sptr(), - "my_timings_copy.hs"); + this->template_proj_data_sptr->get_proj_data_info_sptr(), "my_timings_copy.hs"); tmp.fill(*this->output_proj_data_sptr); } @@ -100,8 +131,7 @@ class Timings : public TimedObject void copy_proj_data_mem_to_file() { ProjDataInterfile tmp(this->template_proj_data_sptr->get_exam_info_sptr(), - this->template_proj_data_sptr->get_proj_data_info_sptr(), - "my_timings_copy.hs"); + this->template_proj_data_sptr->get_proj_data_info_sptr(), "my_timings_copy.hs"); tmp.fill(*this->mem_proj_data_sptr); } @@ -113,7 +143,7 @@ class Timings : public TimedObject /* initialise*/ false); tmp.fill(*this->mem_proj_data_sptr); } - + void projector_setup() { this->projectors_sptr->set_up(this->template_proj_data_sptr->get_proj_data_info_sptr(), this->image_sptr); @@ -136,10 +166,7 @@ class Timings : public TimedObject this->projectors_sptr->get_back_projector_sptr()->back_project(*this->image_sptr, *this->mem_proj_data_sptr); } - void obj_func_set_up() - { - this->objective_function_sptr->set_up(this->image_sptr); - } + void obj_func_set_up() { this->objective_function_sptr->set_up(this->image_sptr); } void obj_func_grad_no_sens() { @@ -147,143 +174,134 @@ class Timings : public TimedObject this->objective_function_sptr->compute_sub_gradient_without_penalty_plus_sensitivity(*im, *this->image_sptr, 0); delete im; } +}; - void run_it(TimedFunction f, const std::string& item, const unsigned runs = 1) - { - this->start_timers(true); - for (unsigned r = runs; r != 0; --r) - (this->*f)(); - this->stop_timers(); - std::cout << std::setw(32) << std::left << item << '\t' << std::fixed 
<< std::setprecision(3) << std::setw(24) << std::right - << this->get_CPU_timer_value() / runs * 1000 << '\t' << std::fixed << std::setprecision(3) << std::setw(24) - << std::right << this->get_wall_clock_timer_value() / runs * 1000 << std::endl; - } +void +Timings::run_it(TimedFunction f, const std::string& item, const unsigned runs) +{ + this->start_timers(true); + for (unsigned r = runs; r != 0; --r) + (this->*f)(); + this->stop_timers(); + std::cout << name << '\t' << std::setw(32) << std::left << item << '\t' << std::fixed << std::setprecision(3) << std::setw(24) + << std::right << this->get_CPU_timer_value() / runs * 1000 << '\t' << std::fixed << std::setprecision(3) + << std::setw(24) << std::right << this->get_wall_clock_timer_value() / runs * 1000 << std::endl; +} - void run_all(const unsigned runs = 1) - { - init(); - // this->run_it(&Timings::sleep, "sleep", runs*1); - this->run_it(&Timings::copy_image, "copy_image", runs * 20); - this->output_proj_data_sptr->fill(1.F); - this->run_it(&Timings::copy_proj_data_mem_to_mem, "copy_proj_data_mem_to_mem", runs * 2); - this->run_it(&Timings::copy_proj_data_mem_to_file, "copy_proj_data_mem_to_file", runs * 2); - this->run_it(&Timings::copy_proj_data_file_to_mem, "copy_proj_data_file_to_mem", runs * 2); - this->run_it(&Timings::copy_proj_data_file_to_file, "copy_proj_data_file_to_file", runs * 2); - this->objective_function_sptr.reset(new PoissonLogLikelihoodWithLinearModelForMeanAndProjData>); - this->objective_function_sptr->set_proj_data_sptr(this->mem_proj_data_sptr); - // this->objective_function.set_num_subsets(proj_data_sptr->get_num_views()/2); - if (!this->skip_PMRT) - { - this->projectors_sptr = this->pmrt_projectors_sptr; - this->run_it(&Timings::projector_setup, "PMRT_projector_setup", runs * 10); - this->run_it(&Timings::forward_file, "PMRT_forward_file_first", 1); - this->run_it(&Timings::forward_file, "PMRT_forward_file", 1); - this->run_it(&Timings::forward_memory, "PMRT_forward_memory", 1); - 
this->run_it(&Timings::back_file, "PMRT_back_file_first", 1); - this->run_it(&Timings::back_file, "PMRT_back_file", 1); - this->run_it(&Timings::back_memory, "PMRT_back_memory", 1); - this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr); - this->run_it(&Timings::obj_func_set_up, "PMRT LogLik set_up", 1); - this->run_it(&Timings::obj_func_grad_no_sens, "PMRT LogLik grad_no_sens", 1); - - } +void +Timings::run_all(const unsigned runs) +{ + this->init(); + // this->run_it(&Timings::sleep, "sleep", runs*1); + this->run_it(&Timings::copy_image, "copy_image", runs * 20); + this->output_proj_data_sptr->fill(1.F); + this->run_it(&Timings::copy_proj_data_mem_to_mem, "copy_proj_data_mem_to_mem", runs * 2); + this->run_it(&Timings::copy_proj_data_mem_to_file, "copy_proj_data_mem_to_file", runs * 2); + this->run_it(&Timings::copy_proj_data_file_to_mem, "copy_proj_data_file_to_mem", runs * 2); + this->run_it(&Timings::copy_proj_data_file_to_file, "copy_proj_data_file_to_file", runs * 2); + this->objective_function_sptr.reset(new PoissonLogLikelihoodWithLinearModelForMeanAndProjData>); + this->objective_function_sptr->set_proj_data_sptr(this->mem_proj_data_sptr); + // this->objective_function.set_num_subsets(proj_data_sptr->get_num_views()/2); + if (!this->skip_PMRT) + { + this->projectors_sptr = this->pmrt_projectors_sptr; + this->run_it(&Timings::projector_setup, "PMRT_projector_setup", runs * 10); + this->run_it(&Timings::forward_file, "PMRT_forward_file_first", 1); + this->run_it(&Timings::forward_file, "PMRT_forward_file", 1); + this->run_it(&Timings::forward_memory, "PMRT_forward_memory", 1); + this->run_it(&Timings::back_file, "PMRT_back_file_first", 1); + this->run_it(&Timings::back_file, "PMRT_back_file", 1); + this->run_it(&Timings::back_memory, "PMRT_back_memory", 1); + this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr); + this->run_it(&Timings::obj_func_set_up, "PMRT_LogLik set_up", 1); + 
this->run_it(&Timings::obj_func_grad_no_sens, "PMRT_LogLik grad_no_sens", 1); + } #ifdef STIR_WITH_Parallelproj_PROJECTOR - if (!skip_PP) - { - this->projectors_sptr = this->parallelproj_projectors_sptr; - this->run_it(&Timings::projector_setup, "PP_projector_setup", 1); - this->run_it(&Timings::forward_file, "PP_forward_file_first", 1); - this->run_it(&Timings::forward_file, "PP_forward_file", runs); - this->run_it(&Timings::forward_memory, "PP_forward_memory", runs); - this->run_it(&Timings::back_file, "PP_back_file_first", 1); - this->run_it(&Timings::back_file, "PP_back_file", runs); - this->run_it(&Timings::back_memory, "PP_back_memory", runs); - this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr); - this->run_it(&Timings::obj_func_set_up, "PP LogLik set_up", 1); - this->run_it(&Timings::obj_func_grad_no_sens, "PP LogLik grad_no_sens", 1); - } + if (!skip_PP) + { + this->projectors_sptr = this->parallelproj_projectors_sptr; + this->run_it(&Timings::projector_setup, "PP_projector_setup", 1); + this->run_it(&Timings::forward_file, "PP_forward_file_first", 1); + this->run_it(&Timings::forward_file, "PP_forward_file", runs); + this->run_it(&Timings::forward_memory, "PP_forward_memory", runs); + this->run_it(&Timings::back_file, "PP_back_file_first", 1); + this->run_it(&Timings::back_file, "PP_back_file", runs); + this->run_it(&Timings::back_memory, "PP_back_memory", runs); + this->objective_function_sptr->set_projector_pair_sptr(this->projectors_sptr); + this->run_it(&Timings::obj_func_set_up, "PP_LogLik set_up", 1); + this->run_it(&Timings::obj_func_grad_no_sens, "PP_LogLik grad_no_sens", 1); + } #endif - // write_to_file("my_timings_backproj.hv", *this->image_sptr); - } + // write_to_file("my_timings_backproj.hv", *this->image_sptr); +} - void init() - { +void +Timings::init() +{ + + if (!this->template_proj_data_sptr) + print_usage_and_exit(); - if (!this->template_proj_data_sptr) - print_usage_and_exit(); - - if (!image_sptr) - { - 
this->exam_info_sptr = this->template_proj_data_sptr->get_exam_info().create_shared_clone(); - this->image_sptr = std::make_shared>( - this->exam_info_sptr, *this->template_proj_data_sptr->get_proj_data_info_sptr()); - this->image_sptr->fill(1.F); - } - else - { - this->image_sptr->fill(1.F); - this->exam_info_sptr = this->image_sptr->get_exam_info().create_shared_clone(); - - if (this->image_sptr->get_exam_info().imaging_modality.is_unknown() - && this->template_proj_data_sptr->get_exam_info().imaging_modality.is_known()) - { - this->exam_info_sptr->imaging_modality = this->template_proj_data_sptr->get_exam_info().imaging_modality; - } - else if (this->image_sptr->get_exam_info().imaging_modality - != this->template_proj_data_sptr->get_exam_info().imaging_modality) - error("forward_project: Imaging modality should be the same for the image and the projection data"); - - if (this->template_proj_data_sptr->get_exam_info().has_energy_information()) - { - if (this->image_sptr->get_exam_info().has_energy_information()) - warning("Both image and template have energy information. 
Using the latter."); - - this->exam_info_sptr->set_energy_information_from(this->template_proj_data_sptr->get_exam_info()); - } - } - - // projection data set-up + if (!image_sptr) + { + this->exam_info_sptr = this->template_proj_data_sptr->get_exam_info().create_shared_clone(); + this->image_sptr = std::make_shared>( + this->exam_info_sptr, *this->template_proj_data_sptr->get_proj_data_info_sptr()); + this->image_sptr->fill(1.F); + } + else { - std::string output_filename = "my_timings.hs"; - this->output_proj_data_sptr + this->image_sptr->fill(1.F); + this->exam_info_sptr = this->image_sptr->get_exam_info().create_shared_clone(); + + if (this->image_sptr->get_exam_info().imaging_modality.is_unknown() + && this->template_proj_data_sptr->get_exam_info().imaging_modality.is_known()) + { + this->exam_info_sptr->imaging_modality = this->template_proj_data_sptr->get_exam_info().imaging_modality; + } + else if (this->image_sptr->get_exam_info().imaging_modality + != this->template_proj_data_sptr->get_exam_info().imaging_modality) + error("forward_project: Imaging modality should be the same for the image and the projection data"); + + if (this->template_proj_data_sptr->get_exam_info().has_energy_information()) + { + if (this->image_sptr->get_exam_info().has_energy_information()) + warning("Both image and template have energy information. 
Using the latter."); + + this->exam_info_sptr->set_energy_information_from(this->template_proj_data_sptr->get_exam_info()); + } + } + + // projection data set-up + { + std::string output_filename = "my_timings.hs"; + this->output_proj_data_sptr = std::make_shared(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr(), output_filename, std::ios::in | std::ios::out | std::ios::trunc); - this->mem_proj_data_sptr + this->mem_proj_data_sptr = std::make_shared(this->exam_info_sptr, this->template_proj_data_sptr->get_proj_data_info_sptr()); - } + } - // projector set-up - { - auto PM_sptr = std::make_shared(); - PM_sptr->set_num_tangential_LORs(5); - this->pmrt_projectors_sptr = std::make_shared(PM_sptr); + // projector set-up + { + auto PM_sptr = std::make_shared(); + PM_sptr->set_num_tangential_LORs(5); + this->pmrt_projectors_sptr = std::make_shared(PM_sptr); #ifdef STIR_WITH_Parallelproj_PROJECTOR - this->parallelproj_projectors_sptr = std::make_shared(); + this->parallelproj_projectors_sptr = std::make_shared(); #endif - } } - - // protected: - shared_ptr> image_sptr; - shared_ptr output_proj_data_sptr; - shared_ptr mem_proj_data_sptr; - shared_ptr projectors_sptr; - shared_ptr pmrt_projectors_sptr; -#ifdef STIR_WITH_Parallelproj_PROJECTOR - shared_ptr parallelproj_projectors_sptr; -#endif - shared_ptr template_proj_data_sptr; - shared_ptr exam_info_sptr; - shared_ptr>> objective_function_sptr; -}; +} END_NAMESPACE_STIR #ifdef STIR_MPI -int stir::distributable_main(int argc, char **argv) +int +stir::distributable_main(int argc, char** argv) #else -int main(int argc, char **argv) +int +main(int argc, char** argv) #endif { using namespace stir; @@ -293,19 +311,26 @@ int main(int argc, char **argv) std::string template_proj_data_filename; std::string prog_name = argv[0]; unsigned num_runs = 3; + int num_threads = get_default_num_threads(); bool skip_PMRT = false; bool skip_PP = false; + // prefix output with this string + std::string name; 
++argv; --argc; while (argc > 1) { - if (!strcmp(argv[0], "--image")) + if (!strcmp(argv[0], "--name")) + name = argv[1]; + else if (!strcmp(argv[0], "--image")) image_filename = argv[1]; else if (!strcmp(argv[0], "--template-projdata")) template_proj_data_filename = argv[1]; else if (!strcmp(argv[0], "--runs")) num_runs = std::atoi(argv[1]); + else if (!strcmp(argv[0], "--threads")) + num_threads = std::atoi(argv[1]); else if (!strcmp(argv[0], "--skip-PMRT")) skip_PMRT = std::atoi(argv[1]) != 0; else if (!strcmp(argv[0], "--skip-PP")) @@ -319,7 +344,11 @@ int main(int argc, char **argv) if (argc > 0) print_usage_and_exit(); + set_num_threads(num_threads); + std::cerr << "Using " << num_threads << " threads.\n"; + Timings timings(image_filename, template_proj_data_filename); + timings.name = name; timings.skip_PMRT = skip_PMRT; timings.skip_PP = skip_PP;