Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better reporting of CPU timing #34941

Merged
merged 2 commits into from
Aug 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions FWCore/Framework/interface/SystemTimeKeeper.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

// user include files
#include "FWCore/Utilities/interface/CPUTimer.h"
#include "FWCore/Utilities/interface/ChildrenCPUTimer.h"
#include "FWCore/Utilities/interface/WallclockTimer.h"

// forward declarations
Expand Down Expand Up @@ -106,6 +107,7 @@ namespace edm {
std::vector<std::vector<std::string>> m_modulesOnPaths;

CPUTimer m_processingLoopTimer;
ChildrenCPUTimer m_processingLoopChildrenTimer;
ProcessContext const* m_processContext;

unsigned int m_minModuleID;
Expand Down
12 changes: 9 additions & 3 deletions FWCore/Framework/src/SystemTimeKeeper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,15 @@ void SystemTimeKeeper::restartModuleEvent(StreamContext const& iStream, ModuleCa
}
}

void SystemTimeKeeper::startProcessingLoop() { m_processingLoopTimer.start(); }
void SystemTimeKeeper::startProcessingLoop() {
m_processingLoopTimer.start();
m_processingLoopChildrenTimer.start();
}

void SystemTimeKeeper::stopProcessingLoop() { m_processingLoopTimer.stop(); }
void SystemTimeKeeper::stopProcessingLoop() {
m_processingLoopTimer.stop();
m_processingLoopChildrenTimer.stop();
}

static void fillPathSummary(unsigned int iStartIndex,
unsigned int iEndIndex,
Expand Down Expand Up @@ -248,7 +254,7 @@ void SystemTimeKeeper::fillTriggerTimingReport(TriggerTimingReport& rep) const {
sumEventTime += stream.realTime();
}
rep.eventSummary.realTime = m_processingLoopTimer.realTime();
rep.eventSummary.cpuTime = m_processingLoopTimer.cpuTime();
rep.eventSummary.cpuTime = m_processingLoopTimer.cpuTime() + m_processingLoopChildrenTimer.cpuTime();
rep.eventSummary.sumStreamRealTime = sumEventTime;
}

Expand Down
49 changes: 31 additions & 18 deletions FWCore/Services/plugins/Timing.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ namespace edm {
double curr_job_cpu_; // seconds
std::atomic<double> extra_job_cpu_; //seconds
//use last run time for determining end of processing
std::atomic<double> last_run_time_;
std::atomic<double> last_run_cpu_;
std::atomic<double> end_loop_time_;
std::atomic<double> end_loop_cpu_;
std::vector<double> curr_events_time_; // seconds
bool summary_only_;
bool report_summary_;
Expand Down Expand Up @@ -161,6 +161,16 @@ namespace edm {
return static_cast<double>(t.tv_sec) + (static_cast<double>(t.tv_usec) * 1E-6);
}

static double getChildrenCPU() {
struct rusage usage;

getrusage(RUSAGE_CHILDREN, &usage);
double totalCPUTime = (double)usage.ru_utime.tv_sec + (double(usage.ru_utime.tv_usec) * 1E-6);
totalCPUTime += (double)usage.ru_stime.tv_sec + (double(usage.ru_stime.tv_usec) * 1E-6);

return totalCPUTime;
}

static double getCPU() {
struct rusage usage;
getrusage(RUSAGE_SELF, &usage);
Expand Down Expand Up @@ -208,8 +218,8 @@ namespace edm {
: curr_job_time_(0.),
curr_job_cpu_(0.),
extra_job_cpu_(0.0),
last_run_time_(0.0),
last_run_cpu_(0.0),
end_loop_time_(0.0),
end_loop_cpu_(0.0),
curr_events_time_(),
summary_only_(iPS.getUntrackedParameter<bool>("summaryOnly")),
report_summary_(iPS.getUntrackedParameter<bool>("useJobReport")),
Expand All @@ -228,6 +238,10 @@ namespace edm {
nSubProcesses_{0} {
iRegistry.watchPreBeginJob(this, &Timing::preBeginJob);
iRegistry.watchPostBeginJob(this, &Timing::postBeginJob);
iRegistry.preEndJobSignal_.connect([this]() {
end_loop_time_ = getTime();
end_loop_cpu_ = getCPU();
});
iRegistry.watchPostEndJob(this, &Timing::postEndJob);

iRegistry.watchPreEvent(this, &Timing::preEvent);
Expand Down Expand Up @@ -311,11 +325,6 @@ namespace edm {
countSubProcessesPostEvent_.emplace_back(std::make_unique<std::atomic<unsigned int>>(0));
}
});

iRegistry.postGlobalEndRunSignal_.connect([this](edm::GlobalContext const&) {
last_run_time_ = getTime();
last_run_cpu_ = getCPU();
});
}

Timing::~Timing() {}
Expand Down Expand Up @@ -383,6 +392,8 @@ namespace edm {

double total_job_cpu = job_end_cpu + extra_job_cpu_;

const double job_end_children_cpu = getChildrenCPU();

const double total_initialization_time = curr_job_time_ - jobStartTime();
const double total_initialization_cpu = curr_job_cpu_;

Expand All @@ -395,10 +406,10 @@ namespace edm {
double min_event_time = *(std::min_element(min_events_time_.begin(), min_events_time_.end()));
double max_event_time = *(std::max_element(max_events_time_.begin(), max_events_time_.end()));

auto total_loop_time = last_run_time_ - curr_job_time_;
auto total_loop_cpu = last_run_cpu_ + extra_job_cpu_ - curr_job_cpu_;
auto total_loop_time = end_loop_time_ - curr_job_time_;
auto total_loop_cpu = end_loop_cpu_ + extra_job_cpu_ - curr_job_cpu_;

if (last_run_time_ == 0.0) {
if (end_loop_time_ == 0.0) {
total_loop_time = 0.0;
total_loop_cpu = 0.0;
}
Expand Down Expand Up @@ -427,14 +438,15 @@ namespace edm {
<< " - Total loop: " << total_loop_time << "\n"
<< " - Total init: " << total_initialization_time << "\n"
<< " - Total job: " << total_job_time << "\n"
<< " - EventSetup Lock: " << accumulatedTimeForLock_ << "\n"
<< " - EventSetup Get: " << accumulatedTimeForGet_ << "\n"
<< " - EventSetup Lock: " << accumulatedTimeForLock_ << "\n"
<< " - EventSetup Get: " << accumulatedTimeForGet_ << "\n"
<< " Event Throughput: " << event_throughput << " ev/s\n"
<< " CPU Summary: \n"
<< " - Total loop: " << total_loop_cpu << "\n"
<< " - Total init: " << total_initialization_cpu << "\n"
<< " - Total extra: " << extra_job_cpu_ << "\n"
<< " - Total job: " << total_job_cpu << "\n"
<< " - Total loop: " << total_loop_cpu << "\n"
<< " - Total init: " << total_initialization_cpu << "\n"
<< " - Total extra: " << extra_job_cpu_ << "\n"
<< " - Total children: " << job_end_children_cpu << "\n"
<< " - Total job: " << total_job_cpu << "\n"
<< " Processing Summary: \n"
<< " - Number of Events: " << total_event_count_ << "\n"
<< " - Number of Global Begin Lumi Calls: " << begin_lumi_count_ << "\n"
Expand All @@ -450,6 +462,7 @@ namespace edm {
reportData.insert(std::make_pair("EventThroughput", d2str(event_throughput)));
reportData.insert(std::make_pair("TotalJobTime", d2str(total_job_time)));
reportData.insert(std::make_pair("TotalJobCPU", d2str(total_job_cpu)));
reportData.insert(std::make_pair("TotalJobChildrenCPU", d2str(job_end_children_cpu)));
reportData.insert(std::make_pair("TotalLoopTime", d2str(total_loop_time)));
reportData.insert(std::make_pair("TotalLoopCPU", d2str(total_loop_cpu)));
reportData.insert(std::make_pair("TotalInitTime", d2str(total_initialization_time)));
Expand Down
59 changes: 59 additions & 0 deletions FWCore/Utilities/interface/ChildrenCPUTimer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#ifndef FWCore_Utilities_ChildrenCPUTimer_h
#define FWCore_Utilities_ChildrenCPUTimer_h
// -*- C++ -*-
//
// Package: Utilities
// Class : ChildrenCPUTimer
//
/**\class ChildrenCPUTimer ChildrenCPUTimer.h FWCore/Utilities/interface/ChildrenCPUTimer.h
Description: Timer which measures the CPU time for child processes
Usage:
<usage>
*/
//
// Original Author: Chris Jones
// Created: Sun Apr 16 20:32:13 EDT 2006
//

// system include files
#include <sys/time.h>

// user include files

// forward declarations
namespace edm {
class ChildrenCPUTimer {
public:
ChildrenCPUTimer();
~ChildrenCPUTimer();
ChildrenCPUTimer(ChildrenCPUTimer&&) = default;
ChildrenCPUTimer(const ChildrenCPUTimer&) = delete;
ChildrenCPUTimer& operator=(const ChildrenCPUTimer&) = delete;

// ---------- const member functions ---------------------
double cpuTime() const;

// ---------- static member functions --------------------

// ---------- member functions ---------------------------
void start();
double stop(); //returns delta time
void reset();

void add(double t);

private:
double calculateDeltaTime() const;

// ---------- member data --------------------------------
enum State { kRunning, kStopped } state_;
struct timeval startCPUTime_;

double accumulatedCPUTime_;
};
} // namespace edm

#endif
95 changes: 95 additions & 0 deletions FWCore/Utilities/src/ChildrenCPUTimer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// -*- C++ -*-
//
// Package: Utilities
// Class : ChildrenCPUTimer
//
// Implementation:
// <Notes on implementation>
//
// Original Author: Chris Jones
// Created: Sun Apr 16 20:32:20 EDT 2006
//

// system include files
#include <sys/resource.h>
#include <cerrno>

// user include files
#include "FWCore/Utilities/interface/ChildrenCPUTimer.h"
#include "FWCore/Utilities/interface/Exception.h"

//
// constants, enums and typedefs
//
using namespace edm;

//
// static data member definitions
//

//
// constructors and destructor
//
ChildrenCPUTimer::ChildrenCPUTimer() : state_(kStopped), startCPUTime_(), accumulatedCPUTime_(0) {
startCPUTime_.tv_sec = 0;
startCPUTime_.tv_usec = 0;
}

ChildrenCPUTimer::~ChildrenCPUTimer() {}

//
// member functions
//
void ChildrenCPUTimer::start() {
if (kStopped == state_) {
rusage theUsage;
if (0 != getrusage(RUSAGE_CHILDREN, &theUsage)) {
throw cms::Exception("ChildrenCPUTimerFailed") << errno;
}
startCPUTime_.tv_sec = theUsage.ru_stime.tv_sec + theUsage.ru_utime.tv_sec;
startCPUTime_.tv_usec = theUsage.ru_stime.tv_usec + theUsage.ru_utime.tv_usec;
state_ = kRunning;
}
}

double ChildrenCPUTimer::stop() {
if (kRunning == state_) {
auto t = calculateDeltaTime();
accumulatedCPUTime_ += t;

state_ = kStopped;
return t;
}
return 0.;
}

void ChildrenCPUTimer::reset() { accumulatedCPUTime_ = 0; }

void ChildrenCPUTimer::add(double t) { accumulatedCPUTime_ += t; }

double ChildrenCPUTimer::calculateDeltaTime() const {
double returnValue;
double const microsecToSec = 1E-6;

rusage theUsage;
if (0 != getrusage(RUSAGE_CHILDREN, &theUsage)) {
throw cms::Exception("CPUTimerFailed") << errno;
}

returnValue = theUsage.ru_stime.tv_sec + theUsage.ru_utime.tv_sec - startCPUTime_.tv_sec +
microsecToSec * (theUsage.ru_stime.tv_usec + theUsage.ru_utime.tv_usec - startCPUTime_.tv_usec);
return returnValue;
}
//
// const member functions
//
double ChildrenCPUTimer::cpuTime() const {
if (kStopped == state_) {
return accumulatedCPUTime_;
}
return accumulatedCPUTime_ + calculateDeltaTime();
}

//
// static member functions
//