Skip to content

Commit

Permalink
Tpetra: Adding Kokkos::fence tracking support to timer injection (#12223
Browse files Browse the repository at this point in the history
)

* Tpetra: Getting random pools correct

* Tpetra: Adding Kokkos::fence tracking support to timer injection

* MueLu: CLI timer injection support

* Tpetra: oops

* Tpetra: oops

* Tpetra: oops

* MueLu: oops

* MueLu: oops

* Tpetra: oops

* MueLu: oops

* Tpetra: Improving fence labeling

* Tpetra: Updating output text
  • Loading branch information
csiefer2 authored Sep 13, 2023
1 parent 25eae15 commit 5a91a88
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 17 deletions.
5 changes: 4 additions & 1 deletion packages/muelu/test/unit_tests/MueLu_Test_ETI.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
#endif

#include <TpetraCore_config.h>
#include <Tpetra_Details_DeepCopyTeuchosTimerInjection.hpp>
#include <Tpetra_Details_KokkosTeuchosTimerInjection.hpp>

#include <KokkosKernels_config.h>
#include <KokkosKernels_Controls.hpp>
Expand Down Expand Up @@ -126,6 +126,7 @@ bool Automatic_Test_ETI(int argc, char *argv[]) {
#endif

bool timedeepcopy = false; clp.setOption("timedeepcopy", "notimedeepcopy", &timedeepcopy, "instrument Kokkos::deep_copy() with Teuchos timers. This can also be done with by setting the environment variable TPETRA_TIME_KOKKOS_DEEP_COPY=ON");
bool timefence = false; clp.setOption("timefence", "notimefence", &timefence, "instrument Kokkos::fence() with Teuchos timers. This can also be done with by setting the environment variable TPETRA_TIME_KOKKOS_FENCE=ON");
Xpetra::Parameters xpetraParameters(clp);

clp.recogniseAllOptions(false);
Expand All @@ -138,6 +139,8 @@ bool Automatic_Test_ETI(int argc, char *argv[]) {

if(timedeepcopy)
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor(true);
if(timefence)
Tpetra::Details::AddKokkosFenceToTimeMonitor(true);

#ifdef HAVE_TEUCHOS_STACKTRACE
if (stacktrace)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
#include "Teuchos_FancyOStream.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_StackedTimer.hpp"
#include "Tpetra_Details_DeepCopyTeuchosTimerInjection.hpp"
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"
#include "fem_assembly_commandLineOpts.hpp"
#include "fem_assembly_typedefs.hpp"
#include "fem_assembly_MeshDatabase.hpp"
Expand Down
3 changes: 2 additions & 1 deletion packages/tpetra/core/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -937,4 +937,5 @@ SET_PROPERTY(
# / from this directory, or to / from the 'impl' subdirectory. That ensures
# that running "make" will also rerun CMake in order to regenerate Makefiles.
#
# Here's a change
# Here's another change

8 changes: 7 additions & 1 deletion packages/tpetra/core/src/Tpetra_Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

#include <Kokkos_Core.hpp>
#include "Tpetra_Details_checkLaunchBlocking.hpp"
#include "Tpetra_Details_DeepCopyTeuchosTimerInjection.hpp"
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"

namespace Tpetra {

Expand Down Expand Up @@ -244,6 +244,8 @@ namespace Tpetra {

// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
}
tpetraIsInitialized_ = true;
}
Expand All @@ -265,6 +267,8 @@ namespace Tpetra {

// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
}
tpetraIsInitialized_ = true;

Expand Down Expand Up @@ -310,6 +314,8 @@ namespace Tpetra {

// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
}
tpetraIsInitialized_ = true;
wrappedDefaultComm_ = comm;
Expand Down
10 changes: 10 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,17 @@ bool Behavior::timeKokkosDeepCopyVerbose()

}

/// Report whether Teuchos timers should be added around Kokkos::fence().
///
/// The answer comes from the TPETRA_TIME_KOKKOS_FENCE environment variable
/// and falls back to \c false. The function-local statics hold the state
/// handed to idempotentlyGetEnvironmentVariableAsBool across calls.
bool Behavior::timeKokkosFence()
{
  constexpr char envVarName[] = "TPETRA_TIME_KOKKOS_FENCE";
  constexpr bool defaultValue = false;

  // State shared between calls: the stored answer and a "was it looked up" flag.
  static bool cachedValue = defaultValue;
  static bool alreadyRead = false;

  const bool enabled = idempotentlyGetEnvironmentVariableAsBool(
      cachedValue, alreadyRead, envVarName, defaultValue);
  return enabled;
}

} // namespace Details
} // namespace Tpetra
Expand Down
5 changes: 5 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,11 @@ class Behavior {
/// <tt>TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE</tt> environment variable.
static bool timeKokkosDeepCopyVerbose();

/// \brief Add Teuchos timers for all host calls to Kokkos::fence().
///
/// This is disabled by default. You may control this at run time via the
/// <tt>TPETRA_TIME_KOKKOS_FENCE</tt> environment variable.
static bool timeKokkosFence();

/// \brief Warn if more than this many Kokkos spaces are accessed.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
// ************************************************************************
// @HEADER
*/
#include "Tpetra_Details_DeepCopyTeuchosTimerInjection.hpp"
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
Expand Down Expand Up @@ -120,8 +120,6 @@ namespace Details {
}
}// end DeepCopyTimerInjection



void AddKokkosDeepCopyToTimeMonitor(bool force) {
if (!DeepCopyTimerInjection::initialized_) {
if (force || Tpetra::Details::Behavior::timeKokkosDeepCopy() || Tpetra::Details::Behavior::timeKokkosDeepCopyVerbose()) {
Expand All @@ -132,6 +130,105 @@ namespace Details {
}
}
}

namespace FenceTimerInjection {
Teuchos::RCP<Teuchos::Time> timer_;
bool initialized_ = false;
uint64_t active_handle;

void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
uint64_t* handle) {

// Nested fences are not allowed
if(timer_ != Teuchos::null)
return;
active_handle = (active_handle+1) % 1024;
*handle = active_handle;

// Get a useful label from the deviceId
// NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
std::string device_label("(");
{
using namespace Kokkos::Tools::Experimental;

ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
if (eid.type == DeviceType::Serial) device_label+="Serial";
else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
else if (eid.type == DeviceType::HIP) device_label+="HIP";
else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
else if (eid.type == DeviceType::HPX) device_label+="HPX";
else if (eid.type == DeviceType::Threads) device_label+="Threats";
else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
else device_label+="Unknown to Tpetra";

if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
device_label += " All Instances)";
else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
device_label += " DeepCopyResource)";
else
device_label += " Instance " + std::to_string(eid.instance_id) + ")";
}

timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::fence ")+name + " " + device_label);
timer_->start();
timer_->incrementNumCalls();
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
if (nonnull(stackedTimer))
stackedTimer->start(timer_->name());
#endif

}


void kokkosp_end_fence(const uint64_t handle) {
if(handle == active_handle) {
if (timer_ != Teuchos::null) {
timer_->stop();
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
try {
const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
if (nonnull(stackedTimer))
stackedTimer->stop(timer_->name());
}
catch (std::runtime_error&) {
std::ostringstream warning;
warning <<
"\n*********************************************************************\n"
"WARNING: Overlapping timers detected!\n"
"A TimeMonitor timer was stopped before a nested subtimer was\n"
"stopped. This is not allowed by the StackedTimer. This corner case\n"
"typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
"assigned to a new timer. To disable this warning, either fix the\n"
"ordering of timer creation and destuction or disable the StackedTimer\n";
std::cout << warning.str() << std::endl;
Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
}
#endif
}

timer_ = Teuchos::null;

}
// Else: We've nested our fences, and we need to ignore the inner fences
}


}//end FenceTimerInjection

void AddKokkosFenceToTimeMonitor(bool force) {
if (!FenceTimerInjection::initialized_) {
if (force || Tpetra::Details::Behavior::timeKokkosFence()) {
Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
FenceTimerInjection::initialized_=true;
}
}
}



} // namespace Details
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,12 @@
// ************************************************************************
// @HEADER
*/
#ifndef TPETRA_DETAILS_DEEP_COPY_TEUCHOS_TIMER_INJECTION_HPP
#define TPETRA_DETAILS_DEEP_COPY_TEUCHOS_TIMER_INJECTION_HPP

/// \file Tpetra_Details_DeepCopyTeuchosTimerInjection.hpp
/// \brief Declaration of Tpetra::Details::DeepCopyTeuchosTimerInjection, a class that
/// uses Kokkos' profiling library to add deep copies between memory spaces to the Teuchos::TimeMonitor
/// system. The idea being that you enable this capability and your regular timer output now prints out
/// all of your traffic between memory spaces. This does have the side effect of making Kokkos::deep_copy()
#ifndef TPETRA_DETAILS_KOKKOS_TEUCHOS_TIMER_INJECTION_HPP
#define TPETRA_DETAILS_KOKKOS_TEUCHOS_TIMER_INJECTION_HPP

/// \file Tpetra_Details_KokkosTeuchosTimerInjection.hpp
/// \brief Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces,
/// and Kokkos fences, to the Teuchos::TimeMonitor system. This does have the side effect of making Kokkos::deep_copy()
/// calls on the host also call Kokkos::fence()


Expand All @@ -57,7 +55,11 @@ namespace Details {
// This is used for unit testing the capability
void AddKokkosDeepCopyToTimeMonitor(bool force = false);

// Add Teuchos timers around Kokkos::fence() by registering begin/end fence
// callbacks with Kokkos' profiling (Tools) interface. Registration is done at
// most once; later calls are no-ops.
// The force option overrides the environment variable control via TPETRA_TIME_KOKKOS_FENCE
// This is used for unit testing the capability
void AddKokkosFenceToTimeMonitor(bool force = false);

} // namespace Details
} // namespace Tpetra

#endif // TPETRA_DETAILS_DEEP_COPY_TEUCHOS_TIMER_INJECTION_HPP
#endif // TPETRA_DETAILS_KOKKOS_TEUCHOS_TIMER_INJECTION_HPP
4 changes: 3 additions & 1 deletion packages/tpetra/core/src/Tpetra_Details_initializeKokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#include "Teuchos_GlobalMPISession.hpp"
#include "Kokkos_Core.hpp"
#include "Tpetra_Details_checkLaunchBlocking.hpp"
#include "Tpetra_Details_DeepCopyTeuchosTimerInjection.hpp"
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"
#include <cstdlib> // std::atexit
#include <string>
#include <vector>
Expand Down Expand Up @@ -82,6 +82,8 @@ initializeKokkos ()
}
// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
}

} // namespace Details
Expand Down

0 comments on commit 5a91a88

Please sign in to comment.