diff --git a/docs/user_guide/source/engines/sst.rst b/docs/user_guide/source/engines/sst.rst index ace5eda039..3f0b9fe2c7 100644 --- a/docs/user_guide/source/engines/sst.rst +++ b/docs/user_guide/source/engines/sst.rst @@ -157,7 +157,7 @@ the underlying network communication mechanism to use for exchanging data in SST. Generally this is chosen by SST based upon what is available on the current platform. However, specifying this engine parameter allows overriding SST's choice. Current allowed values are -**"UCX"**,**"MPI"**, **"RDMA"**, and **"WAN"**. (**ib** and **fabric** are accepted as +**"UCX"**, **"MPI"**, **"RDMA"**, and **"WAN"**. (**ib** and **fabric** are accepted as equivalent to **RDMA** and **evpath** is equivalent to **WAN**.) Generally both the reader and writer should be using the same network transport, and the network transport chosen may be dictated by the @@ -219,7 +219,7 @@ plane, this parameter controls the libfabric interface choice. 12. ``FirstTimestepPrecious``: Default **FALSE**. FirstTimestepPrecious is a boolean parameter that affects the queueing of the first timestep presented to the SST Writer engine. If -FirstTimestepPrecious is **TRUE***, then the first timestep is +FirstTimestepPrecious is **TRUE**, then the first timestep is effectively never removed from the output queue and will be presented as a first timestep to any reader that joins at a later time. This can be used to convey run parameters or other information that every @@ -235,7 +235,7 @@ This value is interpreted by only by the SST Writer engine. 13. ``AlwaysProvideLatestTimestep``: Default **FALSE**. AlwaysProvideLatestTimestep is a boolean parameter that affects what of the available timesteps will be provided to the reader engine. If -AlwaysProvideLatestTimestep is **TRUE***, then if there are multiple +AlwaysProvideLatestTimestep is **TRUE**, then if there are multiple timesteps available to the reader, older timesteps will be skipped and the reader will see only the newest available upon BeginStep. This value is interpreted by only by the SST Reader engine. diff --git a/source/adios2/engine/sst/SstWriter.cpp b/source/adios2/engine/sst/SstWriter.cpp index 4f95a92662..66039de421 100644 --- a/source/adios2/engine/sst/SstWriter.cpp +++ b/source/adios2/engine/sst/SstWriter.cpp @@ -183,6 +183,8 @@ StepStatus SstWriter::BeginStep(StepMode mode, const float timeout_sec) return StepStatus::OK; } +size_t SstWriter::CurrentStep() const { return m_WriterStep; } + void SstWriter::MarshalAttributes() { PERFSTUBS_SCOPED_TIMER_FUNC(); diff --git a/source/adios2/engine/sst/SstWriter.h b/source/adios2/engine/sst/SstWriter.h index c4bb9d2794..5f0d5cfce1 100644 --- a/source/adios2/engine/sst/SstWriter.h +++ b/source/adios2/engine/sst/SstWriter.h @@ -38,6 +38,7 @@ class SstWriter : public Engine StepStatus BeginStep(StepMode mode, const float timeoutSeconds = -1.0) final; + size_t CurrentStep() const final; void PerformPuts() final; void EndStep() final; void Flush(const int transportIndex = -1) final; diff --git a/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc b/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc index ca0a760729..59b08923e5 100644 --- a/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc +++ b/source/adios2/toolkit/format/bp/bp4/BP4Serializer.tcc @@ -339,7 +339,8 @@ BP4Serializer::GetBPStats(const bool singleValue, if (blockInfo.MemSpace == MemorySpace::CUDA) { const size_t size = helper::GetTotalSize(blockInfo.Count); - helper::CUDAMinMax(blockInfo.Data, size, stats.Min, stats.Max); + if (!std::is_same::value) + helper::CUDAMinMax(blockInfo.Data, size, stats.Min, stats.Max); return stats; } #endif diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp index 96125b32d5..32d00f572c 100644 --- a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp +++ b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp @@ -606,8 +606,9 @@ static void GetMinMax(const void *Data, size_t ElemCount, const DataType Type, Type == helper::GetDataType()) \ { \ const T *values = (const T *)Data; \ - helper::CUDAMinMax(values, ElemCount, MinMax.MinUnion.field_##N, \ - MinMax.MaxUnion.field_##N); \ + if (!std::is_same::value) \ + helper::CUDAMinMax(values, ElemCount, MinMax.MinUnion.field_##N, \ + MinMax.MaxUnion.field_##N); \ } ADIOS2_FOREACH_MINMAX_STDTYPE_2ARGS(pertype) #undef pertype diff --git a/source/utils/adios_iotest/adios_iotest.cpp b/source/utils/adios_iotest/adios_iotest.cpp index 27a743fcff..072fc8fe95 100644 --- a/source/utils/adios_iotest/adios_iotest.cpp +++ b/source/utils/adios_iotest/adios_iotest.cpp @@ -18,23 +18,27 @@ int main(int argc, char *argv[]) { + Settings settings; + + /* Check input arguments. Quit if something is wrong. */ + if (settings.processArguments(argc, argv)) + { + return 1; + } + int provided; int threadSupportLevel = MPI_THREAD_SINGLE; - - if (std::string(argv[1]) == "SST") + if (settings.multithreadedMPI) { threadSupportLevel = MPI_THREAD_MULTIPLE; } - MPI_Init_thread(&argc, &argv, threadSupportLevel, &provided); - Settings settings; + settings.initDecomp(MPI_COMM_WORLD); - /* Check input arguments. Quit if something is wrong. */ - if (settings.processArguments(argc, argv, MPI_COMM_WORLD) || - settings.extraArgumentChecks()) + // MPI-dependent argument checks + if (settings.extraArgumentChecks()) { - MPI_Finalize(); return 1; } diff --git a/source/utils/adios_iotest/settings.cpp b/source/utils/adios_iotest/settings.cpp index b156ac19a8..495ead1788 100644 --- a/source/utils/adios_iotest/settings.cpp +++ b/source/utils/adios_iotest/settings.cpp @@ -31,12 +31,13 @@ struct option options[] = {{"help", no_argument, NULL, 'h'}, {"weak-scaling", no_argument, NULL, 'w'}, {"timer", no_argument, NULL, 't'}, {"fixed", no_argument, NULL, 'F'}, + {"multithreaded-mpi", no_argument, NULL, 'T'}, #ifdef ADIOS2_HAVE_HDF5_PARALLEL {"hdf5", no_argument, NULL, 'H'}, #endif {NULL, 0, NULL, 0}}; -static const char *optstring = "-hvswtFHa:c:d:D:x:p:"; +static const char *optstring = "-hvswtTFHa:c:d:D:x:p:"; size_t Settings::ndigits(size_t n) const { @@ -74,6 +75,7 @@ void Settings::displayHelp() << " -v increase verbosity\n" << " -h display this help\n" << " -F turn on fixed I/O pattern explicitly\n" + << " -T turn on multi-threaded MPI (needed by SST/MPI)\n" << " -p specify the path of the output explicitly\n" << " -t print and dump the timing measured by the I/O " "timer\n\n"; @@ -206,6 +208,9 @@ int Settings::processArgs(int argc, char *argv[]) case 'F': fixedPattern = true; break; + case 'T': + multithreadedMPI = true; + break; case 'h': if (!myRank) { @@ -318,13 +323,27 @@ int Settings::processArgs(int argc, char *argv[]) return 0; } -int Settings::processArguments(int argc, char *argv[], MPI_Comm worldComm) +int Settings::processArguments(int argc, char *argv[]) { int retval = 0; try { retval = processArgs(argc, argv); + } + catch (std::exception &e) // command-line argument errors + { + std::cout << "ERROR : " << e.what() << std::endl; + displayHelp(); + retval = 1; + } + return retval; +} +int Settings::initDecomp(MPI_Comm worldComm) +{ + int retval = 0; + try + { int wrank; MPI_Comm_rank(worldComm, &wrank); MPI_Comm_split(worldComm, static_cast(appId), wrank, &appComm); diff --git a/source/utils/adios_iotest/settings.h b/source/utils/adios_iotest/settings.h index 1436e22376..b02d69ed50 100644 --- a/source/utils/adios_iotest/settings.h +++ b/source/utils/adios_iotest/settings.h @@ -42,6 +42,7 @@ class Settings bool ioTimer = false; // used to measure io time bool fixedPattern = false; // should Lock definitions? bool isRatioDecomp = false; + bool multithreadedMPI = false; // turn on MT-enabled MPI IOLib iolib = IOLib::ADIOS; // process decomposition std::vector processDecomp = {1, 1, 1, 1, 1, 1, 1, 1, @@ -56,7 +57,8 @@ class Settings Settings() = default; ~Settings() = default; - int processArguments(int argc, char *argv[], MPI_Comm worldComm); + int processArguments(int argc, char *argv[]); + int initDecomp(MPI_Comm worldComm); int extraArgumentChecks(); size_t stringToNumber(const std::string &varName, const char *arg) const; int parseCSDecomp(const char *arg);