From fb298555d5bdf648108de0a34896d7ccc14936f6 Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Mon, 26 Feb 2024 13:26:41 +0100 Subject: [PATCH 1/9] #2249: Add perf test for raw MPI Send/Recv and ObjGroup's Send --- tests/perf/send_cost.cc | 163 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 tests/perf/send_cost.cc diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc new file mode 100644 index 0000000000..4da3570852 --- /dev/null +++ b/tests/perf/send_cost.cc @@ -0,0 +1,163 @@ +/* +//@HEADER +// ***************************************************************************** +// +// send_cost.cc +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ +#include "common/test_harness.h" +#include "vt/configs/error/config_assert.h" +#include "vt/scheduler/scheduler.h" +#include +#include +#include + +#include + +#include + +using namespace vt; +using namespace vt::tests::perf::common; + +static constexpr std::array const payloadSizes = { + 1, 64, 128, 2048, 16384, 524288, 268435456}; + +vt::EpochType the_epoch = vt::no_epoch; + +struct SendTest : PerfTestHarness { }; + +struct NodeObj { + struct PingMsg : Message { + using MessageParentType = vt::Message; + vt_msg_serialize_required(); + std::vector vec_; + + PingMsg() : Message() { } + explicit PingMsg(size_t size) : Message() { + vec_.resize(size, vt::theContext()->getNode()); + } + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | vec_; + } + }; + + void sendHandler(NodeObj::PingMsg* msg) { handled_ = true; } + + explicit NodeObj(SendTest* test_obj) : test_obj_(test_obj) { } + + void initialize() { proxy_ = vt::theObjGroup()->getProxy(this); } + + bool handled_ = false; + SendTest* test_obj_ = nullptr; + vt::objgroup::proxy::Proxy proxy_ = {}; +}; + +VT_PERF_TEST(SendTest, test_send) { + auto const thisNode = vt::theContext()->getNode(); + + if (thisNode == 0) { + vt::theTerm()->disableTD(); + } + + auto const lastNode = theContext()->getNumNodes() - 1; + + auto const prevNode = (thisNode - 1 + num_nodes_) % num_nodes_; + auto const nextNode = (thisNode + 1) % num_nodes_; + int data = thisNode; + + for (auto size : payloadSizes) { + std::vector dataVec(size, data); + std::vector recvData(size, data); + + StartTimer(fmt::format("Payload size {}", size)); + + MPI_Request request; + MPI_Irecv( + &recvData[0], size, MPI_INT, prevNode, 0, MPI_COMM_WORLD, &request); + MPI_Send(&dataVec[0], size, MPI_INT, nextNode, 0, MPI_COMM_WORLD); + + MPI_Wait(&request, MPI_STATUS_IGNORE); + + StopTimer(fmt::format("Payload size {}", size)); + } + + if (vt::theContext()->getNode() == 0) { + vt::theTerm()->enableTD(); + } +} + +VT_PERF_TEST(SendTest, test_objgroup_send) { + auto grp_proxy = + vt::theObjGroup()->makeCollective("test_objgroup_send", this); + grp_proxy[my_node_].invoke<&NodeObj::initialize>(); + + if (theContext()->getNode() == 0) { + theTerm()->disableTD(); + } + + auto const thisNode = vt::theContext()->getNode(); + auto const lastNode = theContext()->getNumNodes() - 1; + + auto const prevNode = (thisNode - 1 + num_nodes_) % num_nodes_; + auto const nextNode = (thisNode + 1) % num_nodes_; + int data = thisNode; + { + for (auto size : payloadSizes) { + NodeObj::PingMsg msg(size); + StartTimer(fmt::format("ObjGroup Payload size {}", size)); + + vt::runInEpochCollective([grp_proxy, nextNode, tmpMsg = std::move(msg)] { + grp_proxy[nextNode].send<&NodeObj::sendHandler>(std::move(tmpMsg)); + }); + + StopTimer(fmt::format("ObjGroup Payload size {}", size)); + + assert(grp_proxy[thisNode].get()->handled_); + grp_proxy[thisNode].get()->handled_ = false; + } + } + + if (vt::theContext()->getNode() == 0) { + vt::theTerm()->enableTD(); + } +} + +VT_PERF_TEST_MAIN() From b98b40d41c78ebfbe389d8dcb74f5b1364807c8a Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Wed, 28 Feb 2024 23:25:56 +0100 Subject: [PATCH 2/9] #2249: Add test for collection's send --- tests/perf/send_cost.cc | 152 ++++++++++++++++++++++++++++------------ 1 file changed, 109 insertions(+), 43 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index 4da3570852..ad114cae1b 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -41,10 +41,13 @@ //@HEADER */ #include "common/test_harness.h" -#include "vt/configs/error/config_assert.h" +#include "vt/collective/collective_alg.h" +#include "vt/context/context.h" #include "vt/scheduler/scheduler.h" +#include #include #include +#include #include #include @@ -54,41 +57,18 @@ using namespace vt; using namespace vt::tests::perf::common; -static constexpr std::array const payloadSizes = { - 1, 64, 128, 2048, 16384, 524288, 268435456}; +// static constexpr std::array const payloadSizes = { +// 1, 64, 128, 2048, 16384, 524288, 268435456}; + +static constexpr std::array const payloadSizes = {1, 64}; vt::EpochType the_epoch = vt::no_epoch; struct SendTest : PerfTestHarness { }; -struct NodeObj { - struct PingMsg : Message { - using MessageParentType = vt::Message; - vt_msg_serialize_required(); - std::vector vec_; - - PingMsg() : Message() { } - explicit PingMsg(size_t size) : Message() { - vec_.resize(size, vt::theContext()->getNode()); - } - - template - void serialize(SerializerT& s) { - MessageParentType::serialize(s); - s | vec_; - } - }; - - void sendHandler(NodeObj::PingMsg* msg) { handled_ = true; } - - explicit NodeObj(SendTest* test_obj) : test_obj_(test_obj) { } - - void initialize() { proxy_ = vt::theObjGroup()->getProxy(this); } - - bool handled_ = false; - SendTest* test_obj_ = nullptr; - vt::objgroup::proxy::Proxy proxy_ = {}; -}; +//////////////////////////////////////// +//////////////// RAW MPI /////////////// +//////////////////////////////////////// VT_PERF_TEST(SendTest, test_send) { auto const thisNode = vt::theContext()->getNode(); @@ -124,6 +104,39 @@ VT_PERF_TEST(SendTest, test_send) { } } +//////////////////////////////////////// +///////////// OBJECT GROUP ///////////// +//////////////////////////////////////// + +struct NodeObj { + struct PingMsg : Message { + using MessageParentType = vt::Message; + vt_msg_serialize_required(); + std::vector vec_; + + PingMsg() : Message() { } + explicit PingMsg(size_t size) : Message() { + vec_.resize(size, vt::theContext()->getNode()); + } + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | vec_; + } + }; + + void sendHandler(NodeObj::PingMsg* msg) { handled_ = true; } + + explicit NodeObj(SendTest* test_obj) : test_obj_(test_obj) { } + + void initialize() { proxy_ = vt::theObjGroup()->getProxy(this); } + + bool handled_ = false; + SendTest* test_obj_ = nullptr; + vt::objgroup::proxy::Proxy proxy_ = {}; +}; + VT_PERF_TEST(SendTest, test_objgroup_send) { auto grp_proxy = vt::theObjGroup()->makeCollective("test_objgroup_send", this); @@ -138,21 +151,19 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { auto const prevNode = (thisNode - 1 + num_nodes_) % num_nodes_; auto const nextNode = (thisNode + 1) % num_nodes_; - int data = thisNode; - { - for (auto size : payloadSizes) { - NodeObj::PingMsg msg(size); - StartTimer(fmt::format("ObjGroup Payload size {}", size)); - vt::runInEpochCollective([grp_proxy, nextNode, tmpMsg = std::move(msg)] { - grp_proxy[nextNode].send<&NodeObj::sendHandler>(std::move(tmpMsg)); - }); + for (auto size : payloadSizes) { + NodeObj::PingMsg msg(size); + StartTimer(fmt::format("ObjGroup Payload size {}", size)); - StopTimer(fmt::format("ObjGroup Payload size {}", size)); + vt::runInEpochCollective([grp_proxy, nextNode, tmpMsg = std::move(msg)] { + grp_proxy[nextNode].send<&NodeObj::sendHandler>(std::move(tmpMsg)); + }); - assert(grp_proxy[thisNode].get()->handled_); - grp_proxy[thisNode].get()->handled_ = false; - } + StopTimer(fmt::format("ObjGroup Payload size {}", size)); + + assert(grp_proxy[thisNode].get()->handled_); + grp_proxy[thisNode].get()->handled_ = false; } if (vt::theContext()->getNode() == 0) { @@ -160,4 +171,59 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { } } +//////////////////////////////////////// +////////////// COLLECTION ////////////// +//////////////////////////////////////// + +struct Hello : vt::Collection { + struct TestDataMsg : vt::CollectionMessage { + vt_msg_serialize_required(); + using MessageParentType = vt::CollectionMessage; + // vt_msg_serialize_if_needed_by_parent_or_type1(vt::IdxBase); + TestDataMsg() = default; + explicit TestDataMsg(size_t size) { + vec_.resize(size, theContext()->getNode()); + } + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | vec_; + } + + std::vector vec_ = {}; + }; + Hello() = default; + + void handler(TestDataMsg* msg) { + fmt::print("[{}] Handler!\n", theContext()->getNode()); + counter_++; + } + +private: + int counter_ = 0; +}; + +VT_PERF_TEST(SendTest, test_collection_send) { + auto range = vt::Index1D(int32_t{num_nodes_}); + auto proxy = vt::makeCollection("hello_world_collection_reduce") + .bounds(range) + .bulkInsert() + .wait(); + + auto const thisNode = vt::theContext()->getNode(); + auto const nextNode = (thisNode + 1) % num_nodes_; + + for (auto size : payloadSizes) { + StartTimer(fmt::format("Collection Payload size {}", size)); + + vt::runInEpochCollective([&] { + proxy[nextNode].send(size); + }); + + StopTimer(fmt::format("Collection Payload size {}", size)); + } + vt::theCollective()->barrier(); +} + VT_PERF_TEST_MAIN() From 4e524f1cbc7375299e4ba66583c329b20bdb0438 Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Thu, 29 Feb 2024 20:06:10 +0100 Subject: [PATCH 3/9] #2249: Fix runtime failures --- tests/perf/send_cost.cc | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index ad114cae1b..fd08ffbef6 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -42,7 +42,10 @@ */ #include "common/test_harness.h" #include "vt/collective/collective_alg.h" +#include "vt/configs/error/config_assert.h" +#include "vt/configs/error/hard_error.h" #include "vt/context/context.h" +#include "vt/messaging/message/shared_message.h" #include "vt/scheduler/scheduler.h" #include #include @@ -57,10 +60,8 @@ using namespace vt; using namespace vt::tests::perf::common; -// static constexpr std::array const payloadSizes = { -// 1, 64, 128, 2048, 16384, 524288, 268435456}; - -static constexpr std::array const payloadSizes = {1, 64}; +static constexpr std::array const payloadSizes = { + 1, 64, 128, 2048, 16384, 524288, 268435456}; vt::EpochType the_epoch = vt::no_epoch; @@ -195,13 +196,16 @@ struct Hello : vt::Collection { }; Hello() = default; - void handler(TestDataMsg* msg) { - fmt::print("[{}] Handler!\n", theContext()->getNode()); - counter_++; + void Handler(TestDataMsg* msg) { handled_ = true; } + + void CheckHandledAndReset() { + vtAssert( + handled_, fmt::format("[{}] Recv not run!", theContext()->getNode())); + + handled_ = false; } -private: - int counter_ = 0; + bool handled_ = false; }; VT_PERF_TEST(SendTest, test_collection_send) { @@ -215,15 +219,17 @@ VT_PERF_TEST(SendTest, test_collection_send) { auto const nextNode = (thisNode + 1) % num_nodes_; for (auto size : payloadSizes) { + auto msg = vt::makeMessage(size_t{size}); StartTimer(fmt::format("Collection Payload size {}", size)); - vt::runInEpochCollective([&] { - proxy[nextNode].send(size); - }); + vt::runInEpochCollective( + [&] { proxy[nextNode].sendMsg<&Hello::Handler>(msg); }); StopTimer(fmt::format("Collection Payload size {}", size)); + + vt::runInEpochCollective( + [&] { proxy[thisNode].invoke<&Hello::CheckHandledAndReset>(); }); } - vt::theCollective()->barrier(); } VT_PERF_TEST_MAIN() From d191408797f6603b9eba485de85b04d96b6c9640 Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Wed, 17 Apr 2024 19:26:30 +0200 Subject: [PATCH 4/9] #2249: Update Objgroup send perf test --- tests/perf/send_cost.cc | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index fd08ffbef6..588cc58bb1 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -41,13 +41,16 @@ //@HEADER */ #include "common/test_harness.h" +#include "common/timers.h" #include "vt/collective/collective_alg.h" #include "vt/configs/error/config_assert.h" #include "vt/configs/error/hard_error.h" #include "vt/context/context.h" #include "vt/messaging/message/shared_message.h" #include "vt/scheduler/scheduler.h" +#include #include +#include #include #include #include @@ -64,6 +67,7 @@ static constexpr std::array const payloadSizes = { 1, 64, 128, 2048, 16384, 524288, 268435456}; vt::EpochType the_epoch = vt::no_epoch; +bool send_done = false; struct SendTest : PerfTestHarness { }; @@ -114,9 +118,12 @@ struct NodeObj { using MessageParentType = vt::Message; vt_msg_serialize_required(); std::vector vec_; + DurationMilli start_; PingMsg() : Message() { } - explicit PingMsg(size_t size) : Message() { + explicit PingMsg(size_t size) + : Message(), + start_(std::chrono::steady_clock::now().time_since_epoch()) { vec_.resize(size, vt::theContext()->getNode()); } @@ -124,10 +131,18 @@ struct NodeObj { void serialize(SerializerT& s) { MessageParentType::serialize(s); s | vec_; + s | start_; } }; - void sendHandler(NodeObj::PingMsg* msg) { handled_ = true; } + void sendHandler(NodeObj::PingMsg* msg) { + test_obj_->AddResult( + {fmt::format("ObjGroup Payload size {}", msg->vec_.size()), + (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - + msg->start_) + .count()}); + send_done = true; + } explicit NodeObj(SendTest* test_obj) : test_obj_(test_obj) { } @@ -154,17 +169,12 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { auto const nextNode = (thisNode + 1) % num_nodes_; for (auto size : payloadSizes) { - NodeObj::PingMsg msg(size); - StartTimer(fmt::format("ObjGroup Payload size {}", size)); - - vt::runInEpochCollective([grp_proxy, nextNode, tmpMsg = std::move(msg)] { - grp_proxy[nextNode].send<&NodeObj::sendHandler>(std::move(tmpMsg)); - }); + theCollective()->barrier(); - StopTimer(fmt::format("ObjGroup Payload size {}", size)); + grp_proxy[nextNode].send<&NodeObj::sendHandler>(size); + theSched()->runSchedulerWhile([]{ return !send_done; }); - assert(grp_proxy[thisNode].get()->handled_); - grp_proxy[thisNode].get()->handled_ = false; + send_done = false; } if (vt::theContext()->getNode() == 0) { From e6d30fc246c2cc63f25d85e695aadd40e76d4d1c Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Fri, 19 Apr 2024 15:48:17 +0200 Subject: [PATCH 5/9] #2249: Update Collection send perf test --- tests/perf/send_cost.cc | 61 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index 588cc58bb1..415a27da15 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -40,13 +40,11 @@ // ***************************************************************************** //@HEADER */ + #include "common/test_harness.h" #include "common/timers.h" #include "vt/collective/collective_alg.h" -#include "vt/configs/error/config_assert.h" -#include "vt/configs/error/hard_error.h" #include "vt/context/context.h" -#include "vt/messaging/message/shared_message.h" #include "vt/scheduler/scheduler.h" #include #include @@ -66,8 +64,8 @@ using namespace vt::tests::perf::common; static constexpr std::array const payloadSizes = { 1, 64, 128, 2048, 16384, 524288, 268435456}; -vt::EpochType the_epoch = vt::no_epoch; -bool send_done = false; +bool obj_send_done = false; +bool col_send_done = false; struct SendTest : PerfTestHarness { }; @@ -140,8 +138,10 @@ struct NodeObj { {fmt::format("ObjGroup Payload size {}", msg->vec_.size()), (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - msg->start_) - .count()}); - send_done = true; + .count() + } + ); + obj_send_done = true; } explicit NodeObj(SendTest* test_obj) : test_obj_(test_obj) { } @@ -172,9 +172,9 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { theCollective()->barrier(); grp_proxy[nextNode].send<&NodeObj::sendHandler>(size); - theSched()->runSchedulerWhile([]{ return !send_done; }); + theSched()->runSchedulerWhile([] { return !obj_send_done; }); - send_done = false; + obj_send_done = false; } if (vt::theContext()->getNode() == 0) { @@ -190,9 +190,9 @@ struct Hello : vt::Collection { struct TestDataMsg : vt::CollectionMessage { vt_msg_serialize_required(); using MessageParentType = vt::CollectionMessage; - // vt_msg_serialize_if_needed_by_parent_or_type1(vt::IdxBase); TestDataMsg() = default; - explicit TestDataMsg(size_t size) { + explicit TestDataMsg(size_t size) + : start_(std::chrono::steady_clock::now().time_since_epoch()) { vec_.resize(size, theContext()->getNode()); } @@ -200,27 +200,32 @@ struct Hello : vt::Collection { void serialize(SerializerT& s) { MessageParentType::serialize(s); s | vec_; + s | start_; } std::vector vec_ = {}; + DurationMilli start_; }; - Hello() = default; - void Handler(TestDataMsg* msg) { handled_ = true; } - - void CheckHandledAndReset() { - vtAssert( - handled_, fmt::format("[{}] Recv not run!", theContext()->getNode())); + Hello() = default; - handled_ = false; + void Handler(TestDataMsg* msg) { + test_obj_->AddResult( + {fmt::format("Collection Payload size {}", msg->vec_.size()), + (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - + msg->start_) + .count() + } + ); + col_send_done = true; } - bool handled_ = false; + SendTest* test_obj_ = nullptr; }; VT_PERF_TEST(SendTest, test_collection_send) { auto range = vt::Index1D(int32_t{num_nodes_}); - auto proxy = vt::makeCollection("hello_world_collection_reduce") + auto proxy = vt::makeCollection("send_cost_collection") .bounds(range) .bulkInsert() .wait(); @@ -228,17 +233,15 @@ VT_PERF_TEST(SendTest, test_collection_send) { auto const thisNode = vt::theContext()->getNode(); auto const nextNode = (thisNode + 1) % num_nodes_; - for (auto size : payloadSizes) { - auto msg = vt::makeMessage(size_t{size}); - StartTimer(fmt::format("Collection Payload size {}", size)); + proxy[thisNode].tryGetLocalPtr()->test_obj_ = this; - vt::runInEpochCollective( - [&] { proxy[nextNode].sendMsg<&Hello::Handler>(msg); }); - - StopTimer(fmt::format("Collection Payload size {}", size)); + for (auto size : payloadSizes) { + theCollective()->barrier(); + proxy[nextNode].send<&Hello::Handler>(size); - vt::runInEpochCollective( - [&] { proxy[thisNode].invoke<&Hello::CheckHandledAndReset>(); }); + // We run 1 coll elem per node, so it should be ok + theSched()->runSchedulerWhile([] { return !col_send_done; }); + col_send_done = false; } } From a06166eeb97d6664fe629f54cd5dad567d01e9af Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Thu, 25 Apr 2024 15:02:15 +0200 Subject: [PATCH 6/9] #2249: Update ObjGroup send test with message that uses payload as std::shared_ptr --- tests/perf/send_cost.cc | 107 ++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 15 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index 415a27da15..7506b22038 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -44,6 +44,7 @@ #include "common/test_harness.h" #include "common/timers.h" #include "vt/collective/collective_alg.h" +#include "vt/configs/error/config_assert.h" #include "vt/context/context.h" #include "vt/scheduler/scheduler.h" #include @@ -115,32 +116,76 @@ struct NodeObj { struct PingMsg : Message { using MessageParentType = vt::Message; vt_msg_serialize_required(); - std::vector vec_; - DurationMilli start_; PingMsg() : Message() { } - explicit PingMsg(size_t size) + explicit PingMsg(const std::vector& payload) : Message(), - start_(std::chrono::steady_clock::now().time_since_epoch()) { - vec_.resize(size, vt::theContext()->getNode()); + payload_(payload), + start_(std::chrono::steady_clock::now().time_since_epoch()) { } + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | payload_; + s | start_; } + std::vector payload_; + DurationMilli start_; + }; + + struct PingMsgPtr : Message { + using MessageParentType = vt::Message; + vt_msg_serialize_required(); + + PingMsgPtr() : Message() { } + + explicit PingMsgPtr(const std::shared_ptr>& payload) + : Message(), + payload_(payload), + start_(std::chrono::steady_clock::now().time_since_epoch()) { } + template void serialize(SerializerT& s) { MessageParentType::serialize(s); - s | vec_; + + if (s.isUnpacking()) { + payload_ = std::make_shared>(); + } + + s | *payload_; s | start_; } + + std::shared_ptr> payload_; + DurationMilli start_; }; void sendHandler(NodeObj::PingMsg* msg) { test_obj_->AddResult( - {fmt::format("ObjGroup Payload size {}", msg->vec_.size()), + {fmt::format("ObjGroup Payload size {}", msg->payload_.size()), (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - msg->start_) - .count() - } - ); + .count()}); + + obj_send_done = true; + } + + void sendHandlerPtr(NodeObj::PingMsgPtr* msg) { + test_obj_->AddResult( + {fmt::format("ObjGroupPtr Payload size {}", msg->payload_->size()), + (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - + msg->start_) + .count()}); + + auto const num_nodes = vt::theContext()->getNumNodes(); + auto const this_node = vt::theContext()->getNode(); + auto const prev_node = (this_node - 1 + num_nodes) % num_nodes; + for (auto val : *msg->payload_) { + vtAssert( + val == prev_node, fmt::format("[{}]: Incorrect value!\n", this_node)); + } + obj_send_done = true; } @@ -153,7 +198,39 @@ struct NodeObj { vt::objgroup::proxy::Proxy proxy_ = {}; }; -VT_PERF_TEST(SendTest, test_objgroup_send) { +VT_PERF_TEST(SendTest, test_objgroup_send_ptr) { + auto grp_proxy = + vt::theObjGroup()->makeCollective("test_objgroup_send_ptr", this); + grp_proxy[my_node_].invoke<&NodeObj::initialize>(); + + if (theContext()->getNode() == 0) { + theTerm()->disableTD(); + } + + auto const thisNode = vt::theContext()->getNode(); + auto const lastNode = theContext()->getNumNodes() - 1; + + auto const prevNode = (thisNode - 1 + num_nodes_) % num_nodes_; + auto const nextNode = (thisNode + 1) % num_nodes_; + + for (auto size : payloadSizes) { + auto payload = std::make_shared>(); + payload->resize(size, thisNode); + + theCollective()->barrier(); + + grp_proxy[nextNode].send<&NodeObj::sendHandlerPtr>(payload); + theSched()->runSchedulerWhile([] { return !obj_send_done; }); + + obj_send_done = false; + } + + if (vt::theContext()->getNode() == 0) { + vt::theTerm()->enableTD(); + } +} + +VT_PERF_TEST(SendTest, test_objgroup_send_vec) { auto grp_proxy = vt::theObjGroup()->makeCollective("test_objgroup_send", this); grp_proxy[my_node_].invoke<&NodeObj::initialize>(); @@ -169,9 +246,11 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { auto const nextNode = (thisNode + 1) % num_nodes_; for (auto size : payloadSizes) { + std::vector payload(size, thisNode); + theCollective()->barrier(); - grp_proxy[nextNode].send<&NodeObj::sendHandler>(size); + grp_proxy[nextNode].send<&NodeObj::sendHandler>(payload); theSched()->runSchedulerWhile([] { return !obj_send_done; }); obj_send_done = false; @@ -214,9 +293,7 @@ struct Hello : vt::Collection { {fmt::format("Collection Payload size {}", msg->vec_.size()), (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - msg->start_) - .count() - } - ); + .count()}); col_send_done = true; } From e82ca2e868ede5935bdb57e57c0c7d667c7f1521 Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Tue, 7 May 2024 13:25:28 +0200 Subject: [PATCH 7/9] #2249: Use payload as pointer to data to avoid extra allocation cost --- tests/perf/send_cost.cc | 151 ++++++++++++---------------------------- 1 file changed, 45 insertions(+), 106 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index 7506b22038..2d8377a71d 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -42,12 +42,10 @@ */ #include "common/test_harness.h" -#include "common/timers.h" #include "vt/collective/collective_alg.h" #include "vt/configs/error/config_assert.h" #include "vt/context/context.h" #include "vt/scheduler/scheduler.h" -#include #include #include #include @@ -55,15 +53,13 @@ #include #include -#include - #include using namespace vt; using namespace vt::tests::perf::common; -static constexpr std::array const payloadSizes = { - 1, 64, 128, 2048, 16384, 524288, 268435456}; +static constexpr std::array const payloadSizes = { + 1, 64, 128, 2048, 16384, 524288, 2097152, 268435456}; bool obj_send_done = false; bool col_send_done = false; @@ -76,11 +72,6 @@ struct SendTest : PerfTestHarness { }; VT_PERF_TEST(SendTest, test_send) { auto const thisNode = vt::theContext()->getNode(); - - if (thisNode == 0) { - vt::theTerm()->disableTD(); - } - auto const lastNode = theContext()->getNumNodes() - 1; auto const prevNode = (thisNode - 1 + num_nodes_) % num_nodes_; @@ -89,10 +80,10 @@ VT_PERF_TEST(SendTest, test_send) { for (auto size : payloadSizes) { std::vector dataVec(size, data); - std::vector recvData(size, data); StartTimer(fmt::format("Payload size {}", size)); + std::vector recvData(size, data); MPI_Request request; MPI_Irecv( &recvData[0], size, MPI_INT, prevNode, 0, MPI_COMM_WORLD, &request); @@ -102,10 +93,6 @@ VT_PERF_TEST(SendTest, test_send) { StopTimer(fmt::format("Payload size {}", size)); } - - if (vt::theContext()->getNode() == 0) { - vt::theTerm()->enableTD(); - } } //////////////////////////////////////// @@ -113,34 +100,19 @@ VT_PERF_TEST(SendTest, test_send) { //////////////////////////////////////// struct NodeObj { - struct PingMsg : Message { + struct ObjGroupMsg : Message { using MessageParentType = vt::Message; vt_msg_serialize_required(); - PingMsg() : Message() { } - explicit PingMsg(const std::vector& payload) - : Message(), - payload_(payload), - start_(std::chrono::steady_clock::now().time_since_epoch()) { } + ObjGroupMsg() : Message() { } - template - void serialize(SerializerT& s) { - MessageParentType::serialize(s); - s | payload_; - s | start_; + ~ObjGroupMsg() { + if (owning_) { + delete payload_; + } } - std::vector payload_; - DurationMilli start_; - }; - - struct PingMsgPtr : Message { - using MessageParentType = vt::Message; - vt_msg_serialize_required(); - - PingMsgPtr() : Message() { } - - explicit PingMsgPtr(const std::shared_ptr>& payload) + explicit ObjGroupMsg(std::vector* payload) : Message(), payload_(payload), start_(std::chrono::steady_clock::now().time_since_epoch()) { } @@ -150,41 +122,25 @@ struct NodeObj { MessageParentType::serialize(s); if (s.isUnpacking()) { - payload_ = std::make_shared>(); + payload_ = new std::vector(); + owning_ = true; } s | *payload_; s | start_; } - std::shared_ptr> payload_; + std::vector* payload_; + bool owning_ = false; DurationMilli start_; }; - void sendHandler(NodeObj::PingMsg* msg) { - test_obj_->AddResult( - {fmt::format("ObjGroup Payload size {}", msg->payload_.size()), - (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - - msg->start_) - .count()}); + void sendHandler(NodeObj::ObjGroupMsg* msg) { + auto now = std::chrono::steady_clock::now(); - obj_send_done = true; - } - - void sendHandlerPtr(NodeObj::PingMsgPtr* msg) { test_obj_->AddResult( - {fmt::format("ObjGroupPtr Payload size {}", msg->payload_->size()), - (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - - msg->start_) - .count()}); - - auto const num_nodes = vt::theContext()->getNumNodes(); - auto const this_node = vt::theContext()->getNode(); - auto const prev_node = (this_node - 1 + num_nodes) % num_nodes; - for (auto val : *msg->payload_) { - vtAssert( - val == prev_node, fmt::format("[{}]: Incorrect value!\n", this_node)); - } + {fmt::format("ObjGroup Payload size {}", msg->payload_->size()), + (DurationMilli{now.time_since_epoch()} - msg->start_).count()}); obj_send_done = true; } @@ -198,39 +154,7 @@ struct NodeObj { vt::objgroup::proxy::Proxy proxy_ = {}; }; -VT_PERF_TEST(SendTest, test_objgroup_send_ptr) { - auto grp_proxy = - vt::theObjGroup()->makeCollective("test_objgroup_send_ptr", this); - grp_proxy[my_node_].invoke<&NodeObj::initialize>(); - - if (theContext()->getNode() == 0) { - theTerm()->disableTD(); - } - - auto const thisNode = vt::theContext()->getNode(); - auto const lastNode = theContext()->getNumNodes() - 1; - - auto const prevNode = (thisNode - 1 + num_nodes_) % num_nodes_; - auto const nextNode = (thisNode + 1) % num_nodes_; - - for (auto size : payloadSizes) { - auto payload = std::make_shared>(); - payload->resize(size, thisNode); - - theCollective()->barrier(); - - grp_proxy[nextNode].send<&NodeObj::sendHandlerPtr>(payload); - theSched()->runSchedulerWhile([] { return !obj_send_done; }); - - obj_send_done = false; - } - - if (vt::theContext()->getNode() == 0) { - vt::theTerm()->enableTD(); - } -} - -VT_PERF_TEST(SendTest, test_objgroup_send_vec) { +VT_PERF_TEST(SendTest, test_objgroup_send) { auto grp_proxy = vt::theObjGroup()->makeCollective("test_objgroup_send", this); grp_proxy[my_node_].invoke<&NodeObj::initialize>(); @@ -246,7 +170,8 @@ VT_PERF_TEST(SendTest, test_objgroup_send_vec) { auto const nextNode = (thisNode + 1) % num_nodes_; for (auto size : payloadSizes) { - std::vector payload(size, thisNode); + auto* payload = new std::vector(); + payload->resize(size, thisNode); theCollective()->barrier(); @@ -254,6 +179,8 @@ VT_PERF_TEST(SendTest, test_objgroup_send_vec) { theSched()->runSchedulerWhile([] { return !obj_send_done; }); obj_send_done = false; + + delete payload; } if (vt::theContext()->getNode() == 0) { @@ -270,30 +197,40 @@ struct Hello : vt::Collection { vt_msg_serialize_required(); using MessageParentType = vt::CollectionMessage; TestDataMsg() = default; - explicit TestDataMsg(size_t size) - : start_(std::chrono::steady_clock::now().time_since_epoch()) { - vec_.resize(size, theContext()->getNode()); + ~TestDataMsg() { + if (owning_) { + delete payload_; + } } + explicit TestDataMsg(std::vector* payload) + : start_(std::chrono::steady_clock::now().time_since_epoch()), + payload_(payload) { } template void serialize(SerializerT& s) { MessageParentType::serialize(s); - s | vec_; s | start_; + + if (s.isUnpacking()) { + owning_ = true; + payload_ = new std::vector(); + } + + s | *payload_; } - std::vector vec_ = {}; DurationMilli start_; + std::vector* payload_ = {}; + bool owning_ = false; }; Hello() = default; void Handler(TestDataMsg* msg) { + auto now = std::chrono::steady_clock::now(); test_obj_->AddResult( - {fmt::format("Collection Payload size {}", msg->vec_.size()), - (DurationMilli{std::chrono::steady_clock::now().time_since_epoch()} - - msg->start_) - .count()}); + {fmt::format("Collection Payload size {}", msg->payload_->size()), + (DurationMilli{now.time_since_epoch()} - msg->start_).count()}); col_send_done = true; } @@ -313,8 +250,10 @@ VT_PERF_TEST(SendTest, test_collection_send) { proxy[thisNode].tryGetLocalPtr()->test_obj_ = this; for (auto size : payloadSizes) { + std::vector payload(size, thisNode); + theCollective()->barrier(); - proxy[nextNode].send<&Hello::Handler>(size); + proxy[nextNode].send<&Hello::Handler>(&payload); // We run 1 coll elem per node, so it should be ok theSched()->runSchedulerWhile([] { return !col_send_done; }); From 4563c33ebef270aaaa432d37125f03dd311ae9bb Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Tue, 7 May 2024 18:44:22 +0200 Subject: [PATCH 8/9] #2249: Fix code quality issue --- tests/perf/send_cost.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index 2d8377a71d..3f08622762 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -130,9 +130,9 @@ struct NodeObj { s | start_; } - std::vector* payload_; + std::vector* payload_ = nullptr; bool owning_ = false; - DurationMilli start_; + DurationMilli start_ = {}; }; void sendHandler(NodeObj::ObjGroupMsg* msg) { @@ -219,8 +219,8 @@ struct Hello : vt::Collection { s | *payload_; } - DurationMilli start_; - std::vector* payload_ = {}; + DurationMilli start_ = {}; + std::vector* payload_ = nullptr; bool owning_ = false; }; From 33955ec31cdcf211aa4b155e0f4a9a7c9f9a0c7d Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Thu, 9 May 2024 15:54:36 +0200 Subject: [PATCH 9/9] #2249: Disable global timer for send_cost tests and adjust payload sizes --- tests/perf/send_cost.cc | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/perf/send_cost.cc b/tests/perf/send_cost.cc index 3f08622762..f15d0bb8db 100644 --- a/tests/perf/send_cost.cc +++ b/tests/perf/send_cost.cc @@ -58,13 +58,17 @@ using namespace vt; using namespace vt::tests::perf::common; -static constexpr std::array const payloadSizes = { - 1, 64, 128, 2048, 16384, 524288, 2097152, 268435456}; +static constexpr std::array const payloadSizes = { + 1, 64, 128, 2048, 16384, 32768, 524288, 1048576, 2097152}; bool obj_send_done = false; bool col_send_done = false; -struct SendTest : PerfTestHarness { }; +struct SendTest : PerfTestHarness { + SendTest() { + DisableGlobalTimer(); + } +}; //////////////////////////////////////// //////////////// RAW MPI /////////////// @@ -159,10 +163,6 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { vt::theObjGroup()->makeCollective("test_objgroup_send", this); grp_proxy[my_node_].invoke<&NodeObj::initialize>(); - if (theContext()->getNode() == 0) { - theTerm()->disableTD(); - } - auto const thisNode = vt::theContext()->getNode(); auto const lastNode = theContext()->getNumNodes() - 1; @@ -182,10 +182,6 @@ VT_PERF_TEST(SendTest, test_objgroup_send) { delete payload; } - - if (vt::theContext()->getNode() == 0) { - vt::theTerm()->enableTD(); - } } //////////////////////////////////////// @@ -239,7 +235,7 @@ struct Hello : vt::Collection { VT_PERF_TEST(SendTest, test_collection_send) { auto range = vt::Index1D(int32_t{num_nodes_}); - auto proxy = vt::makeCollection("send_cost_collection") + auto proxy = vt::makeCollection("test_collection_send") .bounds(range) .bulkInsert() .wait();