From 6ed76bfcdf476e47c81f895a94d1487556a5c97f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 31 Jan 2020 10:30:48 -0700 Subject: [PATCH 01/49] Tpetra: Add verbosePrintArray function @trilinos/tpetra This is useful for controlling how much of each array gets printed when you set TPETRA_VERBOSE=1. --- packages/tpetra/core/src/Tpetra_Util.hpp | 38 ++++++- .../tpetra/core/test/Utils/CMakeLists.txt | 10 ++ .../core/test/Utils/verbosePrintArray.cpp | 105 ++++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 packages/tpetra/core/test/Utils/verbosePrintArray.cpp diff --git a/packages/tpetra/core/src/Tpetra_Util.hpp b/packages/tpetra/core/src/Tpetra_Util.hpp index 2ca0aee17470..4ab3dfac5717 100644 --- a/packages/tpetra/core/src/Tpetra_Util.hpp +++ b/packages/tpetra/core/src/Tpetra_Util.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -60,6 +58,7 @@ #include "Teuchos_Utils.hpp" #include #include +#include #include #if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS) @@ -954,6 +953,41 @@ namespace Tpetra { return os.str (); } + /// \brief Print min(x.size(), maxNumToPrint) entries of x. + /// + /// \return void, because returning std::ostream& won't work + /// if \c out is an std::ostringstream. + template + void + verbosePrintArray(std::ostream& out, + const ArrayType& x, + const char name[], + const size_t maxNumToPrint) + { + out << name << ": ["; + + const size_t numEnt(x.size()); + if (maxNumToPrint == 0) { + if (numEnt != 0) { + out << "..."; + } + } + else { + const size_t numToPrint = numEnt > maxNumToPrint ? 
+ maxNumToPrint : numEnt; + size_t k = 0; + for ( ; k < numToPrint; ++k) { + out << x[k]; + if (k + size_t(1) < numToPrint) { + out << ", "; + } + } + if (k < numEnt) { + out << ", ..."; + } + } + out << "]"; + } } // namespace Details } // namespace Tpetra diff --git a/packages/tpetra/core/test/Utils/CMakeLists.txt b/packages/tpetra/core/test/Utils/CMakeLists.txt index dfe5d7fb961a..1d82d4047d7b 100644 --- a/packages/tpetra/core/test/Utils/CMakeLists.txt +++ b/packages/tpetra/core/test/Utils/CMakeLists.txt @@ -135,3 +135,13 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 STANDARD_PASS_OUTPUT ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + verbosePrintArray + SOURCES + verbosePrintArray + ${TEUCHOS_STD_UNIT_TEST_MAIN} + COMM serial mpi + NUM_MPI_PROCS 1 + STANDARD_PASS_OUTPUT + ) diff --git a/packages/tpetra/core/test/Utils/verbosePrintArray.cpp b/packages/tpetra/core/test/Utils/verbosePrintArray.cpp new file mode 100644 index 000000000000..f88076935f24 --- /dev/null +++ b/packages/tpetra/core/test/Utils/verbosePrintArray.cpp @@ -0,0 +1,105 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_Util.hpp" +#include "Teuchos_UnitTestHarness.hpp" +#include + +namespace { // (anonymous) + + TEUCHOS_UNIT_TEST( Utils, VerbosePrintArray_threshold ) + { + using Tpetra::Details::verbosePrintArray; + + std::vector x {{3, 5, 7, 9, 11}}; + std::ostringstream os; + + verbosePrintArray(os, x, "x", 10); + os << ", "; + verbosePrintArray(os, x, "x2", 3); + os << ", "; + verbosePrintArray(os, x, "x3", 5); + + const std::string expected + ("x: [3, 5, 7, 9, 11], x2: [3, 5, 7, ...], x3: [3, 5, 7, 9, 11]"); + TEST_EQUALITY( os.str(), expected ); + } + + TEUCHOS_UNIT_TEST( Utils, VerbosePrintArray_empty ) + { + using Tpetra::Details::verbosePrintArray; + + std::vector x; + std::ostringstream os; + + verbosePrintArray(os, x, "x", 10); + os << ", "; + verbosePrintArray(os, x, "x2", 3); + os << ", "; + verbosePrintArray(os, x, "x3", 5); + + const std::string expected("x: [], x2: [], x3: []"); + 
TEST_EQUALITY( os.str(), expected ); + } + + TEUCHOS_UNIT_TEST( Utils, VerbosePrintArray_zero_threshold ) + { + using Tpetra::Details::verbosePrintArray; + + std::vector x {{3, 5, 7, 9, 11}}; + std::vector y; + std::ostringstream os; + + verbosePrintArray(os, x, "x", 0); + os << ", "; + verbosePrintArray(os, x, "x2", 0); + os << ", "; + verbosePrintArray(os, x, "x3", 0); + os << ", "; + verbosePrintArray(os, y, "y", 0); + + const std::string expected + ("x: [...], x2: [...], " + "x3: [...], y: []"); + TEST_EQUALITY( os.str(), expected ); + } + +} // namespace (anonymous) From aea200580fa0493cf5f9b92dddc634ecd5ccfbf0 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 31 Jan 2020 11:19:15 -0700 Subject: [PATCH 02/49] Tpetra: Limit verbose printing in Directory::getEntries @trilinos/tpetra Use new verbosePrintArray function to limit the number of entries verbose printing will show, in the implementation of Directory::getEntries for noncontiguous Maps. --- .../core/src/Tpetra_DirectoryImpl_def.hpp | 63 ++++++++++++------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_DirectoryImpl_def.hpp b/packages/tpetra/core/src/Tpetra_DirectoryImpl_def.hpp index 4e8150f9ff2b..c86fabf583ed 100644 --- a/packages/tpetra/core/src/Tpetra_DirectoryImpl_def.hpp +++ b/packages/tpetra/core/src/Tpetra_DirectoryImpl_def.hpp @@ -34,13 +34,11 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER -#ifndef __Tpetra_DirectoryImpl_def_hpp -#define __Tpetra_DirectoryImpl_def_hpp +#ifndef TPETRA_DIRECTORYIMPL_DEF_HPP +#define TPETRA_DIRECTORYIMPL_DEF_HPP /// \file Tpetra_DirectoryImpl_def.hpp /// \brief Definition of implementation details of Tpetra::Directory. 
@@ -48,6 +46,7 @@ #include "Tpetra_Distributor.hpp" #include "Tpetra_Map.hpp" #include "Tpetra_TieBreak.hpp" +#include "Tpetra_Util.hpp" #include "Tpetra_Details_FixedHashTable.hpp" #include "Teuchos_Comm.hpp" #include @@ -943,7 +942,8 @@ namespace Tpetra { using Teuchos::as; using Teuchos::RCP; using Teuchos::toString; - using ::Tpetra::Details::Behavior; + using Details::Behavior; + using Details::verbosePrintArray; using std::cerr; using std::endl; using size_type = typename Array::size_type; @@ -955,6 +955,9 @@ namespace Tpetra { RCP > comm = map.getComm (); const bool verbose = Behavior::verbose ("Directory") || Behavior::verbose ("Tpetra::Directory"); + const size_t maxNumToPrint = verbose ? + Behavior::verbosePrintCountThreshold() : size_t(0); + std::unique_ptr procPrefix; if (verbose) { std::ostringstream os; @@ -966,12 +969,16 @@ namespace Tpetra { os << map.getComm ()->getRank (); } os << ": "; - procPrefix = std::unique_ptr (new std::string (os.str ())); - os << funcPrefix << "{GIDs: " << toString (globalIDs) - << ", PIDs: " << toString (nodeIDs) - << ", LIDs: " << toString (localIDs) - << ", computeLIDs: " << (computeLIDs ? "true" : "false") - << "}" << endl; + procPrefix = std::unique_ptr( + new std::string(os.str())); + os << funcPrefix << "{"; + verbosePrintArray(os, globalIDs, "GIDs", maxNumToPrint); + os << ", "; + verbosePrintArray(os, nodeIDs, "PIDs", maxNumToPrint); + os << ", "; + verbosePrintArray(os, localIDs, "LIDs", maxNumToPrint); + os << ", computeLIDs: " + << (computeLIDs ? 
"true" : "false") << "}" << endl; cerr << os.str (); } @@ -1001,8 +1008,9 @@ namespace Tpetra { res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ()); if (verbose) { std::ostringstream os; - os << *procPrefix << "directoryMap_->getRemoteIndexList " - "PIDs result: " << toString (dirImages) << endl; + os << *procPrefix << "Director Map getRemoteIndexList out "; + verbosePrintArray(os, dirImages, "PIDs", maxNumToPrint); + os << endl; cerr << os.str (); } @@ -1032,9 +1040,11 @@ namespace Tpetra { if (verbose) { std::ostringstream os; os << *procPrefix << "Distributor::createFromRecvs result: " - << "{sendGIDs: " << toString (sendGIDs) - << ", sendPIDs: " << toString (sendImages) - << "}" << endl; + << "{"; + verbosePrintArray(os, sendGIDs, "sendGIDs", maxNumToPrint); + os << ", "; + verbosePrintArray(os, sendImages, "sendPIDs", maxNumToPrint); + os << "}" << endl; cerr << os.str (); } const size_type numSends = sendGIDs.size (); @@ -1181,16 +1191,18 @@ namespace Tpetra { // doWaits. The code is still correct in this form, however. 
if (verbose) { std::ostringstream os; - os << *procPrefix << "Call doPostsAndWaits: {packetSize: " - << packetSize << ", exports: " << toString (exports) << "}" - << endl; + os << *procPrefix << "Call doPostsAndWaits: {" + << "packetSize: " << packetSize << ", "; + verbosePrintArray(os, exports, "exports", maxNumToPrint); + os << "}" << endl; cerr << os.str (); } distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ()); if (verbose) { std::ostringstream os; - os << *procPrefix << "doPostsAndWaits result: " - << toString (imports) << endl; + os << *procPrefix << "doPostsAndWaits result: "; + verbosePrintArray(os, imports, "imports", maxNumToPrint); + os << endl; cerr << os.str (); } @@ -1202,8 +1214,11 @@ namespace Tpetra { sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin()); if (verbose) { std::ostringstream os; - os << *procPrefix << "sortedIDs: " << toString (sortedIDs) - << ", offset: " << toString (offset) << endl; + os << *procPrefix; + verbosePrintArray(os, sortedIDs, "sortedIDs", maxNumToPrint); + os << ", "; + verbosePrintArray(os, offset, "offset", maxNumToPrint); + os << endl; cerr << os.str (); } @@ -1281,4 +1296,4 @@ namespace Tpetra { template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \ } -#endif // __Tpetra_DirectoryImpl_def_hpp +#endif // TPETRA_DIRECTORYIMPL_DEF_HPP From b1425a31c370aa89b3c287412f58c8f65d6327ad Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 31 Jan 2020 11:42:58 -0700 Subject: [PATCH 03/49] Tpetra::DistObject: Improve verbose debugging output @trilinos/tpetra Factor createPrefix out of CrsGraph and CrsMatrix, into DistObject. Make DistObject be more selective about printing verbose output. 
--- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 3 - .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 61 +++----- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 3 - .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 59 +++----- .../core/src/Tpetra_DistObject_decl.hpp | 7 +- .../tpetra/core/src/Tpetra_DistObject_def.hpp | 138 +++++++++++------- 6 files changed, 132 insertions(+), 139 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 95e5eb4e2a42..9d3811922b68 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1485,9 +1485,6 @@ namespace Tpetra { }; private: - std::unique_ptr - createPrefix(const char methodName[]) const; - // Friend declaration for nonmember function. template friend Teuchos::RCP diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index d25243d64b57..42cfe2e36dda 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -1214,7 +1214,7 @@ namespace Tpetra { CrsGraph:: allocateIndices (const ELocalGlobal lg, const bool verbose) { - using ::Tpetra::Details::ProfilingRegion; + using Details::ProfilingRegion; using Teuchos::arcp; using Teuchos::Array; using Teuchos::ArrayRCP; @@ -1228,12 +1228,13 @@ namespace Tpetra { typename lcl_col_inds_type::array_layout, device_type> gbl_col_inds_type; const char tfecfFuncName[] = "allocateIndices: "; - const char suffix[] = " Please report this bug to the Tpetra developers."; + const char suffix[] = + " Please report this bug to the Tpetra developers."; ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices"); std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("allocateIndices"); + prefix = this->createPrefix("CrsGraph", "allocateIndices"); std::ostringstream os; os << *prefix << "{lg=" << (lg == GlobalIndices ? 
"GlobalIndices" : "LocalIndices") @@ -4471,7 +4472,7 @@ namespace Tpetra { CrsGraph:: makeIndicesLocal (const bool verbose) { - using ::Tpetra::Details::ProfilingRegion; + using Details::ProfilingRegion; using Teuchos::arcp; using Teuchos::Array; using std::endl; @@ -4490,7 +4491,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("makeIndicesLocal"); + prefix = this->createPrefix("CrsGraph", "makeIndicesLocal"); std::ostringstream os; os << *prefix << "lclNumRows: " << getNodeNumRows() << endl; std::cerr << os.str(); @@ -5095,19 +5096,22 @@ namespace Tpetra { const Kokkos::UnorderedMap& padding, const bool verbose) { - using ::Tpetra::Details::ProfilingRegion; - using Tpetra::Details::padCrsArrays; + using Details::ProfilingRegion; + using Details::padCrsArrays; using std::endl; using execution_space = typename device_type::execution_space; - using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; + using row_ptrs_type = + typename local_graph_type::row_map_type::non_const_type; using indices_type = t_GlobalOrdinal_1D; - using local_indices_type = typename local_graph_type::entries_type::non_const_type; - using range_policy = Kokkos::RangePolicy>; + using local_indices_type = + typename local_graph_type::entries_type::non_const_type; + using range_policy = Kokkos::RangePolicy>; ProfilingRegion regionCAP ("Tpetra::CrsGraph::applyCrsPadding"); std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("applyCrsPadding"); + prefix = this->createPrefix("CrsGraph", "applyCrsPadding"); std::ostringstream os; os << *prefix << "padding.size(): " << padding.size() << ", indicesAreAllocated: " @@ -5235,26 +5239,6 @@ namespace Tpetra { this->k_rowPtrs_ = row_ptrs_beg; } - template - std::unique_ptr - CrsGraph:: - createPrefix(const char methodName[]) const - { - int myRank = -1; - auto map = this->getMap(); - if (! map.is_null()) { - auto comm = map->getComm(); - if (! 
comm.is_null()) { - myRank = comm->getRank(); - } - } - std::ostringstream pfxStrm; - pfxStrm << "Proc " << myRank << ": Tpetra::CrsGraph::" - << methodName << ": "; - return std::unique_ptr( - new std::string(pfxStrm.str())); - } - template Kokkos::UnorderedMap CrsGraph:: @@ -5270,7 +5254,8 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("computeCrsPadding(same & permute)"); + prefix = this->createPrefix("CrsGraph", + "computeCrsPadding(same & permute)"); std::ostringstream os; os << *prefix << "{numSameIDs: " << numSameIDs << ", numPermutes: " << permuteFromLIDs.extent(0) << "}" @@ -5432,7 +5417,8 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("computeCrsPadding(imports)"); + prefix = + this->createPrefix("CrsGraph", "computeCrsPadding(imports)"); std::ostringstream os; os << *prefix << "{importLIDs.extent(0): " << importLIDs.extent(0) @@ -6119,18 +6105,19 @@ namespace Tpetra { Distributor& /* distor */, const CombineMode /* combineMode */ ) { - using ::Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::ProfilingRegion; using std::endl; using LO = local_ordinal_type; using GO = global_ordinal_type; const char tfecfFuncName[] = "unpackAndCombine: "; - ProfilingRegion regionCGC ("Tpetra::CrsGraph::unpackAndCombine"); - const bool verbose = ::Tpetra::Details::Behavior::verbose ("CrsGraph"); + ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine"); + const bool verbose = Behavior::verbose("CrsGraph"); std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("unpackAndCombine"); + prefix = this->createPrefix("CrsGraph", "unpackAndCombine"); std::ostringstream os; os << *prefix << endl; std::cerr << os.str (); diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index 3677a474bc36..afba90422513 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ 
b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3439,9 +3439,6 @@ namespace Tpetra { const bool verbose); private: - std::unique_ptr - createPrefix(const char methodName[]) const; - void copyAndPermuteImpl (const RowMatrix& source, const size_t numSameIDs, diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 5b5a614cc97e..f41cb46ee9e2 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -322,7 +322,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("CrsMatrix(CrsGraph,params)"); + prefix = this->createPrefix("CrsMatrix", "CrsMatrix(graph,params)"); std::ostringstream os; os << *prefix << endl; std::cerr << os.str (); @@ -1113,7 +1113,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("allocateValues"); + prefix = this->createPrefix("CrsMatrix", "allocateValues"); std::ostringstream os; os << *prefix << "{lg: " << (lg == LocalIndices ? 
"Local" : "Global") << "Indices" @@ -1291,7 +1291,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("fillLocalGraphAndMatrix"); + prefix = this->createPrefix("CrsMatrix", "fillLocalGraphAndMatrix"); std::ostringstream os; os << *prefix << endl; std::cerr << os.str (); @@ -1687,7 +1687,7 @@ namespace Tpetra { const bool verbose = Details::Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("fillLocalMatrix"); + prefix = this->createPrefix("CrsMatrix", "fillLocalMatrix"); std::ostringstream os; os << *prefix << "lclNumRows: " << lclNumRows << endl; std::cerr << os.str (); @@ -4318,7 +4318,8 @@ namespace Tpetra { CrsMatrix:: globalAssemble () { - using ::Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::ProfilingRegion; using Teuchos::Comm; using Teuchos::outArg; using Teuchos::RCP; @@ -4334,10 +4335,10 @@ namespace Tpetra { const char tfecfFuncName[] = "globalAssemble: "; // for exception macro ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble"); - const bool verbose = Details::Behavior::verbose("CrsMatrix"); + const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("globalAssemble"); + prefix = this->createPrefix("CrsMatrix", "globalAssemble"); std::ostringstream os; os << *prefix << "nonlocals_.size()=" << nonlocals_.size() << endl; @@ -4644,7 +4645,8 @@ namespace Tpetra { const Teuchos::RCP& rangeMap, const Teuchos::RCP& params) { - using ::Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::ProfilingRegion; using Teuchos::ArrayRCP; using Teuchos::RCP; using Teuchos::rcp; @@ -4652,10 +4654,10 @@ namespace Tpetra { const char tfecfFuncName[] = "fillComplete: "; ProfilingRegion regionFillComplete ("Tpetra::CrsMatrix::fillComplete"); - const bool verbose = Details::Behavior::verbose("CrsMatrix"); + const bool verbose = Behavior::verbose("CrsMatrix"); 
std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("fillComplete(dom,ran,p)"); + prefix = this->createPrefix("CrsMatrix", "fillComplete(dom,ran,p)"); std::ostringstream os; os << *prefix << endl; std::cerr << os.str (); @@ -6389,26 +6391,6 @@ namespace Tpetra { return (srcRowMat != NULL); } - template - std::unique_ptr - CrsMatrix:: - createPrefix(const char methodName[]) const - { - int myRank = -1; - auto map = this->getMap(); - if (! map.is_null()) { - auto comm = map->getComm(); - if (! comm.is_null()) { - myRank = comm->getRank(); - } - } - std::ostringstream pfxStrm; - pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::" - << methodName << ": "; - return std::unique_ptr( - new std::string(pfxStrm.str())); - } - template void CrsMatrix:: @@ -6416,8 +6398,8 @@ namespace Tpetra { const Kokkos::UnorderedMap& padding, const bool verbose) { - using ::Tpetra::Details::ProfilingRegion; - using Tpetra::Details::padCrsArrays; + using Details::ProfilingRegion; + using Details::padCrsArrays; using std::endl; using execution_space = typename device_type::execution_space; using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; @@ -6427,7 +6409,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("applyCrsPadding"); + prefix = this->createPrefix("CrsMatrix", "applyCrsPadding"); std::ostringstream os; os << *prefix << "padding.size(): " << padding.size() << endl; std::cerr << os.str (); @@ -6734,18 +6716,19 @@ namespace Tpetra { const Kokkos::DualView& permuteToLIDs, const Kokkos::DualView& permuteFromLIDs) { - using Tpetra::Details::dualViewStatusToString; - using Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::dualViewStatusToString; + using Details::ProfilingRegion; using std::endl; // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC. 
const char tfecfFuncName[] = "copyAndPermute: "; - ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermute"); + ProfilingRegion regionCAP("Tpetra::CrsMatrix::copyAndPermute"); - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - prefix = createPrefix("copyAndPermute"); + prefix = this->createPrefix("CrsMatrix", "copyAndPermute"); std::ostringstream os; os << *prefix << endl << *prefix << " numSameIDs: " << numSameIDs << endl diff --git a/packages/tpetra/core/src/Tpetra_DistObject_decl.hpp b/packages/tpetra/core/src/Tpetra_DistObject_decl.hpp index 7b7841224f28..486812995975 100644 --- a/packages/tpetra/core/src/Tpetra_DistObject_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_DistObject_decl.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -56,6 +54,7 @@ #include "Tpetra_SrcDistObject.hpp" #include "Tpetra_DistObject_fwd.hpp" #include "Kokkos_ArithTraits.hpp" +#include #include // #ifndef HAVE_TPETRA_TRANSFER_TIMERS @@ -906,6 +905,10 @@ namespace Tpetra { Teuchos::RCP map_; protected: + std::unique_ptr + createPrefix(const char className[], + const char methodName[]) const; + /// \brief Buffer into which packed data are imported (received /// from other processes). /// diff --git a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp index 45a6853d0227..271561e1f9ca 100644 --- a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp +++ b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -54,6 +52,7 @@ #include "Tpetra_Details_reallocDualViewIfNeeded.hpp" #include "Tpetra_Details_Behavior.hpp" #include "Tpetra_Details_Profiling.hpp" +#include "Teuchos_TypeNameTraits.hpp" #include #include #include @@ -276,13 +275,14 @@ namespace Tpetra { const CombineMode CM, const bool restrictedMode) { + using Details::Behavior; using std::endl; const char modeString[] = "doImport (forward mode)"; // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug // output to std::cerr on every MPI process. This is unwise for // runs with large numbers of MPI processes. - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { int myRank = 0; @@ -302,7 +302,8 @@ namespace Tpetra { os << *prefix << "Start" << endl; std::cerr << os.str (); } - this->doTransfer (source, importer, modeString, DoForward, CM, restrictedMode); + this->doTransfer (source, importer, modeString, DoForward, CM, + restrictedMode); if (verbose) { std::ostringstream os; os << *prefix << "Done!" << endl; @@ -318,13 +319,14 @@ namespace Tpetra { const CombineMode CM, const bool restrictedMode) { + using Details::Behavior; using std::endl; const char modeString[] = "doExport (forward mode)"; // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug // output to std::cerr on every MPI process. This is unwise for // runs with large numbers of MPI processes. 
- const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { int myRank = 0; @@ -361,13 +363,14 @@ namespace Tpetra { const CombineMode CM, const bool restrictedMode) { + using Details::Behavior; using std::endl; const char modeString[] = "doImport (reverse mode)"; // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug // output to std::cerr on every MPI process. This is unwise for // runs with large numbers of MPI processes. - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { int myRank = 0; @@ -404,13 +407,14 @@ namespace Tpetra { const CombineMode CM, const bool restrictedMode) { + using Details::Behavior; using std::endl; const char modeString[] = "doExport (reverse mode)"; // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug // output to std::cerr on every MPI process. This is unwise for // runs with large numbers of MPI processes. - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { int myRank = 0; @@ -463,23 +467,26 @@ namespace Tpetra { const CombineMode CM, bool restrictedMode) { - using ::Tpetra::Details::getDualViewCopyFromArrayView; - using ::Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::getDualViewCopyFromArrayView; + using Details::ProfilingRegion; using std::endl; + const char funcName[] = "Tpetra::DistObject::doTransfer"; - ProfilingRegion region_doTransfer ("Tpetra::DistObject::doTransfer"); - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + ProfilingRegion region_doTransfer(funcName); + const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - auto map = this->getMap (); - auto comm = map.is_null () ? 
Teuchos::null : map->getComm (); - const int myRank = comm.is_null () ? -1 : comm->getRank (); + auto map = this->getMap(); + auto comm = map.is_null() ? Teuchos::null : map->getComm(); + const int myRank = comm.is_null() ? -1 : comm->getRank(); std::ostringstream os; - os << "Proc " << myRank << ": Tpetra::DistObject::doTransfer: "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << *prefix << "Source type: " << typeid (src).name () - << ", Target type: " << typeid (*this).name () << endl; - std::cerr << os.str (); + os << "Proc " << myRank << ": " << funcName << ": "; + prefix = std::unique_ptr( + new std::string(os.str())); + os << *prefix << "Source type: " << Teuchos::typeName(src) + << ", Target type: " << Teuchos::typeName(*this) << endl; + std::cerr << os.str(); } // "Restricted Mode" does two things: @@ -492,9 +499,9 @@ namespace Tpetra { // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug // checks. These may communicate more. - const bool debug = ::Tpetra::Details::Behavior::debug (); + const bool debug = Behavior::debug("DistObject"); if (debug) { - if (!restrictedMode && revOp == DoForward) { + if (! restrictedMode && revOp == DoForward) { const bool myMapSameAsTransferTgtMap = this->getMap ()->isSameAs (* (transfer.getTargetMap ())); TEUCHOS_TEST_FOR_EXCEPTION @@ -504,7 +511,7 @@ namespace Tpetra { "(in the sense of Tpetra::Map::isSameAs) as the input " "Export/Import object's target Map."); } - else if (!restrictedMode && revOp == DoReverse) { + else if (! 
restrictedMode && revOp == DoReverse) { const bool myMapSameAsTransferSrcMap = this->getMap ()->isSameAs (* (transfer.getSourceMap ())); TEUCHOS_TEST_FOR_EXCEPTION @@ -572,37 +579,32 @@ namespace Tpetra { "Tpetra::DistObject::" << modeString << ": Transfer object " "cannot have permutes in restricted mode."); -const bool useTheNewInterface = true; - - if (useTheNewInterface) { - using ::Tpetra::Details::Behavior; - // Do we need all communication buffers to live on host? - const bool commOnHost = ! Behavior::assumeMpiIsCudaAware (); - if (verbose) { - std::ostringstream os; - os << *prefix << "doTransfer: Use new interface; " - "commOnHost=" << (commOnHost ? "true" : "false") << endl; - std::cerr << os.str (); - } - - using const_lo_dv_type = - Kokkos::DualView; - const_lo_dv_type permToLIDs = (revOp == DoForward) ? - transfer.getPermuteToLIDs_dv () : - transfer.getPermuteFromLIDs_dv (); - const_lo_dv_type permFromLIDs = (revOp == DoForward) ? - transfer.getPermuteFromLIDs_dv () : - transfer.getPermuteToLIDs_dv (); - const_lo_dv_type remoteLIDs = (revOp == DoForward) ? - transfer.getRemoteLIDs_dv () : - transfer.getExportLIDs_dv (); - const_lo_dv_type exportLIDs = (revOp == DoForward) ? - transfer.getExportLIDs_dv () : - transfer.getRemoteLIDs_dv (); - doTransferNew (src, CM, numSameIDs, permToLIDs, permFromLIDs, - remoteLIDs, exportLIDs, distor, revOp, commOnHost,restrictedMode); + // Do we need all communication buffers to live on host? + const bool commOnHost = ! Behavior::assumeMpiIsCudaAware (); + if (verbose) { + std::ostringstream os; + os << *prefix << "doTransfer: Use new interface; " + "commOnHost=" << (commOnHost ? "true" : "false") << endl; + std::cerr << os.str (); } + using const_lo_dv_type = + Kokkos::DualView; + const_lo_dv_type permToLIDs = (revOp == DoForward) ? + transfer.getPermuteToLIDs_dv () : + transfer.getPermuteFromLIDs_dv (); + const_lo_dv_type permFromLIDs = (revOp == DoForward) ? 
+ transfer.getPermuteFromLIDs_dv () : + transfer.getPermuteToLIDs_dv (); + const_lo_dv_type remoteLIDs = (revOp == DoForward) ? + transfer.getRemoteLIDs_dv () : + transfer.getExportLIDs_dv (); + const_lo_dv_type exportLIDs = (revOp == DoForward) ? + transfer.getExportLIDs_dv () : + transfer.getRemoteLIDs_dv (); + doTransferNew (src, CM, numSameIDs, permToLIDs, permFromLIDs, + remoteLIDs, exportLIDs, distor, revOp, commOnHost, + restrictedMode); if (verbose) { std::ostringstream os; @@ -641,8 +643,9 @@ const bool useTheNewInterface = true; reallocArraysForNumPacketsPerLid (const size_t numExportLIDs, const size_t numImportLIDs) { - using ::Tpetra::Details::reallocDualViewIfNeeded; + using Details::Behavior; using ::Tpetra::Details::dualViewStatusToString; + using ::Tpetra::Details::reallocDualViewIfNeeded; using std::endl; // If an array is already allocated, and if is at least // tooBigFactor times bigger than it needs to be, free it and @@ -650,7 +653,7 @@ const bool useTheNewInterface = true; // Otherwise, take subviews to reduce allocation size. 
constexpr size_t tooBigFactor = 10; - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { const int myRank = [&] () { @@ -735,6 +738,7 @@ const bool useTheNewInterface = true; const bool commOnHost, const bool restrictedMode) { + using Details::Behavior; using ::Tpetra::Details::dualViewStatusToString; using ::Tpetra::Details::getArrayViewFromDualView; using ::Tpetra::Details::ProfilingRegion; @@ -745,15 +749,16 @@ const bool useTheNewInterface = true; using std::endl; using DT = device_type; using DES = typename DT::execution_space; + const char funcName[] = "Tpetra::DistObject::doTransferNew"; - ProfilingRegion region_dTN ("Tpetra::DistObject::doTransferNew"); + ProfilingRegion region_dTN(funcName); #ifdef HAVE_TPETRA_TRANSFER_TIMERS // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor // of Kokkos profiling. Teuchos::TimeMonitor doXferMon (*doXferTimer_); #endif // HAVE_TPETRA_TRANSFER_TIMERS - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("DistObject"); // Prefix for verbose output. Use a pointer, so we don't pay for // string construction unless needed. We set this below. std::unique_ptr prefix; @@ -762,7 +767,7 @@ const bool useTheNewInterface = true; auto comm = map.is_null () ? Teuchos::null : map->getComm (); const int myRank = comm.is_null () ? 0 : comm->getRank (); std::ostringstream os; - os << "Proc " << myRank << ": Tpetra::CrsMatrix::doTransferNew: "; + os << "Proc " << myRank << ": " << funcName << ": "; prefix = std::unique_ptr (new std::string (os.str ())); } @@ -1314,6 +1319,27 @@ const bool useTheNewInterface = true; this->describe (*out, Teuchos::VERB_DEFAULT); } + template + std::unique_ptr + DistObject:: + createPrefix(const char className[], + const char methodName[]) const + { + int myRank = -1; + auto map = this->getMap(); + if (! 
map.is_null()) { + auto comm = map->getComm(); + if (! comm.is_null()) { + myRank = comm->getRank(); + } + } + std::ostringstream pfxStrm; + pfxStrm << "Proc " << myRank << ": Tpetra::" << className + << "::" << methodName << ": "; + return std::unique_ptr( + new std::string(pfxStrm.str())); + } + template void removeEmptyProcessesInPlace (Teuchos::RCP& input, From 16e70eef37a95ee4568cb99eae954ab98153659b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 31 Jan 2020 13:27:50 -0700 Subject: [PATCH 04/49] Tpetra::{CrsGraph, CrsMatrix, DistObject}: Improve verbose output Remove redundant code that DistObject::createPrefix replaces. Make DistObject, CrsGraph, and CrsMatrix call Behavior::verbose with their class name, instead of with no arguments. This makes verbose output more precise. --- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 22 ++- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 152 ++++++------------ .../tpetra/core/src/Tpetra_DistObject_def.hpp | 134 ++++----------- 3 files changed, 90 insertions(+), 218 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 42cfe2e36dda..65371e9cb3ec 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -4970,22 +4970,21 @@ namespace Tpetra { const Kokkos::DualView& permuteFromLIDs) { + using Details::Behavior; using std::endl; using LO = local_ordinal_type; using GO = global_ordinal_type; using this_type = CrsGraph; using row_graph_type = RowGraph; const char tfecfFuncName[] = "copyAndPermute: "; - const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph"); - const bool verbose = ::Tpetra::Details::Behavior::verbose ("CrsGraph"); + const bool debug = Behavior::debug("CrsGraph"); + const bool verbose = Behavior::verbose("CrsGraph"); std::unique_ptr prefix; if (debug) { + prefix = this->createPrefix("CrsGraph", "copyAndPermute"); std::ostringstream os; - const int myRank 
= this->getMap ()->getComm ()->getRank (); - os << "Proc " << myRank << ": Tpetra::CrsGraph::copyAndPermute: "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << endl; + os << *prefix << endl; std::cerr << os.str (); } @@ -5490,11 +5489,9 @@ namespace Tpetra { const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph"); std::unique_ptr prefix; if (debug) { + prefix = this->createPrefix("CrsGraph", "packAndPrepare"); std::ostringstream os; - const int myRank = this->getMap ()->getComm ()->getRank (); - os << "Proc " << myRank << ": Tpetra::CrsGraph::packAndPrepare: "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << "Start" << endl; + os << *prefix << "Start" << endl; std::cerr << os.str (); } @@ -5842,10 +5839,9 @@ namespace Tpetra { std::unique_ptr prefix; if (debug) { + prefix = this->createPrefix("CrsGraph", "packFillActiveNew"); std::ostringstream os; - os << "Proc " << myRank << ": Tpetra::CrsGraph::packFillActiveNew: "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << "Start" << endl; + os << *prefix << "Start" << endl; std::cerr << os.str (); } diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index f41cb46ee9e2..b0d644569dff 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6791,8 +6791,9 @@ namespace Tpetra { size_t& constantNumPackets, Distributor& distor) { - using Tpetra::Details::dualViewStatusToString; - using Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::dualViewStatusToString; + using Details::ProfilingRegion; using Teuchos::outArg; using Teuchos::REDUCE_MAX; using Teuchos::reduceAll; @@ -6802,8 +6803,8 @@ namespace Tpetra { const char tfecfFuncName[] = "packAndPrepare: "; ProfilingRegion regionPAP ("Tpetra::CrsMatrix::packAndPrepare"); - const bool debug = ::Tpetra::Details::Behavior::debug (); - const bool verbose = 
::Tpetra::Details::Behavior::verbose (); + const bool debug = Behavior::debug("CrsMatrix"); + const bool verbose = Behavior::verbose("CrsMatrix"); // Processes on which the communicator is null should not participate. Teuchos::RCP > pComm = this->getComm (); @@ -6815,11 +6816,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { - prefix = [myRank] () { - std::ostringstream pfxStrm; - pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::packAndPrepare: "; - return std::unique_ptr (new std::string (pfxStrm.str ())); - } (); + prefix = this->createPrefix("CrsMatrix", "packAndPrepare"); std::ostringstream os; os << *prefix << "Start" << endl << *prefix << " " @@ -7176,7 +7173,8 @@ namespace Tpetra { size_t& totalNumEntries, const Kokkos::DualView& exportLIDs) const { - using Tpetra::Details::dualViewStatusToString; + using Details::Behavior; + using Details::dualViewStatusToString; using std::endl; typedef impl_scalar_type IST; typedef LocalOrdinal LO; @@ -7186,24 +7184,10 @@ namespace Tpetra { // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug // output to std::cerr on every MPI process. This is unwise for // runs with large numbers of MPI processes. - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! comm.is_null ()) { - myRank = comm->getRank (); - } - } - // Restrict pfxStrm to inner scope to reduce high-water memory usage. 
- prefix = [myRank] () { - std::ostringstream pfxStrm; - pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::allocatePackSpaceNew: "; - return std::unique_ptr (new std::string (pfxStrm.str ())); - } (); - + prefix = this->createPrefix("CrsMatrix", "allocatePackSpaceNew"); std::ostringstream os; os << *prefix << "Before:" << endl @@ -7299,40 +7283,25 @@ namespace Tpetra { size_t& constantNumPackets, Distributor& /* distor */) const { + using Details::Behavior; + using Details::dualViewStatusToString; + using Details::PackTraits; + using Details::create_mirror_view_from_raw_host_array; using Kokkos::View; - using Tpetra::Details::dualViewStatusToString; - using Tpetra::Details::PackTraits; - using Tpetra::Details::create_mirror_view_from_raw_host_array; using std::endl; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef impl_scalar_type ST; - typedef typename View::HostMirror::execution_space HES; + using LO = LocalOrdinal; + using GO = GlobalOrdinal; + using ST = impl_scalar_type; + using HES = + typename View::HostMirror::execution_space; const char tfecfFuncName[] = "packNonStaticNew: "; - // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug - // output to std::cerr on every MPI process. This is unwise for - // runs with large numbers of MPI processes. - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! comm.is_null ()) { - myRank = comm->getRank (); - } - } - // Restrict pfxStrm to inner scope to reduce high-water memory usage. 
- prefix = [myRank] () { - std::ostringstream pfxStrm; - pfxStrm << "(Proc " << myRank << ") "; - return std::unique_ptr (new std::string (pfxStrm.str ())); - } (); - + prefix = this->createPrefix("CrsMatrix", "packNonStaticNew"); std::ostringstream os; - os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew:" << endl; + os << *prefix << "Start" << endl; std::cerr << os.str (); } @@ -7591,14 +7560,15 @@ namespace Tpetra { Distributor& distor, const CombineMode combineMode) { - using Tpetra::Details::dualViewStatusToString; - using Tpetra::Details::ProfilingRegion; + using Details::Behavior; + using Details::dualViewStatusToString; + using Details::ProfilingRegion; using std::endl; const char tfecfFuncName[] = "unpackAndCombine: "; ProfilingRegion regionUAC ("Tpetra::CrsMatrix::unpackAndCombine"); - const bool debug = ::Tpetra::Details::Behavior::debug (); - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool debug = Behavior::debug("CrsMatrix"); + const bool verbose = Behavior::verbose("CrsMatrix"); constexpr int numValidModes = 5; const CombineMode validModes[numValidModes] = {ADD, REPLACE, ABSMAX, INSERT, ZERO}; @@ -7608,18 +7578,7 @@ namespace Tpetra { std::unique_ptr prefix; int myRank = 0; if (verbose) { - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! 
comm.is_null ()) { - myRank = comm->getRank (); - } - } - prefix = [myRank] () { - std::ostringstream pfxStrm; - pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::unpackAndCombine: "; - return std::unique_ptr (new std::string (pfxStrm.str ())); - } (); + prefix = this->createPrefix("CrsMatrix", "unpackAndCombine"); std::ostringstream os; os << *prefix << "Start:" << endl << *prefix << " " @@ -7759,10 +7718,11 @@ namespace Tpetra { using Kokkos::View; using Kokkos::subview; using Kokkos::MemoryUnmanaged; - using Tpetra::Details::castAwayConstDualView; - using Tpetra::Details::create_mirror_view_from_raw_host_array; - using Tpetra::Details::PackTraits; - using Tpetra::Details::ScalarViewTraits; + using Details::Behavior; + using Details::castAwayConstDualView; + using Details::create_mirror_view_from_raw_host_array; + using Details::PackTraits; + using Details::ScalarViewTraits; using std::endl; typedef LocalOrdinal LO; typedef GlobalOrdinal GO; @@ -7775,28 +7735,11 @@ namespace Tpetra { typedef View vals_out_type; const char tfecfFuncName[] = "unpackAndCombineImplNonStatic: "; - // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug - // output to std::cerr on every MPI process. This is unwise for - // runs with large numbers of MPI processes. - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! comm.is_null ()) { - myRank = comm->getRank (); - } - } - // Restrict pfxStrm to inner scope to reduce high-water memory usage. 
- prefix = [myRank] () { std::ostringstream pfxStrm; pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::" "unpackAndCombineImplNonStatic: "; return std::unique_ptr (new std::string (pfxStrm.str ())); } (); - + prefix = this->createPrefix("CrsMatrix", + "unpackAndCombineImplNonStatic"); std::ostringstream os; os << *prefix << endl; // we've already printed DualViews' statuses std::cerr << os.str (); @@ -8332,15 +8275,17 @@ namespace Tpetra { const Teuchos::RCP& rangeMap, const Teuchos::RCP& params) const { - using Tpetra::Details::getArrayViewFromDualView; - using Tpetra::Details::packCrsMatrixWithOwningPIDs; - using Tpetra::Details::unpackAndCombineWithOwningPIDsCount; - using Tpetra::Details::unpackAndCombineIntoCrsArrays; + using Details::Behavior; + using Details::getArrayViewFromDualView; + using Details::packCrsMatrixWithOwningPIDs; + using Details::unpackAndCombineWithOwningPIDsCount; + using Details::unpackAndCombineIntoCrsArrays; using Teuchos::ArrayRCP; using Teuchos::ArrayView; using Teuchos::Comm; using Teuchos::ParameterList; using Teuchos::RCP; + using std::endl; typedef LocalOrdinal LO; typedef GlobalOrdinal GO; typedef node_type NT; @@ -8348,18 +8293,17 @@ namespace Tpetra { typedef Vector IntVectorType; using Teuchos::as; - const bool debug = ::Tpetra::Details::Behavior::debug (); - const bool verbose = ::Tpetra::Details::Behavior::verbose (); + const bool debug = Behavior::debug("CrsMatrix"); + const bool verbose = Behavior::verbose("CrsMatrix"); int MyPID = getComm ()->getRank (); std::unique_ptr verbosePrefix; if (verbose) { + verbosePrefix = + this->createPrefix("CrsMatrix", "transferAndFillComplete"); std::ostringstream os; - os << "Proc " << MyPID << ": transferAndFillComplete: "; - verbosePrefix = std::unique_ptr (new std::string (os.str ())); - - os << "start" << std::endl; - std::cerr << os.str (); + os << *verbosePrefix << "start" << endl; + std::cerr << os.str(); } // diff --git a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp 
b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp index 271561e1f9ca..a33f2e429f88 100644 --- a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp +++ b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp @@ -285,19 +285,7 @@ namespace Tpetra { const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! comm.is_null ()) { - myRank = comm->getRank (); - } - } - prefix = [myRank] () { - std::ostringstream os; - os << "Proc " << myRank << ": Tpetra::DistObject::doTransfer: "; - return std::unique_ptr (new std::string (os.str ())); - } (); + prefix = this->createPrefix("DistObject", modeString); std::ostringstream os; os << *prefix << "Start" << endl; std::cerr << os.str (); @@ -306,7 +294,7 @@ namespace Tpetra { restrictedMode); if (verbose) { std::ostringstream os; - os << *prefix << "Done!" << endl; + os << *prefix << "Done" << endl; std::cerr << os.str (); } } @@ -329,28 +317,16 @@ namespace Tpetra { const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! comm.is_null ()) { - myRank = comm->getRank (); - } - } - prefix = [myRank] () { - std::ostringstream os; - os << "(Proc " << myRank << ") "; - return std::unique_ptr (new std::string (os.str ())); - } (); + prefix = this->createPrefix("DistObject", modeString); std::ostringstream os; - os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl; + os << *prefix << "Start" << endl; std::cerr << os.str (); } - this->doTransfer (source, exporter, modeString, DoForward, CM, restrictedMode); + this->doTransfer (source, exporter, modeString, DoForward, CM, + restrictedMode); if (verbose) { std::ostringstream os; - os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!" 
- << endl; + os << *prefix << "Done" << endl; std::cerr << os.str (); } } @@ -373,28 +349,16 @@ namespace Tpetra { const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! comm.is_null ()) { - myRank = comm->getRank (); - } - } - prefix = [myRank] () { - std::ostringstream os; - os << "(Proc " << myRank << ") "; - return std::unique_ptr (new std::string (os.str ())); - } (); + prefix = this->createPrefix("DistObject", modeString); std::ostringstream os; - os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl; + os << *prefix << "Start" << endl; std::cerr << os.str (); } - this->doTransfer (source, exporter, modeString, DoReverse, CM, restrictedMode); + this->doTransfer (source, exporter, modeString, DoReverse, CM, + restrictedMode); if (verbose) { std::ostringstream os; - os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!" - << endl; + os << *prefix << "Done" << endl; std::cerr << os.str (); } } @@ -417,28 +381,16 @@ namespace Tpetra { const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - int myRank = 0; - auto map = this->getMap (); - if (! map.is_null ()) { - auto comm = map->getComm (); - if (! 
comm.is_null ()) { - myRank = comm->getRank (); - } - } - prefix = [myRank] () { - std::ostringstream os; - os << "(Proc " << myRank << ") "; - return std::unique_ptr (new std::string (os.str ())); - } (); + prefix = this->createPrefix("DistObject", modeString); std::ostringstream os; - os << *prefix << "Tpetra::DistObject::" << modeString << ":" << endl; + os << *prefix << "Start" << endl; std::cerr << os.str (); } - this->doTransfer (source, importer, modeString, DoReverse, CM, restrictedMode); + this->doTransfer (source, importer, modeString, DoReverse, CM, + restrictedMode); if (verbose) { std::ostringstream os; - os << *prefix << "Tpetra::DistObject::" << modeString << ": Done!" - << endl; + os << *prefix << "Done" << endl; std::cerr << os.str (); } } @@ -477,13 +429,8 @@ namespace Tpetra { const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - auto map = this->getMap(); - auto comm = map.is_null() ? Teuchos::null : map->getComm(); - const int myRank = comm.is_null() ? 
-1 : comm->getRank(); std::ostringstream os; - os << "Proc " << myRank << ": " << funcName << ": "; - prefix = std::unique_ptr( - new std::string(os.str())); + prefix = this->createPrefix("DistObject", "doTransfer"); os << *prefix << "Source type: " << Teuchos::typeName(src) << ", Target type: " << Teuchos::typeName(*this) << endl; std::cerr << os.str(); @@ -656,31 +603,21 @@ namespace Tpetra { const bool verbose = Behavior::verbose("DistObject"); std::unique_ptr prefix; if (verbose) { - const int myRank = [&] () { - auto map = this->getMap (); - if (map.get () == nullptr) { - return -1; - } - auto comm = map->getComm (); - if (comm.get () == nullptr) { - return -2; - } - return comm->getRank (); - } (); - std::ostringstream os; - os << "Proc " << myRank << ": reallocArraysForNumPacketsPerLid(" - << numExportLIDs << ", " << numImportLIDs << "): "; - prefix = std::unique_ptr (new std::string (os.str ())); - } - - if (verbose) { + prefix = this->createPrefix("DistObject", + "reallocArraysForNumPacketsPerLid"); std::ostringstream os; - os << *prefix << "before:" << endl - << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_, - "numExportPacketsPerLID_") + os << *prefix + << "numExportLIDs: " << numExportLIDs + << ", numImportLIDs: " << numImportLIDs + << endl; + os << *prefix << "DualView status before:" << endl + << *prefix + << dualViewStatusToString (this->numExportPacketsPerLID_, + "numExportPacketsPerLID_") << endl - << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_, - "numImportPacketsPerLID_") + << *prefix + << dualViewStatusToString (this->numImportPacketsPerLID_, + "numImportPacketsPerLID_") << endl; std::cerr << os.str (); } @@ -706,7 +643,7 @@ namespace Tpetra { if (verbose) { std::ostringstream os; - os << *prefix << "after:" << endl + os << *prefix << "DualView status after:" << endl << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_, "numExportPacketsPerLID_") << endl @@ -763,12 +700,7 @@ namespace Tpetra 
{ // string construction unless needed. We set this below. std::unique_ptr prefix; if (verbose) { - auto map = this->getMap (); - auto comm = map.is_null () ? Teuchos::null : map->getComm (); - const int myRank = comm.is_null () ? 0 : comm->getRank (); - std::ostringstream os; - os << "Proc " << myRank << ": " << funcName << ": "; - prefix = std::unique_ptr (new std::string (os.str ())); + prefix = this->createPrefix("DistObject", "doTransferNew"); } if (verbose) { From e4ee9b77daabab6d121bb69cb92ae0d454dba27a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 3 Feb 2020 12:57:29 -0700 Subject: [PATCH 05/49] Tpetra::CrsGraph: Separate debug from verbose; debug is run time @trilinos/tpetra 1. Fix places in CrsGraph that were using Behavior::debug(...) for verbose output, not for debug checks. 2. Always use Behavior::verbose("CrsGraph") for verbose output, not just Behavior::verbose(). This gives users finer-grained control of verbose output. 3. No more "constexpr bool debug" -- always use the run-time value of Behavior::debug("CrsGraph") for debug checks in CrsGraph. Avoid run-time overhead by calling Behavior::debug("CrsGraph") only once per CrsGraph instance, at construction time. 4. Use createPrefix in all verbose output. --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 8 + .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 466 ++++++++---------- 2 files changed, 226 insertions(+), 248 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 9d3811922b68..d67680b62b28 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -2399,6 +2399,14 @@ namespace Tpetra { /// call sortGhostColumnGIDsWithinProcessBlock(false). bool sortGhostsAssociatedWithEachProcessor_; + private: + //! Get initial value of debug_ for this object. + static bool getDebug(); + + /// \brief Whether to do extra debug checks. 
+ /// + /// This comes from Tpetra::Details::Behavior::debug("CrsGraph"). + bool debug_ = getDebug(); }; // class CrsGraph /// \brief Nonmember function to create an empty CrsGraph given a diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 65371e9cb3ec..e2b72f67f878 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -219,6 +219,13 @@ namespace Tpetra { } // namespace Details + template + bool + CrsGraph:: + getDebug() { + return Details::Behavior::debug("CrsGraph"); + } + template CrsGraph:: CrsGraph (const Teuchos::RCP& rowMap, @@ -346,8 +353,7 @@ namespace Tpetra { << " != the local number of rows " << lclNumRows << " as specified by " "the input row Map."); - const bool debug = ::Tpetra::Details::Behavior::debug (); - if (debug) { + if (debug_) { for (size_t r = 0; r < lclNumRows; ++r) { const size_t curRowCount = numEntPerRow[r]; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -421,8 +427,7 @@ namespace Tpetra { numEntPerRow.extent (0) << " != the local number of rows " << lclNumRows << " as specified by " "the input row Map."); - const bool debug = ::Tpetra::Details::Behavior::debug (); - if (debug) { + if (debug_) { for (size_t r = 0; r < lclNumRows; ++r) { const size_t curRowCount = numEntPerRow.h_view(r); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -482,8 +487,7 @@ namespace Tpetra { numEntPerRow.extent (0) << " != the local number of rows " << lclNumRows << " as specified by " "the input row Map."); - const bool debug = ::Tpetra::Details::Behavior::debug (); - if (debug) { + if (debug_) { for (size_t r = 0; r < lclNumRows; ++r) { const size_t curRowCount = numEntPerRow.h_view(r); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -543,8 +547,7 @@ namespace Tpetra { << " != the local number of rows " << lclNumRows << " as specified by " "the input row Map."); - const bool debug = ::Tpetra::Details::Behavior::debug (); - if (debug) { + if 
(debug_) { for (size_t r = 0; r < lclNumRows; ++r) { const size_t curRowCount = numEntPerRow[r]; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -1420,14 +1423,9 @@ namespace Tpetra { { lclInds = nullptr; capacity = 0; -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG if (rowInfo.allocSize != 0 && k_lclInds1D_.extent (0) != 0) { - if (debug) { + if (debug_) { if (rowInfo.offset1D + rowInfo.allocSize > static_cast (k_lclInds1D_.extent (0))) { return static_cast (-1); @@ -1607,14 +1605,9 @@ namespace Tpetra { { gblInds = nullptr; capacity = 0; -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG if (rowInfo.allocSize != 0 && k_gblInds1D_.extent (0) != 0) { - if (debug) { + if (debug_) { if (rowInfo.offset1D + rowInfo.allocSize > static_cast (k_gblInds1D_.extent (0))) { return static_cast (-1); @@ -1814,14 +1807,9 @@ namespace Tpetra { typedef LocalOrdinal LO; typedef GlobalOrdinal GO; const char tfecfFuncName[] = "insertIndices: "; -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG size_t oldNumEnt = 0; - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument, "lg must be either GlobalIndices or LocalIndices."); @@ -1834,7 +1822,7 @@ namespace Tpetra { numNewInds = new_ginds.size(); if (I == GlobalIndices) { // store global indices ArrayView gind_view = this->getGlobalViewNonConst (rowinfo); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (gind_view.size ()) < rowinfo.numEntries + numNewInds, std::logic_error, @@ -1849,7 +1837,7 @@ namespace Tpetra { } else if (I == LocalIndices) { // store local indices ArrayView lind_view = this->getLocalViewNonConst (rowinfo); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast 
(lind_view.size ()) < rowinfo.numEntries + numNewInds, std::logic_error, @@ -1868,7 +1856,7 @@ namespace Tpetra { numNewInds = new_linds.size(); if (I == LocalIndices) { // store local indices ArrayView lind_view = this->getLocalViewNonConst (rowinfo); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (lind_view.size ()) < rowinfo.numEntries + numNewInds, std::logic_error, @@ -1896,7 +1884,7 @@ namespace Tpetra { this->k_numRowEntries_(rowinfo.localRow) += numNewInds; this->setLocallyModified (); - if (debug) { + if (debug_) { const size_t chkNewNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -2013,7 +2001,7 @@ namespace Tpetra { this->k_numRowEntries_(myRow) += numNewInds; this->setLocallyModified (); - if(::Tpetra::Details::Behavior::debug ()) { + if (debug_) { const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (chkNewNumEntries != newNumEntries, std::logic_error, @@ -2165,8 +2153,7 @@ namespace Tpetra { CrsGraph:: checkInternalState () const { - const bool debug = ::Tpetra::Details::Behavior::debug (); - if (debug) { + if (debug_) { const char tfecfFuncName[] = "checkInternalState: "; const char suffix[] = " Please report this bug to the Tpetra developers."; @@ -2438,7 +2425,6 @@ namespace Tpetra { typedef typename row_map_type::non_const_value_type row_offset_type; const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: "; const char suffix[] = " Please report this bug to the Tpetra developers."; - const bool debug = ::Tpetra::Details::Behavior::debug (); const size_t size = k_rowPtrs_.extent (0); constexpr bool same = std::is_same::value; @@ -2455,7 +2441,7 @@ namespace Tpetra { // This helps with debugging when there are two memory spaces. 
typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_); Kokkos::deep_copy (ptr_h, k_rowPtrs_); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION (ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error, prefix << "size_t == row_offset_type, but ptr_h.extent(0) = " @@ -2481,7 +2467,7 @@ namespace Tpetra { Kokkos::deep_copy (ptr_h, ptr_d); ptr_st = Kokkos::Compat::persistingView (ptr_h); } - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION (same && size != 0 && ptr_rot.is_null (), std::logic_error, prefix << "size_t == row_offset_type and size = " << size @@ -2498,7 +2484,7 @@ namespace Tpetra { Kokkos::Impl::if_c, ArrayRCP >::select (ptr_rot, ptr_st); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION (size != 0 && retval.is_null (), std::logic_error, prefix << "size = " << size << " != 0, but retval is null." << suffix); @@ -2608,11 +2594,6 @@ namespace Tpetra { Teuchos::ArrayView& indices) const { const char tfecfFuncName[] = "getLocalRowView: "; -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (isGloballyIndexed (), std::runtime_error, "The graph's indices are " @@ -2634,7 +2615,7 @@ namespace Tpetra { indices = indices (0, rowInfo.numEntries); } - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (indices.size ()) != getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() " @@ -2652,11 +2633,6 @@ namespace Tpetra { Teuchos::ArrayView& indices) const { const char tfecfFuncName[] = "getGlobalRowView: "; -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (isLocallyIndexed (), std::runtime_error, "The graph's indices are " @@ -2672,7 +2648,7 @@ namespace Tpetra { indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries); } - if (debug) { + if 
(debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (indices.size ()) != getNumEntriesInGlobalRow (globalRow), @@ -2707,18 +2683,11 @@ namespace Tpetra { if (! indicesAreAllocated ()) { // Allocating indices takes a while and only needs to be done // once per MPI process, so it's OK to query TPETRA_VERBOSE. - using ::Tpetra::Details::Behavior; - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); allocateIndices (LocalIndices, verbose); } -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG - - if (debug) { + if (debug_) { // In debug mode, if the graph has a column Map, test whether any // of the given column indices are not in the column Map. Keep // track of the invalid column indices so we can tell the user @@ -2754,7 +2723,7 @@ namespace Tpetra { insertLocalIndicesImpl (localRow, indices); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error, "At the end of insertLocalIndices, ! indicesAreAllocated() || " @@ -2784,11 +2753,6 @@ namespace Tpetra { { typedef LocalOrdinal LO; const char tfecfFuncName[] = "insertGlobalIndices: "; -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (this->isLocallyIndexed (), std::runtime_error, @@ -2805,13 +2769,12 @@ namespace Tpetra { if (! this->indicesAreAllocated ()) { // Allocating indices takes a while and only needs to be done // once per MPI process, so it's OK to query TPETRA_VERBOSE. 
- using ::Tpetra::Details::Behavior; - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); this->allocateIndices (GlobalIndices, verbose); } const LO lclRow = this->rowMap_->getLocalElement (gblRow); if (lclRow != Tpetra::Details::OrdinalTraits::invalid ()) { - if (debug) { + if (debug_) { if (this->hasColMap ()) { using std::endl; const map_type& colMap = * (this->colMap_); @@ -2851,7 +2814,7 @@ namespace Tpetra { (true, std::invalid_argument, os.str ()); } } - } // debug + } // debug_ this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds); } else { // a nonlocal row @@ -2898,8 +2861,7 @@ namespace Tpetra { if (! this->indicesAreAllocated ()) { // Allocating indices takes a while and only needs to be done // once per MPI process, so it's OK to query TPETRA_VERBOSE. - using ::Tpetra::Details::Behavior; - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); this->allocateIndices (GlobalIndices, verbose); } @@ -2978,8 +2940,7 @@ namespace Tpetra { if (! indicesAreAllocated ()) { // Allocating indices takes a while and only needs to be done // once per MPI process, so it's OK to query TPETRA_VERBOSE. - using ::Tpetra::Details::Behavior; - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); allocateIndices (LocalIndices, verbose); } @@ -2990,13 +2951,8 @@ namespace Tpetra { if (k_numRowEntries_.extent (0) != 0) { this->k_numRowEntries_(lrow) = 0; } -#ifdef HAVE_TPETRA_DEBUG - constexpr bool debug = true; -#else - constexpr bool debug = false; -#endif // HAVE_TPETRA_DEBUG - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (getNumEntriesInLocalRow (lrow) != 0 || ! 
indicesAreAllocated () || @@ -3032,7 +2988,7 @@ namespace Tpetra { } } - if (this->isSorted() && Tpetra::Details::Behavior::debug()) { + if (debug_ && this->isSorted()) { // Verify that the local indices are actually sorted int notSorted = 0; using exec_space = typename local_graph_type::execution_space; @@ -3482,10 +3438,17 @@ namespace Tpetra { const Teuchos::RCP& rangeMap, const Teuchos::RCP& params) { - using ::Tpetra::Details::Behavior; + using std::endl; const char tfecfFuncName[] = "fillComplete: "; - const bool debug = Behavior::debug("CrsGraph"); - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); + + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", "fillComplete"); + std::ostringstream os; + os << *prefix << "Start" << endl; + std::cerr << os.str(); + } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (! isFillActive () || isFillComplete (), std::runtime_error, @@ -3570,8 +3533,8 @@ namespace Tpetra { // The method doesn't do any work if the indices are already local. 
const std::pair makeIndicesLocalResult = this->makeIndicesLocal(verbose); - if (debug) { // In debug mode, print error output on all processes - using ::Tpetra::Details::gathervPrint; + if (debug_) { + using Details::gathervPrint; using Teuchos::RCP; using Teuchos::REDUCE_MIN; using Teuchos::reduceAll; @@ -3634,6 +3597,12 @@ namespace Tpetra { } this->fillComplete_ = true; this->checkInternalState (); + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } @@ -3797,7 +3766,6 @@ namespace Tpetra { typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type; const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or " "expertStaticFillComplete): "; - const bool debug = ::Tpetra::Details::Behavior::debug (); const size_t lclNumRows = this->getNodeNumRows (); // This method's goal is to fill in the two arrays (compressed @@ -3818,7 +3786,7 @@ namespace Tpetra { // currently stored in a 1-D format, with row offsets in // k_rowPtrs_ and local column indices in k_lclInds1D_. - if (debug) { + if (debug_) { // StaticProfile also means that the graph's array of row // offsets must already be allocated. 
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -3829,18 +3797,16 @@ namespace Tpetra { "(StaticProfile branch) k_rowPtrs_.extent(0) = " << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = " << (lclNumRows + 1) << "."); - { - const size_t numOffsets = k_rowPtrs_.extent (0); - const auto valToCheck = - ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (numOffsets != 0 && - k_lclInds1D_.extent (0) != valToCheck, - std::logic_error, "(StaticProfile branch) numOffsets = " << - numOffsets << " != 0 and k_lclInds1D_.extent(0) = " << - k_lclInds1D_.extent (0) << " != k_rowPtrs_(" << numOffsets << - ") = " << valToCheck << "."); - } + const size_t numOffsets = k_rowPtrs_.extent (0); + const auto valToCheck = + ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (numOffsets != 0 && + k_lclInds1D_.extent (0) != valToCheck, + std::logic_error, "(StaticProfile branch) numOffsets = " << + numOffsets << " != 0 and k_lclInds1D_.extent(0) = " << + k_lclInds1D_.extent (0) << " != k_rowPtrs_(" << numOffsets << + ") = " << valToCheck << "."); } size_t allocSize = 0; @@ -3876,7 +3842,7 @@ namespace Tpetra { // bound on the number of entries in each row, but didn't fill // all those entries. - if (debug) { + if (debug_) { if (k_rowPtrs_.extent (0) != 0) { const size_t numOffsets = static_cast (k_rowPtrs_.extent (0)); @@ -3906,7 +3872,7 @@ namespace Tpetra { // It's ok that k_numRowEntries_ is a host View; the // function can handle this. 
typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_; - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (numRowEnt_h.extent (0)) != lclNumRows, std::logic_error, "(StaticProfile unpacked branch) " @@ -3916,22 +3882,20 @@ namespace Tpetra { lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h); - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (ptr_d.extent (0)) != lclNumRows + 1, std::logic_error, "(StaticProfile unpacked branch) After " "allocating ptr_d, ptr_d.extent(0) = " << ptr_d.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1) << "."); - { - const auto valToCheck = - ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (valToCheck != lclTotalNumEntries, std::logic_error, - "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked " - "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows - << ") = " << valToCheck << " != total number of entries on " - "the calling process = " << lclTotalNumEntries << "."); - } + const auto valToCheck = + ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (valToCheck != lclTotalNumEntries, std::logic_error, + "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked " + "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows + << ") = " << valToCheck << " != total number of entries on " + "the calling process = " << lclTotalNumEntries << "."); } } @@ -3958,7 +3922,7 @@ namespace Tpetra { Kokkos::parallel_for (range_type (0, lclNumRows), f); } - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (ptr_d.extent (0) == 0, std::logic_error, "(StaticProfile " "\"Optimize Storage\"=true branch) After packing, " @@ -3981,7 +3945,7 @@ namespace Tpetra { ptr_d_const = k_rowPtrs_; ind_d = k_lclInds1D_; - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (ptr_d_const.extent (0) == 0, std::logic_error, 
"(StaticProfile " "\"Optimize Storage\"=false branch) ptr_d_const.extent(0) = 0. " @@ -4000,7 +3964,7 @@ namespace Tpetra { } } - if (debug) { + if (debug_) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (static_cast (ptr_d_const.extent (0)) != lclNumRows + 1, std::logic_error, "After packing, ptr_d_const.extent(0) = " << @@ -4299,8 +4263,7 @@ namespace Tpetra { newDomainMap.is_null (), std::invalid_argument, prefix << "The new domain Map must be nonnull."); - const bool debug = ::Tpetra::Details::Behavior::debug (); - if (debug) { + if (debug_) { if (newImporter.is_null ()) { // It's not a good idea to put expensive operations in a macro // clause, even if they are side effect - free, because macros @@ -4643,9 +4606,8 @@ namespace Tpetra { CrsGraph:: makeColMap (Teuchos::Array& remotePIDs) { - using ::Tpetra::Details::ProfilingRegion; + using Details::ProfilingRegion; ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap"); - const bool debug = ::Tpetra::Details::Behavior::debug (); // this->colMap_ should be null at this point, but we accept the // future possibility that it might not be (esp. if we decide @@ -4663,7 +4625,7 @@ namespace Tpetra { // now, we only propagate (to all processes) and report the error // in debug mode. In the future, we need to add the local/global // error handling scheme used in BlockCrsMatrix to this class. - if (debug) { + if (debug_) { using Teuchos::outArg; using Teuchos::REDUCE_MIN; using Teuchos::reduceAll; @@ -4671,8 +4633,8 @@ namespace Tpetra { std::ostringstream errStrm; const int lclErrCode = - ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (), - *this, sortEachProcsGids, &errStrm); + Details::makeColMap (colMap, remotePIDs, + getDomainMap (), *this, sortEachProcsGids, &errStrm); auto comm = this->getComm (); if (! comm.is_null ()) { const int lclSuccess = (lclErrCode == 0) ? 
1 : 0; @@ -4681,7 +4643,7 @@ namespace Tpetra { outArg (gblSuccess)); if (gblSuccess != 1) { std::ostringstream os; - Tpetra::Details::gathervPrint (os, errStrm.str (), *comm); + Details::gathervPrint (os, errStrm.str (), *comm); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (true, std::runtime_error, "An error happened on at least one " "(MPI) process in the CrsGraph's communicator. Here are all " @@ -4690,8 +4652,8 @@ namespace Tpetra { } } else { - (void) ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (), - *this, sortEachProcsGids, nullptr); + (void) Details::makeColMap (colMap, remotePIDs, + getDomainMap (), *this, sortEachProcsGids, nullptr); } // See above. We want to admit the possibility of makeColMap // actually revising an existing column Map, even though that @@ -4970,18 +4932,16 @@ namespace Tpetra { const Kokkos::DualView& permuteFromLIDs) { - using Details::Behavior; using std::endl; using LO = local_ordinal_type; using GO = global_ordinal_type; using this_type = CrsGraph; using row_graph_type = RowGraph; const char tfecfFuncName[] = "copyAndPermute: "; - const bool debug = Behavior::debug("CrsGraph"); - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); std::unique_ptr prefix; - if (debug) { + if (verbose) { prefix = this->createPrefix("CrsGraph", "copyAndPermute"); std::ostringstream os; os << *prefix << endl; @@ -4999,7 +4959,7 @@ namespace Tpetra { const row_graph_type& srcRowGraph = dynamic_cast (source); - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "Target is StaticProfile; do CRS padding" << endl; std::cerr << os.str (); @@ -5023,7 +4983,7 @@ namespace Tpetra { // "Copy" part of "copy and permute." 
// if (src_filled || srcCrsGraph == nullptr) { - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl; std::cerr << os.str (); @@ -5042,7 +5002,7 @@ namespace Tpetra { this->insertGlobalIndices (gid, row_copy ()); } } else { - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl; std::cerr << os.str (); @@ -5081,7 +5041,7 @@ namespace Tpetra { } } - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "Done" << endl; std::cerr << os.str (); @@ -5486,13 +5446,13 @@ namespace Tpetra { const char tfecfFuncName[] = "packAndPrepare: "; ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare"); - const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); std::unique_ptr prefix; - if (debug) { + if (verbose) { prefix = this->createPrefix("CrsGraph", "packAndPrepare"); std::ostringstream os; os << *prefix << "Start" << endl; - std::cerr << os.str (); + std::cerr << os.str(); } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -5521,10 +5481,11 @@ namespace Tpetra { using Teuchos::ArrayView; using LO = local_ordinal_type; - if (debug) { + if (verbose) { std::ostringstream os; - os << *prefix << "Source is a RowGraph but not a CrsGraph" << endl; - std::cerr << os.str (); + os << *prefix << "Source is a RowGraph but not a CrsGraph" + << endl; + std::cerr << os.str(); } // RowGraph::pack serves the "old" DistObject interface. It // takes Teuchos::ArrayView and Teuchos::Array&. The latter @@ -5559,10 +5520,10 @@ namespace Tpetra { else if (! 
getColMap ().is_null () && (lclGraph_.row_map.extent (0) != 0 || getRowMap ()->getNodeNumElements () == 0)) { - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "packCrsGraphNew path" << endl; - std::cerr << os.str (); + std::cerr << os.str(); } using export_pids_type = Kokkos::DualView; @@ -5579,10 +5540,10 @@ namespace Tpetra { constantNumPackets, distor); } - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "Done" << endl; - std::cerr << os.str (); + std::cerr << os.str(); } } @@ -5617,21 +5578,24 @@ namespace Tpetra { size_t& constantNumPackets, Distributor& /* distor */) const { - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef typename Kokkos::View::HostMirror::execution_space host_execution_space; - typedef typename device_type::execution_space device_execution_space; + using std::endl; + using LO = LocalOrdinal; + using GO = GlobalOrdinal; + using host_execution_space = + typename Kokkos::View:: + HostMirror::execution_space; + using device_execution_space = + typename device_type::execution_space; const char tfecfFuncName[] = "packFillActive: "; - const bool debug = ::Tpetra::Details::Behavior::debug("CrsGraph::pack"); - const int myRank = debug ? 
this->getMap ()->getComm ()->getRank () : 0; + const bool verbose = Details::Behavior::verbose("CrsGraph"); const auto numExportLIDs = exportLIDs.size (); - if (debug) { + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", "packFillActive"); std::ostringstream os; - os << "Proc " << myRank << ": CrsGraph::pack: numExportLIDs = " - << numExportLIDs << std::endl; - std::cerr << os.str (); + os << *prefix << "numExportLIDs=" << numExportLIDs << endl; + std::cerr << os.str(); } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (numExportLIDs != numPacketsPerLID.size (), std::runtime_error, @@ -5689,11 +5653,10 @@ namespace Tpetra { }, totalNumPackets); - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": CrsGraph::pack: " - << "totalNumPackets = " << totalNumPackets << std::endl; - std::cerr << os.str (); + os << *prefix << "totalNumPackets=" << totalNumPackets << endl; + std::cerr << os.str(); } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (errCount != 0, std::logic_error, "totalNumPackets count encountered " @@ -5712,10 +5675,10 @@ namespace Tpetra { // Loop again over the rows to export, and pack rows of indices // into the output buffer. 
- if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": CrsGraph::pack: pack into exports" << std::endl; - std::cerr << os.str (); + os << *prefix << "Pack into exports" << endl; + std::cerr << os.str(); } // Teuchos::ArrayView may not be thread safe, or may not be @@ -5723,30 +5686,32 @@ namespace Tpetra { GO* const exports_raw = exports.getRawPtr (); errCount = 0; Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views", - inputRange, - [=] (const LO& i, size_t& exportsOffset, const bool final) { + inputRange, [=, &prefix] + (const LO i, size_t& exportsOffset, const bool final) { const size_t curOffset = exportsOffset; const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]); - const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow); + const RowInfo rowInfo = + this->getRowInfoFromGlobalRowIndex (gblRow); - if (rowInfo.localRow == Tpetra::Details::OrdinalTraits::invalid ()) { - if (debug) { + using TDO = Tpetra::Details::OrdinalTraits; + if (rowInfo.localRow == TDO::invalid ()) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": INVALID rowInfo: " - << "i = " << i << ", lclRow = " << exportLIDs_raw[i] << std::endl; - std::cerr << os.str (); + os << *prefix << ": INVALID rowInfo: i=" << i + << ", lclRow=" << exportLIDs_raw[i] << endl; + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); } else if (curOffset + rowInfo.numEntries > totalNumPackets) { - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": UH OH! For i=" << i << ", lclRow=" + os << *prefix << ": UH OH! For i=" << i << ", lclRow=" << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset " "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries << ") > totalNumPackets (= " << totalNumPackets << ")." 
- << std::endl; - std::cerr << os.str (); + << endl; + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); } @@ -5809,6 +5774,12 @@ namespace Tpetra { (errCount != 0, std::logic_error, "Packing encountered " "one or more errors! errCount = " << errCount << ", totalNumPackets = " << totalNumPackets << "."); + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } template @@ -5834,24 +5805,17 @@ namespace Tpetra { using exports_dv_type = Kokkos::DualView; const char tfecfFuncName[] = "packFillActiveNew: "; - const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph"); - const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0; + const bool verbose = Details::Behavior::verbose("CrsGraph"); + const auto numExportLIDs = exportLIDs.extent (0); std::unique_ptr prefix; - if (debug) { + if (verbose) { prefix = this->createPrefix("CrsGraph", "packFillActiveNew"); - std::ostringstream os; - os << *prefix << "Start" << endl; - std::cerr << os.str (); - } - - const auto numExportLIDs = exportLIDs.extent (0); - if (debug) { std::ostringstream os; os << *prefix << "numExportLIDs: " << numExportLIDs - << ", numPacketsPerLID.extent(0): " << numPacketsPerLID.extent (0) - << endl; - std::cerr << os.str (); + << ", numPacketsPerLID.extent(0): " + << numPacketsPerLID.extent(0) << endl; + std::cerr << os.str(); } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error, @@ -5894,7 +5858,7 @@ namespace Tpetra { Kokkos::View errCountView (&errCount); constexpr size_t ONE = 1; - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "Compute totalNumPackets" << endl; std::cerr << os.str (); @@ -5903,15 +5867,15 @@ namespace Tpetra { Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets", inputRange, - [=] (const LO i, size_t& curTotalNumPackets) { + [=, &prefix] (const LO i, size_t& curTotalNumPackets) { const LO lclRow = 
exportLIDs_h[i]; const GO gblRow = rowMap.getGlobalElement (lclRow); if (gblRow == Tpetra::Details::OrdinalTraits::invalid ()) { - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": For i=" << i << ", lclRow=" - << lclRow << " not in row Map on this process" << endl; - std::cerr << os.str (); + os << *prefix << "For i=" << i << ", lclRow=" << lclRow + << " not in row Map on this process" << endl; + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); numPacketsPerLID_h(i) = 0; @@ -5924,7 +5888,7 @@ namespace Tpetra { }, totalNumPackets); - if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "totalNumPackets: " << totalNumPackets << ", errCount: " << errCount << endl; @@ -5936,7 +5900,7 @@ namespace Tpetra { << ", errCount: " << errCount << "."); // Allocate space for all the column indices to pack. - if (static_cast (exports.extent (0)) < totalNumPackets) { + if (size_t(exports.extent (0)) < totalNumPackets) { // FIXME (mfh 09 Apr 2019) Create without initializing. exports = exports_dv_type ("exports", totalNumPackets); } @@ -5949,10 +5913,10 @@ namespace Tpetra { // Loop again over the rows to export, and pack rows of indices // into the output buffer. 
- if (debug) { + if (verbose) { std::ostringstream os; os << *prefix << "Pack into exports buffer" << endl; - std::cerr << os.str (); + std::cerr << os.str(); } exports.clear_sync_state (); @@ -5967,17 +5931,17 @@ namespace Tpetra { errCount = 0; Kokkos::parallel_scan ("Tpetra::CrsGraph::packFillActiveNew: Pack exports", - inputRange, - [=] (const LO i, size_t& exportsOffset, const bool final) { + inputRange, [=, &prefix] + (const LO i, size_t& exportsOffset, const bool final) { const size_t curOffset = exportsOffset; const LO lclRow = exportLIDs_h(i); const GO gblRow = rowMap.getGlobalElement (lclRow); if (gblRow == Details::OrdinalTraits::invalid ()) { - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": For i=" << i << ", lclRow=" - << lclRow << " not in row Map on this process" << endl; - std::cerr << os.str (); + os << *prefix << "For i=" << i << ", lclRow=" << lclRow + << " not in row Map on this process" << endl; + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); return; @@ -5985,26 +5949,26 @@ namespace Tpetra { const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow); if (rowInfo.localRow == Details::OrdinalTraits::invalid ()) { - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": For i=" << i << ", lclRow=" - << lclRow << ", gblRow=" << gblRow << ": invalid rowInfo" + os << *prefix << "For i=" << i << ", lclRow=" << lclRow + << ", gblRow=" << gblRow << ": invalid rowInfo" << endl; - std::cerr << os.str (); + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); return; } if (curOffset + rowInfo.numEntries > totalNumPackets) { - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": For i=" << i << ", lclRow=" - << lclRow << ", gblRow=" << gblRow << ", curOffset " - "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries - << ") > totalNumPackets (= " << totalNumPackets << ")." 
- << endl; - std::cerr << os.str (); + os << *prefix << "For i=" << i << ", lclRow=" << lclRow + << ", gblRow=" << gblRow << ", curOffset (= " + << curOffset << ") + numEnt (= " << rowInfo.numEntries + << ") > totalNumPackets (= " << totalNumPackets + << ")." << endl; + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); return; @@ -6030,13 +5994,12 @@ namespace Tpetra { exportsOffset = curOffset + numEnt; } else { // error in getting local row view - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": For i=" << i << ", lclRow=" - << lclRow << ", gblRow=" << gblRow << ": " - "getLocalViewRawConst returned nonzero error code " - << errCode << endl; - std::cerr << os.str (); + os << *prefix << "For i=" << i << ", lclRow=" << lclRow + << ", gblRow=" << gblRow << ": getLocalViewRawConst " + "returned nonzero error code " << errCode << endl; + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); } @@ -6059,13 +6022,13 @@ namespace Tpetra { exportsOffset = curOffset + numEnt; } else { // error in getting global row view - if (debug) { + if (verbose) { std::ostringstream os; - os << "Proc " << myRank << ": For i=" << i << ", lclRow=" - << lclRow << ", gblRow=" << gblRow << ": " + os << *prefix << "For i=" << i << ", lclRow=" << lclRow + << ", gblRow=" << gblRow << ": " "getGlobalViewRawConst returned nonzero error code " << errCode << endl; - std::cerr << os.str (); + std::cerr << os.str(); } Kokkos::atomic_add (&errCountView(), ONE); } @@ -6080,10 +6043,10 @@ namespace Tpetra { // "one or more errors! 
errCount = " << errCount // << ", totalNumPackets = " << totalNumPackets << "."); - if (debug) { + if (verbose) { std::ostringstream os; - os << *prefix << "errCount = " << errCount << "; Done" << endl; - std::cerr << os.str (); + os << *prefix << "errCount=" << errCount << "; Done" << endl; + std::cerr << os.str(); } } @@ -6101,7 +6064,6 @@ namespace Tpetra { Distributor& /* distor */, const CombineMode /* combineMode */ ) { - using Details::Behavior; using Details::ProfilingRegion; using std::endl; using LO = local_ordinal_type; @@ -6109,7 +6071,7 @@ namespace Tpetra { const char tfecfFuncName[] = "unpackAndCombine: "; ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine"); - const bool verbose = Behavior::verbose("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); std::unique_ptr prefix; if (verbose) { @@ -6311,10 +6273,20 @@ namespace Tpetra { CrsGraph:: getLocalDiagOffsets (const Kokkos::View& offsets) const { - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; + using std::endl; + using LO = LocalOrdinal; + using GO = GlobalOrdinal; const char tfecfFuncName[] = "getLocalDiagOffsets: "; - const bool debug = ::Tpetra::Details::Behavior::debug (); + const bool verbose = Details::Behavior::verbose("CrsGraph"); + + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets"); + std::ostringstream os; + os << *prefix << "offsets.extent(0)=" << offsets.extent(0) + << endl; + std::cerr << os.str(); + } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (! hasColMap (), std::runtime_error, "The graph must have a column Map."); @@ -6336,7 +6308,7 @@ namespace Tpetra { bool allOffsetsCorrect = true; bool noOtherWeirdness = true; using wrong_offsets_type = std::vector >; - wrong_offsets_type wrongOffsets (0); + wrong_offsets_type wrongOffsets(0); // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just // the subset of Map functionality that we need below. 
@@ -6384,7 +6356,7 @@ namespace Tpetra { lclColInd, hint, sorted); offsets_h(lclRowInd) = offset; - if (debug) { + if (debug_) { // Now that we have what we think is an offset, make sure // that it really does point to the diagonal entry. Offsets // are _relative_ to each row, not absolute (for the whole @@ -6413,7 +6385,7 @@ namespace Tpetra { } } } - } // debug + } // debug_ } else { // either row is empty, or something went wrong w/ getRowInfo() offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits::invalid (); @@ -6425,23 +6397,21 @@ namespace Tpetra { Kokkos::deep_copy (offsets, offsets_h); } // whether the graph is fill complete - if (debug) { - if (wrongOffsets.size () != 0) { - std::ostringstream os; - os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: ["; - for (size_t k = 0; k < wrongOffsets.size (); ++k) { - os << "(" << wrongOffsets[k].first << "," - << wrongOffsets[k].second << ")"; - if (k + 1 < wrongOffsets.size ()) { - os << ", "; - } + if (verbose && wrongOffsets.size () != 0) { + std::ostringstream os; + os << *prefix << "Wrong offsets: ["; + for (size_t k = 0; k < wrongOffsets.size (); ++k) { + os << "(" << wrongOffsets[k].first << "," + << wrongOffsets[k].second << ")"; + if (k + 1 < wrongOffsets.size ()) { + os << ", "; } - os << "]" << std::endl; - std::cerr << os.str (); } - } // debug + os << "]" << endl; + std::cerr << os.str(); + } - if (debug) { + if (debug_) { using Teuchos::reduceAll; using std::endl; Teuchos::RCP > comm = this->getComm (); @@ -6489,7 +6459,7 @@ namespace Tpetra { } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str()); } - } // debug + } // debug_ } namespace { // (anonymous) From e24b70a47db7cceaca6984d96996fb89c16e7df0 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 3 Feb 2020 14:42:10 -0700 Subject: [PATCH 06/49] Tpetra: Fix CrsGraph unit test to use Behavior::debug("CrsGraph") @trilinos/tpetra See discussion here: 
https://github.com/trilinos/Trilinos/issues/6727#issuecomment-581632461 --- .../test/CrsGraph/CrsGraph_UnitTests0.cpp | 144 +++++++----------- 1 file changed, 56 insertions(+), 88 deletions(-) diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests0.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests0.cpp index 40d6f2f3f1f6..f2b976e957b3 100644 --- a/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests0.cpp +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_UnitTests0.cpp @@ -35,19 +35,16 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER */ -#include -#include +#include "Tpetra_CrsGraph.hpp" +#include "Tpetra_Details_Behavior.hpp" #include "Tpetra_Details_determineLocalTriangularStructure.hpp" -#include -#include // std::is_same +#include "Tpetra_TestingUtilities.hpp" -namespace { +namespace { // (anonymous) template Tpetra::Details::LocalTriangularStructureResult @@ -130,7 +127,6 @@ namespace { // UNIT TESTS // - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, BadConst, LO, GO , Node ) { using Teuchos::REDUCE_MIN; @@ -199,42 +195,43 @@ namespace { // TEST_EQUALITY_CONST( gblSuccess, 1 ); } - -// mfh 05 Apr 2013: CrsGraph only tests for bad nonowned GIDs in a -// debug build. The BadGIDs test fails in a release build. 
-#ifdef HAVE_TPETRA_DEBUG - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, BadGIDs, LO, GO , Node ) { - using GRAPH = Tpetra::CrsGraph; - using map_type = Tpetra::Map; - - const GST INVALID = Teuchos::OrdinalTraits::invalid (); - // get a comm - RCP > comm = Tpetra::getDefaultComm(); - const int myRank = comm->getRank(); - // create a Map - const size_t numLocal = 10; - const GO indexBase = 0; - RCP map = - rcp (new map_type (INVALID, numLocal, indexBase, comm)); - { - Array gids(1); - gids[0] = myRank*numLocal+numLocal; // off this node, except on the last proc, where it is off the map - // bad gid on the last node (not in domain map), discovered at fillComplete() - GRAPH goodgraph(map,1); - goodgraph.insertGlobalIndices(map->getMinGlobalIndex(),gids()); - TEST_THROW( goodgraph.fillComplete(), std::runtime_error ); + const bool debug = Tpetra::Details::Behavior::debug("CrsGraph"); + if (debug) { + using GRAPH = Tpetra::CrsGraph; + using map_type = Tpetra::Map; + + const GST INVALID = Teuchos::OrdinalTraits::invalid (); + // get a comm + RCP > comm = Tpetra::getDefaultComm(); + const int myRank = comm->getRank(); + // create a Map + const size_t numLocal = 10; + const GO indexBase = 0; + RCP map = + rcp (new map_type (INVALID, numLocal, indexBase, comm)); + { + Array gids(1); + // This GID is off process but still in the domain Map on + // every process but the last. On the last process, this GID + // is not in the domain Map on ANY process. This makes + // inserting it an error. + gids[0] = myRank*numLocal+numLocal; + // In debug mode, CrsGraph::fillComplete discovers on the last + // process that this GID is bad, because it's not in the + // domain Map. + GRAPH goodgraph(map,1); + goodgraph.insertGlobalIndices(map->getMinGlobalIndex(),gids()); + TEST_THROW( goodgraph.fillComplete(), std::runtime_error ); + } + // All procs fail if any process fails + int globalSuccess_int = -1; + reduceAll( *comm, REDUCE_SUM, success ? 
0 : 1, outArg(globalSuccess_int) ); + TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } - // All procs fail if any process fails - int globalSuccess_int = -1; - reduceAll( *comm, REDUCE_SUM, success ? 0 : 1, outArg(globalSuccess_int) ); - TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } -#endif // HAVE_TPETRA_DEBUG - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, ExcessAllocation, LO, GO , Node ) { using Teuchos::Comm; @@ -299,8 +296,6 @@ namespace { TEST_EQUALITY_CONST(gblSuccess, 1); } - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, insert_remove_LIDs, LO, GO , Node ) { using Teuchos::Comm; @@ -349,8 +344,6 @@ namespace { TEST_EQUALITY_CONST(gblSuccess, 1); } - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, CopiesAndViews, LO, GO , Node ) { using GRAPH = Tpetra::CrsGraph; @@ -457,8 +450,6 @@ namespace { } } - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, WithStaticProfile, LO, GO , Node ) { using GRAPH = Tpetra::CrsGraph; @@ -488,8 +479,6 @@ namespace { TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, EmptyGraphAlloc0, LO, GO , Node ) { using crs_graph_type = Tpetra::CrsGraph; @@ -538,7 +527,7 @@ namespace { RCP test_row; { - // allocate + // allocate RCP test_crs = rcp (new crs_graph_type (map, 1)); // invalid, because none are allocated yet TEST_EQUALITY_CONST( test_crs->getNodeAllocationSize(), STINV ); @@ -712,8 +701,6 @@ namespace { TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, EmptyGraphAlloc1, LO, GO , Node ) { typedef Tpetra::CrsGraph graph_type; @@ -786,7 +773,6 @@ namespace { } } - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, DottedDiag, LO, GO , Node ) { using GRAPH = Tpetra::CrsGraph; @@ -862,8 +848,6 @@ namespace { TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } - - //// TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( CrsGraph, NonLocals, LO, GO , Node ) { using Teuchos::as; @@ -1079,38 +1063,22 @@ namespace { // INSTANTIATIONS // -// 
Tests to build and run in both debug and release modes. We will -// instantiate them over all enabled local ordinal (LO), global -// ordinal (GO), and Kokkos Node (NODE) types. -#define UNIT_TEST_GROUP_DEBUG_AND_RELEASE( LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, EmptyGraphAlloc0, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, EmptyGraphAlloc1, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, ExcessAllocation, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, BadConst, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, insert_remove_LIDs, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, NonLocals, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, DottedDiag, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, WithStaticProfile, LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, CopiesAndViews, LO, GO, NODE ) - -// mfh 05 Apr 2013: CrsGraph only tests for bad nonowned GIDs in a -// debug build. The BadGIDs test fails in a release build. -#ifdef HAVE_TPETRA_DEBUG - -#define UNIT_TEST_GROUP_DEBUG_ONLY( LO, GO, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, BadGIDs, LO, GO, NODE ) - -#else // NOT HAVE_TPETRA_DEBUG - -#define UNIT_TEST_GROUP_DEBUG_ONLY( LO, GO, NODE ) - -#endif // HAVE_TPETRA_DEBUG - - - TPETRA_ETI_MANGLING_TYPEDEFS() - - TPETRA_INSTANTIATE_LGN( UNIT_TEST_GROUP_DEBUG_AND_RELEASE ) - - TPETRA_INSTANTIATE_LGN( UNIT_TEST_GROUP_DEBUG_ONLY ) - -} +// Tests to build and run. We will instantiate them over all enabled +// LocalOrdinal (LO), GlobalOrdinal (GO), and Node (NODE) types. 
+#define UNIT_TEST_GROUP( LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, EmptyGraphAlloc0, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, EmptyGraphAlloc1, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, ExcessAllocation, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, BadConst, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, insert_remove_LIDs, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, NonLocals, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, DottedDiag, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, WithStaticProfile, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, CopiesAndViews, LO, GO, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( CrsGraph, BadGIDs, LO, GO, NODE ) + + TPETRA_ETI_MANGLING_TYPEDEFS() + + TPETRA_INSTANTIATE_LGN( UNIT_TEST_GROUP ) + +} // namespace (anonymous) From 32af6652337353ffbcb711f89c7e732e2368b3dc Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 13:22:04 -0700 Subject: [PATCH 07/49] Tpetra::CrsMatrix: Separate copyAndPermuteImpl into 2 code paths @trilinos/tpetra Separate the implementation of Tpetra::CrsMatrix::copyAndPermute into two different code paths, depending on whether the target matrix has a static (const) graph. This is related to #6663. 
--- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 68 ++- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 524 ++++++++++++------ 2 files changed, 415 insertions(+), 177 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index afba90422513..6e992d609cd5 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3440,11 +3440,21 @@ namespace Tpetra { private: void - copyAndPermuteImpl (const RowMatrix& source, - const size_t numSameIDs, - const LocalOrdinal permuteToLIDs[], - const LocalOrdinal permuteFromLIDs[], - const size_t numPermutes); + copyAndPermuteStaticGraph( + const RowMatrix& source, + const size_t numSameIDs, + const LocalOrdinal permuteToLIDs[], + const LocalOrdinal permuteFromLIDs[], + const size_t numPermutes); + + void + copyAndPermuteNonStaticGraph( + const RowMatrix& source, + const size_t numSameIDs, + const LocalOrdinal permuteToLIDs[], + const LocalOrdinal permuteFromLIDs[], + const size_t numPermutes); + protected: virtual void copyAndPermute @@ -3982,9 +3992,22 @@ namespace Tpetra { /// about why we use \c Scalar and not \c impl_scalar_type here /// for the input array type. void - insertGlobalValuesFiltered (const GlobalOrdinal globalRow, - const Teuchos::ArrayView& indices, - const Teuchos::ArrayView& values); + insertGlobalValuesFiltered( + const GlobalOrdinal globalRow, + const Teuchos::ArrayView& indices, + const Teuchos::ArrayView& values, + const bool debug); + + /// \brief Wrapper for insertGlobalValuesFiltered that prints + /// helpful error messages if insertGlobalValuesFiltered throws. + void + insertGlobalValuesFilteredChecked( + const GlobalOrdinal globalRow, + const Teuchos::ArrayView& indices, + const Teuchos::ArrayView& values, + const char* const prefix, + const bool debug, + const bool verbose); /// \brief Combine in the data using the given combine mode. 
/// @@ -3998,10 +4021,14 @@ namespace Tpetra { /// about why we use \c Scalar and not \c impl_scalar_type here /// for the input array type. void - combineGlobalValues (const GlobalOrdinal globalRowIndex, - const Teuchos::ArrayView& columnIndices, - const Teuchos::ArrayView& values, - const Tpetra::CombineMode combineMode); + combineGlobalValues( + const GlobalOrdinal globalRowIndex, + const Teuchos::ArrayView& columnIndices, + const Teuchos::ArrayView& values, + const Tpetra::CombineMode combineMode, + const char* const prefix, + const bool debug, + const bool verbose); /// \brief Combine in the data using the given combine mode. /// @@ -4017,15 +4044,22 @@ namespace Tpetra { /// \param cols [in] Input (global) column indices corresponding /// to the above values. /// \param combineMode [in] The CombineMode to use. + /// \param prefix [in] Prefix for verbose debugging output; must + /// be nonnull if verbose is true. + /// \param debug [in] Whether to do debug checking. + /// \param verbose [in] Whether to print verbose debugging output. /// /// \return The number of modified entries. No error if and only /// if equal to numEnt. 
LocalOrdinal - combineGlobalValuesRaw (const LocalOrdinal lclRow, - const LocalOrdinal numEnt, - const impl_scalar_type vals[], - const GlobalOrdinal cols[], - const Tpetra::CombineMode combineMode); + combineGlobalValuesRaw(const LocalOrdinal lclRow, + const LocalOrdinal numEnt, + const impl_scalar_type vals[], + const GlobalOrdinal cols[], + const Tpetra::CombineMode combineMode, + const char* const prefix, + const bool debug, + const bool verbose); /// \brief Transform CrsMatrix entries, using global indices; /// backwards compatibility version that takes diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index b0d644569dff..93e2702ef440 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -2195,9 +2195,11 @@ namespace Tpetra { template void CrsMatrix:: - insertGlobalValuesFiltered (const GlobalOrdinal gblRow, - const Teuchos::ArrayView& indices, - const Teuchos::ArrayView& values) + insertGlobalValuesFiltered( + const GlobalOrdinal gblRow, + const Teuchos::ArrayView& indices, + const Teuchos::ArrayView& values, + const bool debug) { typedef impl_scalar_type IST; typedef LocalOrdinal LO; @@ -2205,12 +2207,12 @@ namespace Tpetra { typedef Tpetra::Details::OrdinalTraits OTLO; const char tfecfFuncName[] = "insertGlobalValuesFiltered: "; -#ifdef HAVE_TPETRA_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (values.size () != indices.size (), std::runtime_error, - "values.size() = " << values.size () << " != indices.size() = " - << indices.size () << "."); -#endif // HAVE_TPETRA_DEBUG + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (values.size () != indices.size (), std::runtime_error, + "values.size() = " << values.size () << " != indices.size() = " + << indices.size () << "."); + } // getRowMap() is not thread safe, because it increments RCP's // reference count. getCrsGraphRef() is thread safe. 
@@ -2278,12 +2280,12 @@ namespace Tpetra { // Invariant before the increment line: Either endOffset == // numInputEnt, or inputGblColInds[endOffset] is not in the column Map // on the calling process. -#ifdef HAVE_TPETRA_DEBUG - const bool invariant = endOffset == numInputEnt || - colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid (); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (! invariant, std::logic_error, std::endl << "Invariant failed!"); -#endif // HAVE_TPETRA_DEBUG + if (debug) { + const bool invariant = endOffset == numInputEnt || + colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid (); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (! invariant, std::logic_error, std::endl << "Invariant failed!"); + } curOffset = endOffset + 1; } } @@ -2313,12 +2315,12 @@ namespace Tpetra { // Invariant before the increment line: Either endOffset == // numInputEnt, or inputGblColInds[endOffset] is not in the // column Map on the calling process. -#ifdef HAVE_TPETRA_DEBUG - const bool invariant = endOffset == numInputEnt || - colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid (); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (! invariant, std::logic_error, std::endl << "Invariant failed!"); -#endif // HAVE_TPETRA_DEBUG + if (debug) { + const bool invariant = endOffset == numInputEnt || + colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid (); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (! 
invariant, std::logic_error, std::endl << "Invariant failed!"); + } curOffset = endOffset + 1; } } @@ -2329,6 +2331,45 @@ namespace Tpetra { } } + template + void + CrsMatrix:: + insertGlobalValuesFilteredChecked( + const GlobalOrdinal gblRow, + const Teuchos::ArrayView& indices, + const Teuchos::ArrayView& values, + const char* const prefix, + const bool debug, + const bool verbose) + { + using Details::verbosePrintArray; + using std::endl; + + try { + insertGlobalValuesFiltered(gblRow, indices, values, debug); + } + catch(std::exception& e) { + std::ostringstream os; + if (verbose) { + const size_t maxNumToPrint = + Details::Behavior::verbosePrintCountThreshold(); + os << *prefix << ": insertGlobalValuesFiltered threw an " + "exception: " << e.what() << endl + << "Global row index: " << gblRow << endl; + verbosePrintArray(os, indices, "Global column indices", + maxNumToPrint); + os << endl; + verbosePrintArray(os, values, "Values", maxNumToPrint); + os << endl; + } + else { + os << ": insertGlobalValuesFiltered threw an exception: " + << e.what(); + } + TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str()); + } + } + template LocalOrdinal CrsMatrix:: @@ -6564,23 +6605,35 @@ namespace Tpetra { template void CrsMatrix:: - copyAndPermuteImpl (const RowMatrix& srcMat, - const size_t numSameIDs, - const LocalOrdinal permuteToLIDs[], - const LocalOrdinal permuteFromLIDs[], - const size_t numPermutes) + copyAndPermuteStaticGraph( + const RowMatrix& srcMat, + const size_t numSameIDs, + const LocalOrdinal permuteToLIDs[], + const LocalOrdinal permuteFromLIDs[], + const size_t numPermutes) { - using Tpetra::Details::ProfilingRegion; + using Details::ProfilingRegion; using Teuchos::Array; using Teuchos::ArrayView; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; -#ifdef HAVE_TPETRA_DEBUG - // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC. 
- const char tfecfFuncName[] = "copyAndPermuteImpl: "; -#endif // HAVE_TPETRA_DEBUG + using std::endl; + using LO = LocalOrdinal; + using GO = GlobalOrdinal; + const char tfecfFuncName[] = "copyAndPermuteStaticGraph"; + const char suffix[] = + " Please report this bug to the Tpetra developers."; + ProfilingRegion regionCAP + ("Tpetra::CrsMatrix::copyAndPermuteStaticGraph"); - ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermuteImpl"); + const bool debug = Details::Behavior::debug("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", tfecfFuncName); + std::ostringstream os; + os << *prefix << "Start" << endl; + } + const char* const prefix_raw = + verbose ? prefix.get()->c_str() : nullptr; const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed (); // @@ -6598,7 +6651,6 @@ namespace Tpetra { const GO sourceGID = srcRowMap.getGlobalElement (sourceLID); const GO targetGID = sourceGID; - // Input views for the combineGlobalValues() call below. ArrayView rowIndsConstView; ArrayView rowValsConstView; @@ -6617,47 +6669,188 @@ namespace Tpetra { // copy. Really it's the GIDs that have to be copied (because // they have to be converted from LIDs). size_t checkRowLength = 0; - srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength); - -#ifdef HAVE_TPETRA_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, - std::logic_error, "For global row index " << sourceGID << ", the source" - " matrix's getNumEntriesInGlobalRow() method returns a row length of " - << rowLength << ", but the getGlobalRowCopy() method reports that " - "the row length is " << checkRowLength << ". 
Please report this bug " - "to the Tpetra developers."); -#endif // HAVE_TPETRA_DEBUG - + srcMat.getGlobalRowCopy (sourceGID, rowIndsView, + rowValsView, checkRowLength); + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (rowLength != checkRowLength, std::logic_error, "For " + "global row index " << sourceGID << ", the source " + "matrix's getNumEntriesInGlobalRow returns a row length " + "of " << rowLength << ", but getGlobalRowCopy reports " + "a row length of " << checkRowLength << "." << suffix); + } rowIndsConstView = rowIndsView.view (0, rowLength); rowValsConstView = rowValsView.view (0, rowLength); } else { // source matrix is globally indexed. - srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView); + srcMat.getGlobalRowView(sourceGID, rowIndsConstView, + rowValsConstView); } - // Combine the data into the target matrix. - if (this->isStaticGraph ()) { - // Applying a permutation to a matrix with a static graph - // means REPLACE-ing entries. - combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE); + // Applying a permutation to a matrix with a static graph + // means REPLACE-ing entries. 
+ combineGlobalValues(targetGID, rowIndsConstView, + rowValsConstView, REPLACE, + prefix_raw, debug, verbose); + } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Do permutes" << endl; + } + + const map_type& tgtRowMap = * (this->getRowMap ()); + for (size_t p = 0; p < numPermutes; ++p) { + const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]); + const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]); + + ArrayView rowIndsConstView; + ArrayView rowValsConstView; + + if (sourceIsLocallyIndexed) { + const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID); + if (rowLength > static_cast (rowInds.size ())) { + rowInds.resize (rowLength); + rowVals.resize (rowLength); + } + // Resizing invalidates an Array's views, so we must make new + // ones, even if rowLength hasn't changed. + ArrayView rowIndsView = rowInds.view (0, rowLength); + ArrayView rowValsView = rowVals.view (0, rowLength); + + // The source matrix is locally indexed, so we have to get a + // copy. Really it's the GIDs that have to be copied (because + // they have to be converted from LIDs). + size_t checkRowLength = 0; + srcMat.getGlobalRowCopy(sourceGID, rowIndsView, + rowValsView, checkRowLength); + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (rowLength != checkRowLength, std::logic_error, "For " + "source matrix global row index " << sourceGID << ", " + "getNumEntriesInGlobalRow returns a row length of " << + rowLength << ", but getGlobalRowCopy a row length of " + << checkRowLength << "." << suffix); + } + rowIndsConstView = rowIndsView.view (0, rowLength); + rowValsConstView = rowValsView.view (0, rowLength); } else { - // Applying a permutation to a matrix with a dynamic graph - // means INSERT-ing entries. This has the same effect as - // ADD, if the target graph already has an entry there. 
- combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT); + srcMat.getGlobalRowView(sourceGID, rowIndsConstView, + rowValsConstView); } - } // For each of the consecutive source and target IDs that are the same + combineGlobalValues(targetGID, rowIndsConstView, + rowValsConstView, REPLACE, + prefix_raw, debug, verbose); + } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done" << endl; + } + } + + template + void + CrsMatrix:: + copyAndPermuteNonStaticGraph( + const RowMatrix& srcMat, + const size_t numSameIDs, + const LocalOrdinal permuteToLIDs[], + const LocalOrdinal permuteFromLIDs[], + const size_t numPermutes) + { + using Details::ProfilingRegion; + using Teuchos::Array; + using Teuchos::ArrayView; + using std::endl; + using LO = LocalOrdinal; + using GO = GlobalOrdinal; + const char tfecfFuncName[] = "copyAndPermuteNonStaticGraph"; + const char suffix[] = + " Please report this bug to the Tpetra developers."; + ProfilingRegion regionCAP + ("Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph"); + + const bool debug = Details::Behavior::debug("CrsGraph"); + const bool verbose = Details::Behavior::verbose("CrsGraph"); + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", tfecfFuncName); + std::ostringstream os; + os << *prefix << "Start" << endl; + } + const char* const prefix_raw = + verbose ? prefix.get()->c_str() : nullptr; + + const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed (); // - // Permute the remaining rows. + // Copy the first numSame row from source to target (this matrix). + // This involves copying rows corresponding to LIDs [0, numSame-1]. // + const map_type& srcRowMap = * (srcMat.getRowMap ()); + Array rowInds; + Array rowVals; + const LO numSameIDs_as_LID = static_cast (numSameIDs); + for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) { + // Global ID for the current row index in the source matrix. 
+ // The first numSameIDs GIDs in the two input lists are the + // same, so sourceGID == targetGID in this case. + const GO sourceGID = srcRowMap.getGlobalElement (sourceLID); + const GO targetGID = sourceGID; + + ArrayView rowIndsConstView; + ArrayView rowValsConstView; + + if (sourceIsLocallyIndexed) { + const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID); + if (rowLength > static_cast (rowInds.size())) { + rowInds.resize (rowLength); + rowVals.resize (rowLength); + } + // Resizing invalidates an Array's views, so we must make new + // ones, even if rowLength hasn't changed. + ArrayView rowIndsView = rowInds.view (0, rowLength); + ArrayView rowValsView = rowVals.view (0, rowLength); + + // The source matrix is locally indexed, so we have to get a + // copy. Really it's the GIDs that have to be copied (because + // they have to be converted from LIDs). + size_t checkRowLength = 0; + srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength); + + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (rowLength != checkRowLength, std::logic_error, ": For " + "global row index " << sourceGID << ", the source " + "matrix's getNumEntriesInGlobalRow returns a row length " + "of " << rowLength << ", but getGlobalRowCopy reports " + "a row length of " << checkRowLength << "." << suffix); + } + rowIndsConstView = rowIndsView.view (0, rowLength); + rowValsConstView = rowValsView.view (0, rowLength); + } + else { // source matrix is globally indexed. + srcMat.getGlobalRowView(sourceGID, rowIndsConstView, + rowValsConstView); + } + + // Combine the data into the target matrix. 
+ insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView, + rowValsConstView, prefix_raw, debug, verbose); + } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Do permutes" << endl; + } + const map_type& tgtRowMap = * (this->getRowMap ()); for (size_t p = 0; p < numPermutes; ++p) { const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]); const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]); - // Input views for the combineGlobalValues() call below. ArrayView rowIndsConstView; ArrayView rowValsConstView; @@ -6676,35 +6869,33 @@ namespace Tpetra { // copy. Really it's the GIDs that have to be copied (because // they have to be converted from LIDs). size_t checkRowLength = 0; - srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength); - -#ifdef HAVE_TPETRA_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength, - std::logic_error, "For the source matrix's global row index " - << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() " - "method returns a row length of " << rowLength << ", but the " - "getGlobalRowCopy() method reports that the row length is " - << checkRowLength << ". Please report this bug to the Tpetra " - "developers."); -#endif // HAVE_TPETRA_DEBUG - + srcMat.getGlobalRowCopy(sourceGID, rowIndsView, + rowValsView, checkRowLength); + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (rowLength != checkRowLength, std::logic_error, "For " + "source matrix global row index " << sourceGID << ", " + "getNumEntriesInGlobalRow returns a row length of " << + rowLength << ", but getGlobalRowCopy a row length of " + << checkRowLength << "." 
<< suffix); + } rowIndsConstView = rowIndsView.view (0, rowLength); rowValsConstView = rowValsView.view (0, rowLength); } else { - srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView); + srcMat.getGlobalRowView(sourceGID, rowIndsConstView, + rowValsConstView); } // Combine the data into the target matrix. - if (isStaticGraph()) { - this->combineGlobalValues (targetGID, rowIndsConstView, - rowValsConstView, REPLACE); - } - else { - this->combineGlobalValues (targetGID, rowIndsConstView, - rowValsConstView, INSERT); - } - } // For each ID to permute + insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView, + rowValsConstView, prefix_raw, debug, verbose); + } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done" << endl; + } } template @@ -6760,24 +6951,33 @@ namespace Tpetra { // This dynamic cast should succeed, because we've already tested // it in checkSizes(). - using RMT = ::Tpetra::RowMatrix; + using RMT = RowMatrix; const RMT& srcMat = dynamic_cast (srcObj); - if (!this->isStaticGraph ()) { + if (isStaticGraph ()) { + copyAndPermuteStaticGraph(srcMat, numSameIDs, + permuteToLIDs_h.data(), + permuteFromLIDs_h.data(), + numPermute); + } + else { auto padding = - this->myGraph_->computeCrsPadding(*srcMat.getGraph(), + myGraph_->computeCrsPadding(*srcMat.getGraph(), numSameIDs, permuteToLIDs, permuteFromLIDs, verbose); - if (padding.size() > 0) - this->applyCrsPadding(padding, verbose); + if (padding.size() != 0) { + applyCrsPadding(padding, verbose); + } + copyAndPermuteNonStaticGraph(srcMat, numSameIDs, + permuteToLIDs_h.data(), + permuteFromLIDs_h.data(), + numPermute); } if (verbose) { std::ostringstream os; - os << *prefix << "Call copyAndPermuteImpl" << endl; - std::cerr << os.str (); + os << *prefix << "Done" << endl; + std::cerr << os.str(); } - this->copyAndPermuteImpl (srcMat, numSameIDs, permuteToLIDs_h.data (), - permuteFromLIDs_h.data (), numPermute); } template @@ -7437,35 +7637,44 @@ namespace Tpetra { 
template LocalOrdinal CrsMatrix:: - combineGlobalValuesRaw (const LocalOrdinal lclRow, - const LocalOrdinal numEnt, - const impl_scalar_type vals[], - const GlobalOrdinal cols[], - const Tpetra::CombineMode combineMode) + combineGlobalValuesRaw(const LocalOrdinal lclRow, + const LocalOrdinal numEnt, + const impl_scalar_type vals[], + const GlobalOrdinal cols[], + const Tpetra::CombineMode combMode, + const char* const prefix, + const bool debug, + const bool verbose) { - typedef GlobalOrdinal GO; - //const char tfecfFuncName[] = "combineGlobalValuesRaw: "; + using GO = GlobalOrdinal; // mfh 23 Mar 2017: This branch is not thread safe in a debug // build, due to use of Teuchos::ArrayView; see #229. - const GO gblRow = this->myGraph_->rowMap_->getGlobalElement (lclRow); - Teuchos::ArrayView cols_av (numEnt == 0 ? NULL : cols, numEnt); - Teuchos::ArrayView vals_av (numEnt == 0 ? NULL : reinterpret_cast (vals), numEnt); + const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow); + Teuchos::ArrayView cols_av + (numEnt == 0 ? nullptr : cols, numEnt); + Teuchos::ArrayView vals_av + (numEnt == 0 ? nullptr : reinterpret_cast (vals), numEnt); // FIXME (mfh 23 Mar 2017) This is a work-around for less common // combine modes. combineGlobalValues throws on error; it does // not return an error code. Thus, if it returns, it succeeded. 
- this->combineGlobalValues (gblRow, cols_av, vals_av, combineMode); + combineGlobalValues(gblRow, cols_av, vals_av, combMode, + prefix, debug, verbose); return numEnt; } template void CrsMatrix:: - combineGlobalValues (const GlobalOrdinal globalRowIndex, - const Teuchos::ArrayView& columnIndices, - const Teuchos::ArrayView& values, - const Tpetra::CombineMode combineMode) + combineGlobalValues( + const GlobalOrdinal globalRowIndex, + const Teuchos::ArrayView& columnIndices, + const Teuchos::ArrayView& values, + const Tpetra::CombineMode combineMode, + const char* const prefix, + const bool debug, + const bool verbose) { const char tfecfFuncName[] = "combineGlobalValues: "; @@ -7487,16 +7696,18 @@ namespace Tpetra { values, f); } else if (combineMode == INSERT) { - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - isStaticGraph () && combineMode == INSERT, std::invalid_argument, - "INSERT combine mode is not allowed if the matrix has a static graph " - "(i.e., was constructed with the CrsMatrix constructor that takes a " - "const CrsGraph pointer)."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (isStaticGraph() && combineMode == INSERT, + std::invalid_argument, "INSERT combine mode is forbidden " + "if the matrix has a static (const) graph (i.e., was " + "constructed with the CrsMatrix constructor that takes a " + "const CrsGraph pointer)."); } else { - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - true, std::logic_error, "Invalid combine mode; should never get " - "here! Please report this bug to the Tpetra developers."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (true, std::logic_error, "Invalid combine mode; should " + "never get here! " + "Please report this bug to the Tpetra developers."); } } else { // The matrix has a dynamic graph. @@ -7507,18 +7718,8 @@ namespace Tpetra { // are equivalent. We need to call insertGlobalValues() // anyway if the column indices don't yet exist in this row, // so we just call insertGlobalValues() for both cases. 
- try { - this->insertGlobalValuesFiltered (globalRowIndex, columnIndices, - values); - } - catch (std::exception& e) { - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (true, std::runtime_error, std::endl - << "insertGlobalValuesFiltered(" << globalRowIndex << ", " - << std::endl << Teuchos::toString (columnIndices) << ", " - << std::endl << Teuchos::toString (values) - << ") threw an exception: " << std::endl << e.what ()); - } + insertGlobalValuesFilteredChecked(globalRowIndex, + columnIndices, values, prefix, debug, verbose); } // FIXME (mfh 14 Mar 2012): // @@ -7735,6 +7936,7 @@ namespace Tpetra { typedef View vals_out_type; const char tfecfFuncName[] = "unpackAndCombineImplNonStatic: "; + const bool debug = Behavior::debug("CrsMatrix"); const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { @@ -7744,6 +7946,8 @@ namespace Tpetra { os << *prefix << endl; // we've already printed DualViews' statuses std::cerr << os.str (); } + const char* const prefix_raw = + verbose ? prefix.get()->c_str() : nullptr; const size_type numImportLIDs = importLIDs.extent (0); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC @@ -7800,43 +8004,39 @@ namespace Tpetra { continue; // empty buffer for that row means that the row is empty } // We need to unpack a nonzero number of entries for this row. 
-#ifdef HAVE_TPETRA_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (offset + numBytes > static_cast (imports_h.extent (0)), - std::logic_error, "At local row index importLIDs_h[i=" << i << "]=" - << importLIDs_h[i] << ", offset (=" << offset << ") + numBytes (=" - << numBytes << ") > imports_h.extent(0)=" - << imports_h.extent (0) << "."); -#endif // HAVE_TPETRA_DEBUG - + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (offset + numBytes > static_cast (imports_h.extent (0)), + std::logic_error, "At local row index importLIDs_h[i=" << i << "]=" + << importLIDs_h[i] << ", offset (=" << offset << ") + numBytes (=" + << numBytes << ") > imports_h.extent(0)=" + << imports_h.extent (0) << "."); + } LO numEntLO = 0; -#ifdef HAVE_TPETRA_DEBUG - const size_t theNumBytes = PackTraits::packValueCount (numEntLO); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (theNumBytes > numBytes, std::logic_error, "theNumBytes = " - << theNumBytes << " > numBytes = " << numBytes << "."); -#endif // HAVE_TPETRA_DEBUG - + if (debug) { + const size_t theNumBytes = PackTraits::packValueCount (numEntLO); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (theNumBytes > numBytes, std::logic_error, "theNumBytes = " + << theNumBytes << " > numBytes = " << numBytes << "."); + } const char* const inBuf = imports_h.data () + offset; const size_t actualNumBytes = PackTraits::unpackValue (numEntLO, inBuf); -#ifdef HAVE_TPETRA_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (actualNumBytes > numBytes, std::logic_error, "At i = " << i - << ", actualNumBytes=" << actualNumBytes - << " > numBytes=" << numBytes << "."); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (numEntLO == 0, std::logic_error, "At local row index importLIDs_h[i=" - << i << "]=" << importLIDs_h[i] << ", the number of entries read " - "from the packed data is numEntLO=" << numEntLO << ", but numBytes=" - << numBytes << " != 0."); -#else - (void) actualNumBytes; -#endif // HAVE_TPETRA_DEBUG + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + 
(actualNumBytes > numBytes, std::logic_error, "At i = " << i + << ", actualNumBytes=" << actualNumBytes + << " > numBytes=" << numBytes << "."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (numEntLO == 0, std::logic_error, "At local row index importLIDs_h[i=" + << i << "]=" << importLIDs_h[i] << ", the number of entries read " + "from the packed data is numEntLO=" << numEntLO << ", but numBytes=" + << numBytes << " != 0."); + } - maxRowNumEnt = std::max (static_cast (numEntLO), maxRowNumEnt); + maxRowNumEnt = std::max(size_t(numEntLO), maxRowNumEnt); offset += numBytes; } @@ -7872,9 +8072,7 @@ namespace Tpetra { } LO numEntLO = 0; const char* const inBuf = imports_h.data () + offset; - const size_t actualNumBytes = - PackTraits::unpackValue (numEntLO, inBuf); - (void) actualNumBytes; + (void) PackTraits::unpackValue (numEntLO, inBuf); const size_t numEnt = static_cast(numEntLO);; const LO lclRow = importLIDs_h[i]; @@ -7892,11 +8090,17 @@ namespace Tpetra { const ST* const valsRaw = const_cast (valsOut.data ()); const GO* const gidsRaw = const_cast (gidsOut.data ()); - this->combineGlobalValuesRaw (lclRow, numEnt, valsRaw, gidsRaw, combineMode); - + combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw, + combineMode, prefix_raw, debug, verbose); // Don't update offset until current LID has succeeded. offset += numBytes; } // for each import LID i + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } template From 7bf8e71223b06373e076551abace12ea12c07b4b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 15:15:04 -0700 Subject: [PATCH 08/49] Tpetra::CrsGraph::computeCrsPaddingForSameIDs changes (see below) @trilinos/tpetra Make sure that computeCrsPaddingForSameIDs doesn't double-count entries that appear in both the source and target. I don't think it was doing that before, thanks to Tim's hard work, but I'm just making sure. 
Also, with TPETRA_VERBOSE set, the method now prints how many duplicates it found. --- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 116 +++++++++++++++--- 1 file changed, 96 insertions(+), 20 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index e2b72f67f878..071c7c848665 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5245,48 +5245,116 @@ namespace Tpetra { { using LO = LocalOrdinal; using GO = GlobalOrdinal; + using std::endl; const char tfecfFuncName[] = "computeCrsPaddingForSameIds: "; - if (!numSameIDs) + std::unique_ptr prefix; + const bool verbose = Details::Behavior::verbose("CrsGraph"); + if (verbose) { + prefix = + this->createPrefix("CrsGraph", "computeCrsPaddingForSameIDs"); + std::ostringstream os; + os << *prefix << "numSameIDs: " << numSameIDs + << ", padAll: " << (padAll ? "true" : "false") << endl; + std::cerr << os.str(); + } + + if (! numSameIDs) { return; + } Kokkos::fence (); using insert_result = typename Kokkos::UnorderedMap::insert_result; + using this_type = CrsGraph; + const this_type* srcCrs = dynamic_cast(&source); // Compute extra capacity needed to accommodate incoming data const map_type& src_row_map = * (source.getRowMap ()); + const bool src_sorted = srcCrs == nullptr ? false : srcCrs->isSorted(); + const bool tgt_sorted = this->isSorted(); + const bool src_merged = srcCrs == nullptr ? 
false : srcCrs->isMerged(); + const bool tgt_merged = this->isMerged(); + + std::vector src_row_inds; + std::vector tgt_row_inds; + std::vector merged_inds; + + size_t srcNumDups = 0; + size_t tgtNumDups = 0; + size_t mergedNumDups = 0; + for (LO tgt_lid = 0; tgt_lid < static_cast (numSameIDs); ++tgt_lid) { const GO src_gid = src_row_map.getGlobalElement(tgt_lid); - auto num_src_entries = source.getNumEntriesInGlobalRow(src_gid); - - if (num_src_entries == 0) + size_t orig_num_src_entries = source.getNumEntriesInGlobalRow(src_gid); + if (orig_num_src_entries == 0) { continue; - + } insert_result result; const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid); if (padAll) { - result = padding.insert(tgt_lid, num_src_entries); + result = padding.insert(tgt_lid, orig_num_src_entries); } else { - size_t check_row_length = 0; - std::vector src_row_inds(num_src_entries); - Teuchos::ArrayView src_row_inds_view(src_row_inds.data(), src_row_inds.size()); - source.getGlobalRowCopy(src_gid, src_row_inds_view, check_row_length); + if (src_row_inds.size() < orig_num_src_entries) { + src_row_inds.resize(orig_num_src_entries); + } + Teuchos::ArrayView src_row_inds_view(src_row_inds.data(), orig_num_src_entries); + source.getGlobalRowCopy(src_gid, src_row_inds_view, orig_num_src_entries); + if (! src_sorted) { + std::sort(src_row_inds_view.begin(), src_row_inds_view.end()); + } + if (! src_merged) { + auto new_end = std::unique(src_row_inds_view.begin(), src_row_inds_view.end()); + const size_t new_num_ent = static_cast(new_end - src_row_inds_view.begin()); + srcNumDups += (new_num_ent - orig_num_src_entries); + src_row_inds_view = Teuchos::ArrayView(src_row_inds_view.data(), new_num_ent); + } + if (src_row_inds_view.size() == 0) { // nothing new to insert + result = padding.insert(tgt_lid, size_t(0)); + // FIXME (mfh 09 Apr 2019, 04 Feb 2020) Kokkos::UnorderedMap + // is allowed to fail even if the user did nothing wrong. We + // should actually have a retry option. 
I just copied this + // code over from computeCrsPadding. + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (result.failed(), std::runtime_error, + "unable to insert padding for LID " << tgt_lid); + } - auto num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid); - std::vector tgt_row_inds(num_tgt_entries); - Teuchos::ArrayView tgt_row_inds_view(tgt_row_inds.data(), tgt_row_inds.size()); - this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, check_row_length); + size_t orig_num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid); + if (tgt_row_inds.size() < orig_num_tgt_entries) { + tgt_row_inds.resize(orig_num_tgt_entries); + } + Teuchos::ArrayView tgt_row_inds_view(tgt_row_inds.data(), orig_num_tgt_entries); + this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, orig_num_tgt_entries); + if (! tgt_sorted) { + std::sort(tgt_row_inds_view.begin(), tgt_row_inds_view.end()); + } + if (! tgt_merged) { + auto new_end = std::unique(tgt_row_inds_view.begin(), tgt_row_inds_view.end()); + const size_t new_num_ent = static_cast(new_end - tgt_row_inds_view.begin()); + tgtNumDups += (new_num_ent - orig_num_tgt_entries); + tgt_row_inds_view = Teuchos::ArrayView(tgt_row_inds_view.data(), new_num_ent); + } - size_t how_much_padding = 0; - for (auto src_row_ind : src_row_inds) { - if (std::find(tgt_row_inds.begin(), tgt_row_inds.end(), src_row_ind) == tgt_row_inds.end()) { - // The target row does not have space for - how_much_padding++; - } + const size_t orig_num_merged = + size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); + if (merged_inds.size() < orig_num_merged) { + merged_inds.resize(orig_num_merged); } + auto merged_end = + std::merge(src_row_inds_view.begin(), src_row_inds_view.end(), + tgt_row_inds_view.begin(), tgt_row_inds_view.end(), + merged_inds.begin()); + const size_t new_num_merged = + static_cast(merged_end - merged_inds.begin()); + mergedNumDups += (orig_num_merged - new_num_merged); + + const size_t how_much_padding = + new_num_merged >= 
orig_num_tgt_entries ? + new_num_merged - orig_num_tgt_entries : + size_t(0); result = padding.insert (tgt_lid, how_much_padding); } @@ -5297,6 +5365,14 @@ namespace Tpetra { (result.failed(), std::runtime_error, "unable to insert padding for LID " << tgt_lid); } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done: srcNumDups: " << srcNumDups + << ", tgtNumDups: " << tgtNumDups + << ", mergedNumDups: " << mergedNumDups << endl; + std::cerr << os.str(); + } } template From 9089f662a58e58c8cb07d2c2683a79201c12c43f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 15:44:41 -0700 Subject: [PATCH 09/49] Tpetra::CrsGraph: Factor out common code in my previous commit @trilinos/tpetra --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 28 +++---- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 81 ++++++++++++------- 2 files changed, 65 insertions(+), 44 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index d67680b62b28..19621c1e18c2 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1186,23 +1186,21 @@ namespace Tpetra { const size_t numSameIDs, const bool padAll) const; void - computeCrsPaddingForPermutedIDs (Kokkos::UnorderedMap& padding, - const RowGraph& source, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, - const bool padAll) const; + computeCrsPaddingForPermutedIDs( + Kokkos::UnorderedMap& padding, + const RowGraph& source, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs, + const bool padAll) const; virtual void - packAndPrepare - (const SrcDistObject& source, - const Kokkos::DualView& exportLIDs, - Kokkos::DualView& exports, - Kokkos::DualView numPacketsPerLID, - size_t& constantNumPackets, - Distributor& distor) override; + packAndPrepare( + const SrcDistObject& source, + const Kokkos::DualView& exportLIDs, + 
Kokkos::DualView& exports, + Kokkos::DualView numPacketsPerLID, + size_t& constantNumPackets, + Distributor& distor) override; virtual void pack (const Teuchos::ArrayView& exportLIDs, diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 071c7c848665..d734534341ea 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -78,6 +78,44 @@ namespace Tpetra { namespace Details { namespace Impl { + template + Teuchos::ArrayView< + typename RowGraph< + LocalOrdinal, GlobalOrdinal, Node>::global_ordinal_type> + getSortedMergedGlobalRow( + std::vector& gblColIndsStorage, + size_t& origNumEntries, + size_t& numDuplicates, + const RowGraph& graph, + const GlobalOrdinal gblRow, + const bool iAmSorted, + const bool iAmMerged) + { + using Teuchos::ArrayView; + using GO = GlobalOrdinal; + + origNumEntries = graph.getNumEntriesInGlobalRow(gblRow); + if (origNumEntries > gblColIndsStorage.size()) { + gblColIndsStorage.resize(origNumEntries); + } + ArrayView gblColInds(gblColIndsStorage.data(), + origNumEntries); + graph.getGlobalRowCopy(gblRow, gblColInds, origNumEntries); + if (! iAmSorted) { + std::sort(gblColInds.begin(), gblColInds.end()); + } + if (! 
iAmMerged) { + auto newEnd = + std::unique(gblColInds.begin(), gblColInds.end()); + const size_t newNumEntries = + static_cast(newEnd - gblColInds.begin()); + numDuplicates = size_t(newNumEntries - origNumEntries); + gblColInds = + ArrayView(gblColInds.data(), newNumEntries); + } + return gblColInds; + } + template class ConvertColumnIndicesFromGlobalToLocal { public: @@ -5245,6 +5283,7 @@ namespace Tpetra { { using LO = LocalOrdinal; using GO = GlobalOrdinal; + using Details::Impl::getSortedMergedGlobalRow; using std::endl; const char tfecfFuncName[] = "computeCrsPaddingForSameIds: "; @@ -5297,20 +5336,12 @@ namespace Tpetra { result = padding.insert(tgt_lid, orig_num_src_entries); } else { - if (src_row_inds.size() < orig_num_src_entries) { - src_row_inds.resize(orig_num_src_entries); - } - Teuchos::ArrayView src_row_inds_view(src_row_inds.data(), orig_num_src_entries); - source.getGlobalRowCopy(src_gid, src_row_inds_view, orig_num_src_entries); - if (! src_sorted) { - std::sort(src_row_inds_view.begin(), src_row_inds_view.end()); - } - if (! 
src_merged) { - auto new_end = std::unique(src_row_inds_view.begin(), src_row_inds_view.end()); - const size_t new_num_ent = static_cast(new_end - src_row_inds_view.begin()); - srcNumDups += (new_num_ent - orig_num_src_entries); - src_row_inds_view = Teuchos::ArrayView(src_row_inds_view.data(), new_num_ent); - } + size_t curNumSrcDups = 0; + Teuchos::ArrayView src_row_inds_view = + getSortedMergedGlobalRow(src_row_inds, orig_num_src_entries, + curNumSrcDups, source, src_gid, + src_sorted, src_merged); + srcNumDups += curNumSrcDups; if (src_row_inds_view.size() == 0) { // nothing new to insert result = padding.insert(tgt_lid, size_t(0)); // FIXME (mfh 09 Apr 2019, 04 Feb 2020) Kokkos::UnorderedMap @@ -5322,21 +5353,13 @@ namespace Tpetra { "unable to insert padding for LID " << tgt_lid); } - size_t orig_num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid); - if (tgt_row_inds.size() < orig_num_tgt_entries) { - tgt_row_inds.resize(orig_num_tgt_entries); - } - Teuchos::ArrayView tgt_row_inds_view(tgt_row_inds.data(), orig_num_tgt_entries); - this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, orig_num_tgt_entries); - if (! tgt_sorted) { - std::sort(tgt_row_inds_view.begin(), tgt_row_inds_view.end()); - } - if (! 
tgt_merged) { - auto new_end = std::unique(tgt_row_inds_view.begin(), tgt_row_inds_view.end()); - const size_t new_num_ent = static_cast(new_end - tgt_row_inds_view.begin()); - tgtNumDups += (new_num_ent - orig_num_tgt_entries); - tgt_row_inds_view = Teuchos::ArrayView(tgt_row_inds_view.data(), new_num_ent); - } + size_t orig_num_tgt_entries = 0; + size_t curNumTgtDups = 0; + Teuchos::ArrayView tgt_row_inds_view = + getSortedMergedGlobalRow(tgt_row_inds, orig_num_tgt_entries, + curNumTgtDups, *this, tgt_gid, + tgt_sorted, tgt_merged); + tgtNumDups += curNumTgtDups; const size_t orig_num_merged = size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); From 96730dd56c806cc03248f546247578c57b95134c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 15:55:02 -0700 Subject: [PATCH 10/49] Tpetra::CrsGraph::computeCrsPaddingForPermutedIDs changes @trilinos/tpetra Make sure that computeCrsPaddingForPermutedIDs doesn't double-count entries that appear in both the source and target. I don't think it was doing that before, thanks to Tim's hard work, but I'm just making sure. Also, with TPETRA_VERBOSE set, the method now prints how many duplicates it found. 
--- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 135 ++++++++++++------ 1 file changed, 94 insertions(+), 41 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index d734534341ea..14b527e695e4 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5306,14 +5306,15 @@ namespace Tpetra { using insert_result = typename Kokkos::UnorderedMap::insert_result; - using this_type = CrsGraph; - const this_type* srcCrs = dynamic_cast(&source); // Compute extra capacity needed to accommodate incoming data const map_type& src_row_map = * (source.getRowMap ()); + + using this_type = CrsGraph; + const this_type* srcCrs = dynamic_cast(&source); const bool src_sorted = srcCrs == nullptr ? false : srcCrs->isSorted(); - const bool tgt_sorted = this->isSorted(); const bool src_merged = srcCrs == nullptr ? false : srcCrs->isMerged(); + const bool tgt_sorted = this->isSorted(); const bool tgt_merged = this->isMerged(); std::vector src_row_inds; @@ -5381,9 +5382,8 @@ namespace Tpetra { result = padding.insert (tgt_lid, how_much_padding); } - // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to fail even if - // the user did nothing wrong. We should actually have a retry option. I - // just copied this code over from computeCrsPadding. + // Kokkos::UnorderedMap is allowed to fail even if the user did + // nothing wrong. We should actually have a retry option. 
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (result.failed(), std::runtime_error, "unable to insert padding for LID " << tgt_lid); @@ -5401,15 +5401,32 @@ namespace Tpetra { template void CrsGraph:: - computeCrsPaddingForPermutedIDs (Kokkos::UnorderedMap& padding, - const RowGraph& source, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, - const bool padAll) const + computeCrsPaddingForPermutedIDs( + Kokkos::UnorderedMap& padding, + const RowGraph& source, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs, + const bool padAll) const { using LO = LocalOrdinal; using GO = GlobalOrdinal; - const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds: "; + using Details::Impl::getSortedMergedGlobalRow; + using std::endl; + const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds"; + + std::unique_ptr prefix; + const bool verbose = Details::Behavior::verbose("CrsGraph"); + if (verbose) { + prefix = this->createPrefix("CrsGraph", tfecfFuncName); + std::ostringstream os; + os << *prefix << "permuteToLIDs.extent(0): " + << permuteToLIDs.extent(0) + << ", permuteFromLIDs.extent(0): " + << permuteFromLIDs.extent(0) + << ", padAll: " << (padAll ? "true" : "false") << endl; + std::cerr << os.str(); + } + Kokkos::fence (); const map_type& src_row_map = * (source.getRowMap ()); @@ -5418,49 +5435,85 @@ namespace Tpetra { typename Kokkos::UnorderedMap::insert_result; auto permuteToLIDs_h = permuteToLIDs.view_host (); auto permuteFromLIDs_h = permuteFromLIDs.view_host (); - for (LO i = 0; i < static_cast (permuteToLIDs_h.extent (0)); ++i) { - const GO src_gid = src_row_map.getGlobalElement(permuteFromLIDs_h[i]); - auto num_src_entries = source.getNumEntriesInGlobalRow(src_gid); - if (num_src_entries == 0) - continue; + using this_type = CrsGraph; + const this_type* srcCrs = dynamic_cast(&source); + const bool src_sorted = srcCrs == nullptr ? false : srcCrs->isSorted(); + const bool src_merged = srcCrs == nullptr ? 
false : srcCrs->isMerged(); + const bool tgt_sorted = this->isSorted(); + const bool tgt_merged = this->isMerged(); + + std::vector src_row_inds; + std::vector tgt_row_inds; + std::vector merged_inds; + + size_t srcNumDups = 0; + size_t tgtNumDups = 0; + size_t mergedNumDups = 0; + const LO numPermutes = static_cast(permuteToLIDs_h.extent(0)); + for (LO i = 0; i < numPermutes; ++i) { + const GO src_gid = src_row_map.getGlobalElement(permuteFromLIDs_h[i]); + auto orig_num_src_entries = source.getNumEntriesInGlobalRow(src_gid); + if (orig_num_src_entries == 0) { + continue; + } insert_result result; const LO tgt_lid = permuteToLIDs_h[i]; - if (padAll) - { - result = padding.insert (tgt_lid, num_src_entries); + if (padAll) { + result = padding.insert (tgt_lid, orig_num_src_entries); } else { - size_t check_row_length = 0; - std::vector src_row_inds(num_src_entries); - Teuchos::ArrayView src_row_inds_view(src_row_inds.data(), src_row_inds.size()); - source.getGlobalRowCopy(src_gid, src_row_inds_view, check_row_length); - - const GO tgt_gid = rowMap_->getGlobalElement (tgt_lid); - auto num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid); - std::vector tgt_row_inds(num_tgt_entries); - Teuchos::ArrayView tgt_row_inds_view(tgt_row_inds.data(), tgt_row_inds.size()); - this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, check_row_length); - - size_t how_much_padding = 0; - for (auto src_row_ind : src_row_inds) { - if (std::find(tgt_row_inds.begin(), tgt_row_inds.end(), src_row_ind) == tgt_row_inds.end()) { - // The target row does not have space for - how_much_padding++; - } + size_t curNumSrcDups = 0; + Teuchos::ArrayView src_row_inds_view = + getSortedMergedGlobalRow(src_row_inds, orig_num_src_entries, + curNumSrcDups, source, src_gid, + src_sorted, src_merged); + srcNumDups += curNumSrcDups; + + const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid); + size_t orig_num_tgt_entries = 0; + size_t curNumTgtDups = 0; + Teuchos::ArrayView tgt_row_inds_view = + 
getSortedMergedGlobalRow(tgt_row_inds, orig_num_tgt_entries, + curNumTgtDups, *this, tgt_gid, + tgt_sorted, tgt_merged); + tgtNumDups += curNumTgtDups; + + const size_t orig_num_merged = + size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); + if (merged_inds.size() < orig_num_merged) { + merged_inds.resize(orig_num_merged); } + auto merged_end = + std::merge(src_row_inds_view.begin(), src_row_inds_view.end(), + tgt_row_inds_view.begin(), tgt_row_inds_view.end(), + merged_inds.begin()); + const size_t new_num_merged = + static_cast(merged_end - merged_inds.begin()); + mergedNumDups += (orig_num_merged - new_num_merged); + + const size_t how_much_padding = + new_num_merged >= orig_num_tgt_entries ? + new_num_merged - orig_num_tgt_entries : + size_t(0); result = padding.insert (tgt_lid, how_much_padding); } - // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to - // fail even if the user did nothing wrong. We should actually - // have a retry option. I just copied this code over from - // computeCrsPadding. + + // Kokkos::UnorderedMap is allowed to fail even if the user did + // nothing wrong. We should actually have a retry option. TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (result.failed(), std::runtime_error, "unable to insert padding for LID " << tgt_lid); } + if (verbose) { + std::ostringstream os; + os << *prefix << "Done: srcNumDups: " << srcNumDups + << ", tgtNumDups: " << tgtNumDups + << ", mergedNumDups: " << mergedNumDups << endl; + std::cerr << os.str(); + } } template From 823025a4a193d807cede55f4e4d899a10089a9b2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 15:59:16 -0700 Subject: [PATCH 11/49] Tpetra::CrsMatrix::copyAndPermute @trilinos/tpetra Tpetra::CrsMatrix::copyAndPermute now has two completely separate code paths, depending on whether the target matrix has a static (const) CrsGraph. This should facilitate division of labor. 
--- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 11 +++-- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 4 +- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 49 ++++++++++--------- 3 files changed, 34 insertions(+), 30 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 19621c1e18c2..b10d2bda70d4 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1169,11 +1169,12 @@ namespace Tpetra { const bool verbose); Kokkos::UnorderedMap - computeCrsPadding (const RowGraph& source, - const size_t numSameIDs, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, - const bool verbose) const; + computeCrsPadding( + const RowGraph& source, + const size_t numSameIDs, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs, + const bool verbose) const; Kokkos::UnorderedMap computeCrsPadding (const Kokkos::DualView& importLIDs, diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index 6e992d609cd5..d3abac99738c 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3451,8 +3451,8 @@ namespace Tpetra { copyAndPermuteNonStaticGraph( const RowMatrix& source, const size_t numSameIDs, - const LocalOrdinal permuteToLIDs[], - const LocalOrdinal permuteFromLIDs[], + const Kokkos::DualView& permuteToLIDs_dv, + const Kokkos::DualView& permuteFromLIDs_dv, const size_t numPermutes); protected: diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 93e2702ef440..dbc68d9f6814 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6757,8 +6757,8 @@ namespace Tpetra { copyAndPermuteNonStaticGraph( const RowMatrix& srcMat, 
const size_t numSameIDs, - const LocalOrdinal permuteToLIDs[], - const LocalOrdinal permuteFromLIDs[], + const Kokkos::DualView& permuteToLIDs_dv, + const Kokkos::DualView& permuteFromLIDs_dv, const size_t numPermutes) { using Details::ProfilingRegion; @@ -6784,6 +6784,16 @@ namespace Tpetra { const char* const prefix_raw = verbose ? prefix.get()->c_str() : nullptr; + { + using row_graph_type = RowGraph; + const row_graph_type& srcGraph = *(srcMat.getGraph()); + auto padding = + myGraph_->computeCrsPadding(srcGraph, numSameIDs, + permuteToLIDs_dv, permuteFromLIDs_dv, verbose); + if (padding.size() != 0) { + applyCrsPadding(padding, verbose); + } + } const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed (); // // Copy the first numSame row from source to target (this matrix). @@ -6845,6 +6855,8 @@ namespace Tpetra { std::ostringstream os; os << *prefix << "Do permutes" << endl; } + const LO* const permuteFromLIDs = permuteFromLIDs_dv.view_host().data(); + const LO* const permuteToLIDs = permuteToLIDs_dv.view_host().data(); const map_type& tgtRowMap = * (this->getRowMap ()); for (size_t p = 0; p < numPermutes; ++p) { @@ -6901,11 +6913,11 @@ namespace Tpetra { template void CrsMatrix:: - copyAndPermute - (const SrcDistObject& srcObj, - const size_t numSameIDs, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs) + copyAndPermute( + const SrcDistObject& srcObj, + const size_t numSameIDs, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs) { using Details::Behavior; using Details::dualViewStatusToString; @@ -6944,33 +6956,24 @@ namespace Tpetra { << numPermute << "!= permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent (0) << "."); - TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () ); - auto permuteToLIDs_h = permuteToLIDs.view_host (); - TEUCHOS_ASSERT( ! 
permuteFromLIDs.need_sync_host () ); - auto permuteFromLIDs_h = permuteFromLIDs.view_host (); - // This dynamic cast should succeed, because we've already tested // it in checkSizes(). using RMT = RowMatrix; const RMT& srcMat = dynamic_cast (srcObj); - if (isStaticGraph ()) { + TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () ); + auto permuteToLIDs_h = permuteToLIDs.view_host (); + TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () ); + auto permuteFromLIDs_h = permuteFromLIDs.view_host (); + copyAndPermuteStaticGraph(srcMat, numSameIDs, permuteToLIDs_h.data(), permuteFromLIDs_h.data(), numPermute); } else { - auto padding = - myGraph_->computeCrsPadding(*srcMat.getGraph(), - numSameIDs, permuteToLIDs, permuteFromLIDs, verbose); - if (padding.size() != 0) { - applyCrsPadding(padding, verbose); - } - copyAndPermuteNonStaticGraph(srcMat, numSameIDs, - permuteToLIDs_h.data(), - permuteFromLIDs_h.data(), - numPermute); + copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs, + permuteFromLIDs, numPermute); } if (verbose) { From f80392ac1d3d41b3e2c1807830bec76f81674488 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 16:23:56 -0700 Subject: [PATCH 12/49] Tpetra::CrsMatrix::unpackAndCombine @trilinos/tpetra Tpetra::CrsMatrix::unpackAndCombine now has two completely separate code paths, depending on whether the target matrix has a static (const) CrsGraph. This should facilitate division of labor. 
--- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 18 +++---- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 52 ++++++++++--------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index d3abac99738c..3fa9a9fb49a8 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3482,15 +3482,15 @@ namespace Tpetra { /// \brief Unpack the imported column indices and values, and /// combine into matrix. void - unpackAndCombineImpl (const Kokkos::DualView& importLIDs, - const Kokkos::DualView& imports, - const Kokkos::DualView& numPacketsPerLID, - const size_t constantNumPackets, - Distributor& distor, - const CombineMode combineMode); + unpackAndCombineImpl( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const size_t constantNumPackets, + Distributor & distor, + const CombineMode combineMode, + const bool verbose); /// \brief Implementation of unpackAndCombineImpl for when the /// target matrix's structure may change. 
diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index dbc68d9f6814..0c8a934f8304 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7822,19 +7822,14 @@ namespace Tpetra { return; // nothing to do } - if (!this->isStaticGraph()) { - auto padding = myGraph_->computeCrsPadding(importLIDs, numPacketsPerLID, verbose); - if (padding.size() > 0) - this->applyCrsPadding(padding, verbose); - } - if (debug) { using Teuchos::reduceAll; std::unique_ptr msg (new std::ostringstream ()); int lclBad = 0; try { - this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID, - constantNumPackets, distor, combineMode); + unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID, + constantNumPackets, distor, combineMode, + verbose); } catch (std::exception& e) { lclBad = 1; *msg << e.what (); @@ -7859,8 +7854,9 @@ namespace Tpetra { } } else { - this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID, - constantNumPackets, distor, combineMode); + unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID, + constantNumPackets, distor, combineMode, + verbose); } if (verbose) { @@ -7882,27 +7878,33 @@ namespace Tpetra { template void CrsMatrix:: - unpackAndCombineImpl (const Kokkos::DualView& importLIDs, - const Kokkos::DualView& imports, - const Kokkos::DualView& numPacketsPerLID, - const size_t constantNumPackets, - Distributor & distor, - const CombineMode combineMode) + unpackAndCombineImpl( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const size_t constantNumPackets, + Distributor & distor, + const CombineMode combineMode, + const bool verbose) { - if (this->isStaticGraph ()) { - using ::Tpetra::Details::unpackCrsMatrixAndCombineNew; + if (isStaticGraph ()) { + using Details::unpackCrsMatrixAndCombineNew; unpackCrsMatrixAndCombineNew (*this, imports, numPacketsPerLID, 
importLIDs, constantNumPackets, distor, combineMode); } else { - this->unpackAndCombineImplNonStatic (importLIDs, imports, - numPacketsPerLID, - constantNumPackets, - distor, combineMode); + auto padding = + myGraph_->computeCrsPadding(importLIDs, numPacketsPerLID, + verbose); + if (padding.size() > 0) { + applyCrsPadding(padding, verbose); + } + unpackAndCombineImplNonStatic(importLIDs, imports, + numPacketsPerLID, + constantNumPackets, + distor, combineMode); } } From 60df411c75789e589424cdce8457922a8c9fe8bd Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 16:28:32 -0700 Subject: [PATCH 13/49] Tpetra::CrsMatrix::unpackAndCombineImplNonStatic: Fix signature @trilinos/tpetra Make DualView parameters of unpackAndCombineImplNonStatic consistent with those of unpackAndCombine, so we don't need to cast away const. --- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 19 +++--- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 58 +++++++++---------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index 3fa9a9fb49a8..5268a13aca3d 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3488,22 +3488,21 @@ namespace Tpetra { Kokkos::DualView imports, Kokkos::DualView numPacketsPerLID, const size_t constantNumPackets, - Distributor & distor, + Distributor& distor, const CombineMode combineMode, const bool verbose); /// \brief Implementation of unpackAndCombineImpl for when the /// target matrix's structure may change. 
void - unpackAndCombineImplNonStatic (const Kokkos::DualView& importLIDs, - const Kokkos::DualView& imports, - const Kokkos::DualView& numPacketsPerLID, - const size_t constantNumPackets, - Distributor& distor, - const CombineMode combineMode); + unpackAndCombineImplNonStatic( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const size_t constantNumPackets, + Distributor& distor, + const CombineMode combineMode); public: /// \brief Unpack the imported column indices and values, and diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 0c8a934f8304..e23c81c06540 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7911,15 +7911,14 @@ namespace Tpetra { template void CrsMatrix:: - unpackAndCombineImplNonStatic (const Kokkos::DualView& importLIDs, - const Kokkos::DualView& imports, - const Kokkos::DualView& numPacketsPerLID, - const size_t /* constantNumPackets */, - Distributor& /* distor */, - const CombineMode combineMode) + unpackAndCombineImplNonStatic( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const size_t constantNumPackets, + Distributor& distor, + const CombineMode combineMode) { using Kokkos::View; using Kokkos::subview; @@ -7930,15 +7929,16 @@ namespace Tpetra { using Details::PackTraits; using Details::ScalarViewTraits; using std::endl; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef impl_scalar_type ST; - typedef typename Teuchos::ArrayView::size_type size_type; - typedef typename View::HostMirror::execution_space HES; - typedef std::pair::size_type, - typename View::size_type> pair_type; - typedef View gids_out_type; - typedef View vals_out_type; + using LO = LocalOrdinal; + using GO = GlobalOrdinal; + using ST = impl_scalar_type; + using size_type = typename 
Teuchos::ArrayView::size_type; + using HES = + typename View::HostMirror::execution_space; + using pair_type = std::pair::size_type, + typename View::size_type>; + using gids_out_type = View; + using vals_out_type = View; const char tfecfFuncName[] = "unpackAndCombineImplNonStatic: "; const bool debug = Behavior::debug("CrsMatrix"); @@ -7946,7 +7946,7 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose) { prefix = this->createPrefix("CrsMatrix", - "unpackAndCombineImplNonStatic: "); + "unpackAndCombineImplNonStatic"); std::ostringstream os; os << *prefix << endl; // we've already printed DualViews' statuses std::cerr << os.str (); @@ -7969,22 +7969,20 @@ namespace Tpetra { return; // nothing to do; no need to combine entries } - // We're unpacking on host. This is read-only host access of imports. - { - auto imports_nc = castAwayConstDualView (imports); - imports_nc.sync_host (); + // We're unpacking on host. This is read-only host access. + if (imports.need_sync_host()) { + imports.sync_host (); } - auto imports_h = imports.view_host (); + auto imports_h = imports.view_host(); // Read-only host access. - { - auto numPacketsPerLID_nc = castAwayConstDualView (numPacketsPerLID); - numPacketsPerLID_nc.sync_host (); + if (numPacketsPerLID.need_sync_host()) { + numPacketsPerLID.sync_host (); } - auto numPacketsPerLID_h = numPacketsPerLID.view_host (); + auto numPacketsPerLID_h = numPacketsPerLID.view_host(); - TEUCHOS_ASSERT( ! importLIDs.need_sync_host () ); - auto importLIDs_h = importLIDs.view_host (); + TEUCHOS_ASSERT( ! importLIDs.need_sync_host() ); + auto importLIDs_h = importLIDs.view_host(); size_t numBytesPerValue; { From 915f4faed58199c21c9aeac76524eec172543f76 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 16:44:49 -0700 Subject: [PATCH 14/49] Tpetra::CrsMatrix: Remove redundant debug check @trilinos/tpetra Remove redundant debug check in unpackAndCombineImplNonStatic. 
CrsMatrix::unpackAndCombine is supposed to check all its arguments before passing them to implementation methods, so the latter don't need to recheck their arguments. --- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index e23c81c06540..b06097b54470 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7816,6 +7816,12 @@ namespace Tpetra { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (true, std::invalid_argument, os.str ()); } + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (importLIDs.extent(0) != numPacketsPerLID.extent(0), + std::invalid_argument, "importLIDs.extent(0)=" + << importLIDs.extent(0) + << " != numPacketsPerLID.extent(0)=" + << numPacketsPerLID.extent(0) << "."); } if (combineMode == ZERO) { @@ -7955,16 +7961,6 @@ namespace Tpetra { verbose ? prefix.get()->c_str() : nullptr; const size_type numImportLIDs = importLIDs.extent (0); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (numImportLIDs != static_cast (numPacketsPerLID.extent (0)), - std::invalid_argument, "importLIDs.size() = " << numImportLIDs - << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0) - << "."); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (combineMode != ADD && combineMode != INSERT && combineMode != REPLACE && - combineMode != ABSMAX && combineMode != ZERO, std::invalid_argument, - "Invalid CombineMode value " << combineMode << ". 
Valid " - << "values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."); if (combineMode == ZERO || numImportLIDs == 0) { return; // nothing to do; no need to combine entries } From 83a6854531a90b4c4665b2533887d2ca5dee25e5 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 17:02:20 -0700 Subject: [PATCH 15/49] Tpetra::CrsGraph: Add stub computePaddingForCrsMatrixUnpack @trilinos/tpetra Currently, it just does what computeCrsPadding(imports) does. Later, it will unpack the received data and figure out how much space the target actually needs, without double-counting merges (as it currently does). CrsMatrix::unpackAndCombineImplNonStatic calls this method, so once we make this method do the right thing (unpack the incoming data and count the merge size per row), CrsMatrix should also be doing the right thing. Thus, in theory, this should at least partly address #6663. --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 14 +++- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 66 +++++++++++++++++++ .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 5 +- 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index b10d2bda70d4..3e985d091a3f 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1177,9 +1177,17 @@ namespace Tpetra { const bool verbose) const; Kokkos::UnorderedMap - computeCrsPadding (const Kokkos::DualView& importLIDs, - Kokkos::DualView numPacketsPerLID, - const bool verbose) const; + computeCrsPadding( + const Kokkos::DualView& importLIDs, + Kokkos::DualView numPacketsPerLID, + const bool verbose) const; + + Kokkos::UnorderedMap + computePaddingForCrsMatrixUnpack( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const bool verbose) const; void computeCrsPaddingForSameIDs (Kokkos::UnorderedMap& padding, diff --git 
a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 14b527e695e4..62818f6df108 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5574,6 +5574,72 @@ namespace Tpetra { return padding; } + template + Kokkos::UnorderedMap + CrsGraph:: + computePaddingForCrsMatrixUnpack( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const bool verbose) const + { + using std::endl; + const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack"; + + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsGraph", tfecfFuncName); + std::ostringstream os; + os << *prefix << "importLIDs.extent(0): " + << importLIDs.extent(0) + << ", imports.extent(0): " + << imports.extent(0) + << ", numPacketsPerLID.extent(0): " + << numPacketsPerLID.extent(0) + << endl; + std::cerr << os.str(); + } + + // Creating padding for each new incoming index + Kokkos::fence (); // Make sure device sees changes made by host + auto numEnt = static_cast (importLIDs.extent (0)); + + // if (imports.need_sync_host()) { + // imports.sync_host(); + // } + if (numPacketsPerLID.need_sync_host ()) { + numPacketsPerLID.sync_host(); + } + + auto importLIDs_h = importLIDs.view_host(); + //auto imports_h = imports.view_host(); + auto numPacketsPerLID_h = numPacketsPerLID.view_host(); + + // without unpacking the import/export buffer, we don't know how many of the + // numPacketsPerLID[i] LIDs exist in the target. Below, it is assumed that + // none do, and padding is requested for all. 
+ // + // Use tmp_padding since Kokkos::UnorderedMap does not allow re-insertion + std::map tmp_padding; + for (size_t i = 0; i < numEnt; ++i) + tmp_padding[importLIDs_h[i]] += numPacketsPerLID_h[i]; + + using padding_type = Kokkos::UnorderedMap; + padding_type padding (importLIDs.extent (0)); + for (auto&& item : tmp_padding) { + auto result = padding.insert (item.first, item.second); + // FIXME (mfh 09 Apr 2019) See note in other computeCrsPaddingoverload. + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (result.failed(), std::runtime_error, + ": Unable to insert padding for LID " << item.first); + } + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (padding.failed_insert(), std::runtime_error, + ": Failed to insert one or more indices into padding map"); + return padding; + } + template void CrsGraph:: diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index b06097b54470..3e82b1ca2ea4 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7901,9 +7901,8 @@ namespace Tpetra { distor, combineMode); } else { - auto padding = - myGraph_->computeCrsPadding(importLIDs, numPacketsPerLID, - verbose); + auto padding = myGraph_->computePaddingForCrsMatrixUnpack( + importLIDs, imports, numPacketsPerLID, verbose); if (padding.size() > 0) { applyCrsPadding(padding, verbose); } From fb2a9f15dc97a707d9dc5b740a863282f779831b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 17:49:53 -0700 Subject: [PATCH 16/49] Tpetra::CrsGraph: Finish computePaddingForCrsMatrixUnpack @trilinos/tpetra Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack now actually unpacks the received data, and figures out how much the target matrix actually needs to grow, without double-counting incoming entries that would merge with existing target matrix entries. This should at least partly address #6663. 
--- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 2 +- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 126 +++++++++++++----- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 44 +++++- 3 files changed, 132 insertions(+), 40 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 3e985d091a3f..3f302f2bf136 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1182,7 +1182,7 @@ namespace Tpetra { Kokkos::DualView numPacketsPerLID, const bool verbose) const; - Kokkos::UnorderedMap + std::vector computePaddingForCrsMatrixUnpack( const Kokkos::DualView& importLIDs, Kokkos::DualView imports, diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 62818f6df108..528ffa213741 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5364,7 +5364,7 @@ namespace Tpetra { const size_t orig_num_merged = size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); - if (merged_inds.size() < orig_num_merged) { + if (merged_inds.size() != orig_num_merged) { merged_inds.resize(orig_num_merged); } auto merged_end = @@ -5482,7 +5482,7 @@ namespace Tpetra { const size_t orig_num_merged = size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); - if (merged_inds.size() < orig_num_merged) { + if (merged_inds.size() != orig_num_merged) { merged_inds.resize(orig_num_merged); } auto merged_end = @@ -5575,7 +5575,7 @@ namespace Tpetra { } template - Kokkos::UnorderedMap + std::vector CrsGraph:: computePaddingForCrsMatrixUnpack( const Kokkos::DualView& importLIDs, @@ -5583,6 +5583,10 @@ namespace Tpetra { Kokkos::DualView numPacketsPerLID, const bool verbose) const { + using LO = local_ordinal_type; + using GO = global_ordinal_type; + using Details::Impl::getSortedMergedGlobalRow; + using 
Details::PackTraits; using std::endl; const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack"; @@ -5590,53 +5594,101 @@ namespace Tpetra { if (verbose) { prefix = this->createPrefix("CrsGraph", tfecfFuncName); std::ostringstream os; - os << *prefix << "importLIDs.extent(0): " - << importLIDs.extent(0) - << ", imports.extent(0): " - << imports.extent(0) - << ", numPacketsPerLID.extent(0): " - << numPacketsPerLID.extent(0) - << endl; + os << *prefix << "Start" << endl; std::cerr << os.str(); } - // Creating padding for each new incoming index - Kokkos::fence (); // Make sure device sees changes made by host - auto numEnt = static_cast (importLIDs.extent (0)); - - // if (imports.need_sync_host()) { - // imports.sync_host(); - // } + Kokkos::fence (); // Make sure host sees changes made by device + if (imports.need_sync_host()) { + imports.sync_host(); + } if (numPacketsPerLID.need_sync_host ()) { numPacketsPerLID.sync_host(); } auto importLIDs_h = importLIDs.view_host(); - //auto imports_h = imports.view_host(); + auto imports_h = imports.view_host(); auto numPacketsPerLID_h = numPacketsPerLID.view_host(); - // without unpacking the import/export buffer, we don't know how many of the - // numPacketsPerLID[i] LIDs exist in the target. Below, it is assumed that - // none do, and padding is requested for all. - // - // Use tmp_padding since Kokkos::UnorderedMap does not allow re-insertion - std::map tmp_padding; - for (size_t i = 0; i < numEnt; ++i) - tmp_padding[importLIDs_h[i]] += numPacketsPerLID_h[i]; + const LO numImports = static_cast(importLIDs.extent(0)); + const bool tgtSorted = isSorted(); + const bool tgtMerged = isMerged(); - using padding_type = Kokkos::UnorderedMap; - padding_type padding (importLIDs.extent (0)); - for (auto&& item : tmp_padding) { - auto result = padding.insert (item.first, item.second); - // FIXME (mfh 09 Apr 2019) See note in other computeCrsPaddingoverload. 
- TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - ": Unable to insert padding for LID " << item.first); + std::vector padding(numImports); + std::vector gblColIndsReceived; + std::vector gblColIndsTgt; + std::vector gblColIndsMerged; + + size_t srcNumDups = 0; + size_t tgtNumDups = 0; + size_t mergedNumDups = 0; + + size_t offset = 0; + for (LO whichImp = 0; whichImp < numImports; ++whichImp) { + const LO lclRowInd = importLIDs_h[whichImp]; + const GO gblRowInd = rowMap_->getGlobalElement(lclRowInd); + + const size_t numBytes = numPacketsPerLID_h[whichImp]; + if (numBytes == 0) { + continue; + } + LO numEntriesReceived = 0; + const size_t numEntBeg = offset; + const size_t numEntLen = + PackTraits::packValueCount(numEntriesReceived); + PackTraits::unpackValue(numEntriesReceived, + imports_h.data() + numEntBeg); + const size_t gidsBeg = numEntBeg + numEntLen; + + if (gblColIndsReceived.size() < size_t(numEntriesReceived)) { + gblColIndsReceived.resize(numEntriesReceived); + } + (void) PackTraits::unpackArray(gblColIndsReceived.data(), + imports_h.data() + gidsBeg, + numEntriesReceived); + std::sort(gblColIndsReceived.begin(), gblColIndsReceived.end()); + auto newEnd = std::unique(gblColIndsReceived.begin(), + gblColIndsReceived.end()); + const size_t numEntriesUnique = + static_cast(newEnd - gblColIndsReceived.begin()); + gblColIndsReceived.resize(numEntriesUnique); + srcNumDups += (numEntriesReceived - numEntriesUnique); + + size_t origNumTgtEnt = 0; + size_t curNumTgtDups = 0; + Teuchos::ArrayView gblColIndsTgtView = + getSortedMergedGlobalRow(gblColIndsTgt, origNumTgtEnt, + curNumTgtDups, *this, gblRowInd, + tgtSorted, tgtMerged); + tgtNumDups += curNumTgtDups; + + const size_t origNumMerged = numEntriesUnique + + size_t(gblColIndsTgtView.size()); + if (gblColIndsMerged.size() != origNumMerged) { + gblColIndsMerged.resize(origNumMerged); + } + auto mergedNewEnd = + std::merge(gblColIndsReceived.begin(), gblColIndsReceived.end(), + 
gblColIndsTgtView.begin(), gblColIndsTgtView.end(), + gblColIndsMerged.begin()); + const size_t newNumMerged = + static_cast(mergedNewEnd - gblColIndsMerged.begin()); + mergedNumDups += (origNumMerged - newNumMerged); + + const size_t extraSpaceNeeded = newNumMerged >= origNumTgtEnt ? + newNumMerged - origNumTgtEnt : + size_t(0); + padding[whichImp] = extraSpaceNeeded; + offset += numBytes; } - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (padding.failed_insert(), std::runtime_error, - ": Failed to insert one or more indices into padding map"); + if (verbose) { + std::ostringstream os; + os << *prefix << "Done: srcNumDups: " << srcNumDups + << ", tgtNumDups: " << tgtNumDups + << ", mergedNumDups: " << mergedNumDups << endl; + std::cerr << os.str(); + } return padding; } diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 3e82b1ca2ea4..80a629c4575c 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7894,6 +7894,23 @@ namespace Tpetra { const CombineMode combineMode, const bool verbose) { + using std::endl; + using LO = local_ordinal_type; + const char tfecfFuncName[] = "unpackAndCombineImpl"; + std::unique_ptr prefix; + if (verbose) { + prefix = this->createPrefix("CrsMatrix", tfecfFuncName); + std::ostringstream os; + os << *prefix << "importLIDs.extent(0): " + << importLIDs.extent(0) + << ", imports.extent(0): " + << imports.extent(0) + << ", numPacketsPerLID.extent(0): " + << numPacketsPerLID.extent(0) + << endl; + std::cerr << os.str(); + } + if (isStaticGraph ()) { using Details::unpackCrsMatrixAndCombineNew; unpackCrsMatrixAndCombineNew (*this, imports, numPacketsPerLID, @@ -7901,8 +7918,31 @@ namespace Tpetra { distor, combineMode); } else { - auto padding = myGraph_->computePaddingForCrsMatrixUnpack( - importLIDs, imports, numPacketsPerLID, verbose); + std::vector paddingVec = + myGraph_->computePaddingForCrsMatrixUnpack( + 
importLIDs, imports, numPacketsPerLID, verbose); + using padding_type = + Kokkos::UnorderedMap; + const LO numImports = static_cast(importLIDs.extent(0)); + padding_type padding (numImports); + + // padding gets pre-filled on devic, but we're modifying it on + // host here, so we need to fence to ensure that device is done. + Kokkos::fence(); + + auto importLIDs_h = importLIDs.view_host(); + for (LO whichImp = 0; whichImp < numImports; ++whichImp) { + const LO lclRowInd = importLIDs_h[whichImp]; + auto result = padding.insert(lclRowInd, paddingVec[whichImp]); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (result.failed(), std::runtime_error, + ": Unable to insert padding for LID " << lclRowInd); + } + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (padding.failed_insert(), std::runtime_error, + ": Failed to insert one or more indices into padding map"); + if (padding.size() > 0) { applyCrsPadding(padding, verbose); } From 63813c1ae0a4b2d101e17affaca5461823bb7258 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 18:05:31 -0700 Subject: [PATCH 17/49] Tpetra::CrsMatrix::applyCrsPadding: Remove duplicate allocations @trilinos/tpetra CrsMatrix::applyCrsPadding allocates deep copies of the current column indices and values arrays, then passes the copies to padCrsArrays. padCrsArrays then copies both arrays again. This commit fixes applyCrsPadding so that it just passes the current column indices and values arrays directly to padCrsArrays, instead of making redundant copies of each. 
--- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 122 ++++++------------ 1 file changed, 42 insertions(+), 80 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 80a629c4575c..e5608c7b7942 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6500,106 +6500,68 @@ namespace Tpetra { // Case 1: Unpacked storage refill_num_row_entries = true; auto num_row_entries = myGraph_->k_numRowEntries_; - Kokkos::parallel_for("Fill end row pointers", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - row_ptr_end(i) = row_ptr_beg(i) + num_row_entries(i); - } - ); + Kokkos::parallel_for + ("Fill end row pointers", range_policy(0, N), + KOKKOS_LAMBDA (const size_t i) { + row_ptr_end(i) = row_ptr_beg(i) + num_row_entries(i); + }); } else { - // mfh If packed storage, don't need row_ptr_end to be separate allocation; - // could just have it alias row_ptr_beg+1. + // FIXME (mfh 04 Feb 2020) If packed storage, don't need + // row_ptr_end to be separate allocation; could just have it + // alias row_ptr_beg+1. 
+ // // Case 2: Packed storage - Kokkos::parallel_for("Fill end row pointers", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - row_ptr_end(i) = row_ptr_beg(i+1); - } - ); + Kokkos::parallel_for + ("Fill end row pointers", range_policy(0, N), + KOKKOS_LAMBDA (const size_t i) { + row_ptr_end(i) = row_ptr_beg(i+1); + }); } using values_type = typename local_matrix_type::values_type; - if (verbose) { - std::ostringstream os; - os << *prefix << "Allocate (copy of) values: " << k_values1D_.size() << endl; - std::cerr << os.str(); - } - values_type values("values", k_values1D_.size()); - Kokkos::deep_copy(values, k_values1D_); + using padding_type = Kokkos::UnorderedMap; - if(myGraph_->isGloballyIndexed()) { - using indices_type = typename crs_graph_type::t_GlobalOrdinal_1D; - if (verbose) { - std::ostringstream os; - os << *prefix << "Allocate (copy of) global column indices: " - << myGraph_->k_gblInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - indices_type indices("indices", myGraph_->k_gblInds1D_.extent(0)); - Kokkos::deep_copy(indices, myGraph_->k_gblInds1D_); - using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptr_beg, - row_ptr_end, indices, values, padding, myRank, verbose); - if (verbose) { - std::ostringstream os; - os << *prefix << "Free old myGraph_->k_gblInds1D_: " - << myGraph_->k_gblInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - myGraph_->k_gblInds1D_ = indices; - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - values.size() != indices.size(), - std::logic_error, - "After padding, values and indices should be same size"); + if (myGraph_->isGloballyIndexed()) { + //using indices_type = typename crs_graph_type::t_GlobalOrdinal_1D; + padCrsArrays /* */ ( + row_ptr_beg, row_ptr_end, myGraph_->k_gblInds1D_, k_values1D_, + padding, myRank, verbose); + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (k_values1D_.extent(0) != myGraph_->k_gblInds1D_.extent(0), + std::logic_error, + "After padding, values and indices should be same size"); } 
else { - using indices_type = typename local_graph_type::entries_type::non_const_type; - if (verbose) { - std::ostringstream os; - os << *prefix << "Allocate (copy of) local column indices: " - << myGraph_->k_lclInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - indices_type indices("indices", myGraph_->k_lclInds1D_.extent(0)); - Kokkos::deep_copy(indices, myGraph_->k_lclInds1D_); - using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptr_beg, - row_ptr_end, indices, values, padding, myRank, verbose); - if (verbose) { - std::ostringstream os; - os << *prefix << "Free old myGraph_->k_lclInds1D_: " - << myGraph_->k_lclInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - myGraph_->k_lclInds1D_ = indices; - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - values.size() != indices.size(), - std::logic_error, - "After padding, values and indices should be same size"); + //using indices_type = typename local_graph_type::entries_type::non_const_type; + padCrsArrays /* */ ( + row_ptr_beg, row_ptr_end, myGraph_->k_lclInds1D_, k_values1D_, + padding, myRank, verbose); + + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (k_values1D_.extent(0) != myGraph_->k_lclInds1D_.extent(0), + std::logic_error, + "After padding, values and indices should be same size"); } if (refill_num_row_entries) { auto num_row_entries = myGraph_->k_numRowEntries_; - Kokkos::parallel_for("Fill num entries", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - num_row_entries(i) = row_ptr_end(i) - row_ptr_beg(i); - } - ); + Kokkos::parallel_for + ("Fill num entries", range_policy(0, N), + KOKKOS_LAMBDA (const size_t i) { + num_row_entries(i) = row_ptr_end(i) - row_ptr_beg(i); + }); } if (verbose) { std::ostringstream os; - os << *prefix << "Free old myGraph_->k_rowPtrs_: " - << myGraph_->k_rowPtrs_.extent(0) << endl; + os << *prefix << "Assign myGraph_->k_rowPtrs_: " + << "old=" << myGraph_->k_rowPtrs_.extent(0) + << ", new=" << row_ptr_beg.extent(0) << endl; std::cerr << os.str(); } 
myGraph_->k_rowPtrs_ = row_ptr_beg; - if (verbose) { - std::ostringstream os; - os << *prefix << "Free old k_values1D_: " - << k_values1D_.extent(0) << endl; - std::cerr << os.str(); - } - k_values1D_ = values; } template From b6b352129b5898f15f3868e9d4347ce0e4a0df1c Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 20:42:45 -0700 Subject: [PATCH 18/49] Tpetra::CrsMatrix: Fix build warning (unused variable) @trilinos/tpetra --- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index e5608c7b7942..5c615429a303 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7742,7 +7742,6 @@ namespace Tpetra { {"ADD", "REPLACE", "ABSMAX", "INSERT", "ZERO"}; std::unique_ptr prefix; - int myRank = 0; if (verbose) { prefix = this->createPrefix("CrsMatrix", "unpackAndCombine"); std::ostringstream os; @@ -7892,20 +7891,21 @@ namespace Tpetra { // host here, so we need to fence to ensure that device is done. 
Kokkos::fence(); - auto importLIDs_h = importLIDs.view_host(); - for (LO whichImp = 0; whichImp < numImports; ++whichImp) { - const LO lclRowInd = importLIDs_h[whichImp]; - auto result = padding.insert(lclRowInd, paddingVec[whichImp]); + if (paddingVec.size() != 0) { + auto importLIDs_h = importLIDs.view_host(); + for (LO whichImp = 0; whichImp < numImports; ++whichImp) { + const LO lclRowInd = importLIDs_h[whichImp]; + auto result = + padding.insert(lclRowInd, paddingVec[whichImp]); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (result.failed(), std::runtime_error, + ": Unable to insert padding for LID " << lclRowInd); + } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - ": Unable to insert padding for LID " << lclRowInd); + (padding.failed_insert(), std::runtime_error, + ": Failed to insert one or more indices into padding map"); } - - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (padding.failed_insert(), std::runtime_error, - ": Failed to insert one or more indices into padding map"); - - if (padding.size() > 0) { + if (padding.size() != 0) { applyCrsPadding(padding, verbose); } unpackAndCombineImplNonStatic(importLIDs, imports, From 8d415f810608f40b4ffefc0765afa02a133a224b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 21:03:57 -0700 Subject: [PATCH 19/49] Tpetra::CrsMatrix: Fix more build warnings @trilinos/tpetra Also, change the types of unpackCrsMatrixAndCombineNew's DualView parameters, so that unpackCrsMatrixAndCombineNew does not need to cast away their const-ness any more. 
--- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 109 +++++++++--------- ...Details_unpackCrsMatrixAndCombine_decl.hpp | 21 ++-- ..._Details_unpackCrsMatrixAndCombine_def.hpp | 59 +++++----- 3 files changed, 98 insertions(+), 91 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 5c615429a303..a323500ec1bf 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6445,15 +6445,17 @@ namespace Tpetra { using execution_space = typename device_type::execution_space; using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; using range_policy = Kokkos::RangePolicy>; - const char tfecfFuncName[] = "applyCrsPadding: "; - ProfilingRegion regionCAP ("Tpetra::CrsMatrix::applyCrsPadding"); + const char tfecfFuncName[] = "applyCrsPadding"; + const char suffix[] = + ". Please report this bug to the Tpetra developers."; + ProfilingRegion regionCAP("Tpetra::CrsMatrix::applyCrsPadding"); std::unique_ptr prefix; if (verbose) { - prefix = this->createPrefix("CrsMatrix", "applyCrsPadding"); + prefix = this->createPrefix("CrsMatrix", tfecfFuncName); std::ostringstream os; - os << *prefix << "padding.size(): " << padding.size() << endl; - std::cerr << os.str (); + os << *prefix << "padding.size()=" << padding.size() << endl; + std::cerr << os.str(); } const int myRank = ! verbose ? -1 : [&] () { auto map = this->getMap(); @@ -6468,12 +6470,13 @@ namespace Tpetra { } (); // NOTE (mfh 29 Jan 2020) This allocates the values array. - if (! myGraph_->indicesAreAllocated ()) { - this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose); + if (! myGraph_->indicesAreAllocated()) { + allocateValues(GlobalIndices, GraphNotYetAllocated, verbose); } - if (padding.size() == 0) + if (padding.size() == 0) { return; + } // Making copies here because k_rowPtrs_ has a const type. Otherwise, we // would use it directly. 
@@ -6487,7 +6490,8 @@ namespace Tpetra { row_ptrs_type row_ptr_beg("row_ptr_beg", myGraph_->k_rowPtrs_.extent(0)); Kokkos::deep_copy(row_ptr_beg, myGraph_->k_rowPtrs_); - const size_t N = (row_ptr_beg.extent(0) == 0 ? 0 : row_ptr_beg.extent(0) - 1); + const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) : + size_t(row_ptr_beg.extent(0) - 1); if (verbose) { std::ostringstream os; os << *prefix << "Allocate row_ptrs_end: " << N << endl; @@ -6519,30 +6523,27 @@ namespace Tpetra { }); } - using values_type = typename local_matrix_type::values_type; - using padding_type = Kokkos::UnorderedMap; - if (myGraph_->isGloballyIndexed()) { - //using indices_type = typename crs_graph_type::t_GlobalOrdinal_1D; - padCrsArrays /* */ ( - row_ptr_beg, row_ptr_end, myGraph_->k_gblInds1D_, k_values1D_, - padding, myRank, verbose); - + padCrsArrays(row_ptr_beg, row_ptr_end, myGraph_->k_gblInds1D_, + k_values1D_, padding, myRank, verbose); + const auto newValuesLen = k_values1D_.extent(0); + const auto newColIndsLen = myGraph_->k_gblInds1D_.extent(0); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (k_values1D_.extent(0) != myGraph_->k_gblInds1D_.extent(0), - std::logic_error, - "After padding, values and indices should be same size"); + (newValuesLen != newColIndsLen, std::logic_error, + "After padding, values and indices should be same size, but " + "k_values1D_.extent(0)=" << newValuesLen << " != myGraph_->" + "k_gblInds1D_.extent(0)=" << newColIndsLen << "."); } else { - //using indices_type = typename local_graph_type::entries_type::non_const_type; - padCrsArrays /* */ ( - row_ptr_beg, row_ptr_end, myGraph_->k_lclInds1D_, k_values1D_, - padding, myRank, verbose); - + padCrsArrays(row_ptr_beg, row_ptr_end, myGraph_->k_lclInds1D_, + k_values1D_, padding, myRank, verbose); + const auto newValuesLen = k_values1D_.extent(0); + const auto newColIndsLen = myGraph_->k_lclInds1D_.extent(0); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (k_values1D_.extent(0) != myGraph_->k_lclInds1D_.extent(0), - 
std::logic_error, - "After padding, values and indices should be same size"); + (newValuesLen != newColIndsLen, std::logic_error, + "After padding, values and indices should be same size, but " + "k_values1D_.extent(0)=" << newValuesLen << " != myGraph_->" + "k_lclInds1D_.extent(0)=" << newColIndsLen << "."); } if (refill_num_row_entries) { @@ -7874,9 +7875,9 @@ namespace Tpetra { if (isStaticGraph ()) { using Details::unpackCrsMatrixAndCombineNew; - unpackCrsMatrixAndCombineNew (*this, imports, numPacketsPerLID, - importLIDs, constantNumPackets, - distor, combineMode); + unpackCrsMatrixAndCombineNew(*this, imports, numPacketsPerLID, + importLIDs, constantNumPackets, + distor, combineMode); } else { std::vector paddingVec = @@ -7946,14 +7947,13 @@ namespace Tpetra { typename View::size_type>; using gids_out_type = View; using vals_out_type = View; - const char tfecfFuncName[] = "unpackAndCombineImplNonStatic: "; + const char tfecfFuncName[] = "unpackAndCombineImplNonStatic"; const bool debug = Behavior::debug("CrsMatrix"); const bool verbose = Behavior::verbose("CrsMatrix"); std::unique_ptr prefix; if (verbose) { - prefix = this->createPrefix("CrsMatrix", - "unpackAndCombineImplNonStatic"); + prefix = this->createPrefix("CrsMatrix", tfecfFuncName); std::ostringstream os; os << *prefix << endl; // we've already printed DualViews' statuses std::cerr << os.str (); @@ -8006,18 +8006,19 @@ namespace Tpetra { // We need to unpack a nonzero number of entries for this row. 
if (debug) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (offset + numBytes > static_cast (imports_h.extent (0)), - std::logic_error, "At local row index importLIDs_h[i=" << i << "]=" - << importLIDs_h[i] << ", offset (=" << offset << ") + numBytes (=" - << numBytes << ") > imports_h.extent(0)=" - << imports_h.extent (0) << "."); + (offset + numBytes > size_t(imports_h.extent (0)), + std::logic_error, ": At local row index importLIDs_h[i=" + << i << "]=" << importLIDs_h[i] << ", offset (=" << offset + << ") + numBytes (=" << numBytes << ") > " + "imports_h.extent(0)=" << imports_h.extent (0) << "."); } LO numEntLO = 0; if (debug) { - const size_t theNumBytes = PackTraits::packValueCount (numEntLO); + const size_t theNumBytes = + PackTraits::packValueCount (numEntLO); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (theNumBytes > numBytes, std::logic_error, "theNumBytes = " + (theNumBytes > numBytes, std::logic_error, ": theNumBytes=" << theNumBytes << " > numBytes = " << numBytes << "."); } const char* const inBuf = imports_h.data () + offset; @@ -8026,14 +8027,15 @@ namespace Tpetra { if (debug) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (actualNumBytes > numBytes, std::logic_error, "At i = " << i + (actualNumBytes > numBytes, std::logic_error, ": At i=" << i << ", actualNumBytes=" << actualNumBytes << " > numBytes=" << numBytes << "."); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (numEntLO == 0, std::logic_error, "At local row index importLIDs_h[i=" - << i << "]=" << importLIDs_h[i] << ", the number of entries read " - "from the packed data is numEntLO=" << numEntLO << ", but numBytes=" - << numBytes << " != 0."); + (numEntLO == 0, std::logic_error, ": At local row index " + "importLIDs_h[i=" << i << "]=" << importLIDs_h[i] << ", " + "the number of entries read from the packed data is " + "numEntLO=" << numEntLO << ", but numBytes=" << numBytes + << " != 0."); } maxRowNumEnt = std::max(size_t(numEntLO), maxRowNumEnt); @@ -8059,9 +8061,12 @@ namespace Tpetra { // for each row's 
data to contain the run-time size. This is only // necessary if the size is not a compile-time constant. Scalar val; - gblColInds = ScalarViewTraits::allocateArray (gid, maxRowNumEnt, "gids"); - lclColInds = ScalarViewTraits::allocateArray (lid, maxRowNumEnt, "lids"); - vals = ScalarViewTraits::allocateArray (val, maxRowNumEnt, "vals"); + gblColInds = ScalarViewTraits::allocateArray( + gid, maxRowNumEnt, "gids"); + lclColInds = ScalarViewTraits::allocateArray( + lid, maxRowNumEnt, "lids"); + vals = ScalarViewTraits::allocateArray( + val, maxRowNumEnt, "vals"); } offset = 0; @@ -8084,9 +8089,9 @@ namespace Tpetra { unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (), offset, numBytes, numEnt, numBytesPerValue); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (numBytes != numBytesOut, std::logic_error, "At i = " << i << ", " - << "numBytes = " << numBytes << " != numBytesOut = " << numBytesOut - << "."); + (numBytes != numBytesOut, std::logic_error, ": At i=" << i + << ", numBytes=" << numBytes << " != numBytesOut=" + << numBytesOut << "."); const ST* const valsRaw = const_cast (valsOut.data ()); const GO* const gidsRaw = const_cast (gidsOut.data ()); diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_decl.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_decl.hpp index ba541e923a62..57d8ed190f7b 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_decl.hpp @@ -138,16 +138,17 @@ unpackCrsMatrixAndCombine (const CrsMatrix& sourceMatrix, template void -unpackCrsMatrixAndCombineNew (const CrsMatrix& sourceMatrix, - const Kokkos::DualView::buffer_device_type>& imports, - const Kokkos::DualView::buffer_device_type>& numPacketsPerLID, - const Kokkos::DualView::buffer_device_type>& importLIDs, - const size_t constantNumPackets, - Distributor & distor, - const CombineMode combineMode); +unpackCrsMatrixAndCombineNew( + 
const CrsMatrix& sourceMatrix, + Kokkos::DualView::buffer_device_type> imports, + Kokkos::DualView::buffer_device_type> numPacketsPerLID, + const Kokkos::DualView::buffer_device_type>& importLIDs, + const size_t constantNumPackets, + Distributor& distor, + const CombineMode combineMode); /// \brief Special version of Tpetra::Details::unpackCrsMatrixAndCombine /// that also unpacks owning process ranks. diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp index ed33b7bebd2e..ac3e6ed9bcd9 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_unpackCrsMatrixAndCombine_def.hpp @@ -547,15 +547,18 @@ unpackAndCombineIntoCrsMatrix( const Tpetra::CombineMode combine_mode, const bool unpack_pids) { - typedef typename LocalMatrix::value_type ST; - typedef typename LocalMap::local_ordinal_type LO; - typedef typename LocalMap::device_type DT; - typedef typename DT::execution_space XS; - typedef Kokkos::RangePolicy > range_policy; - typedef UnpackCrsMatrixAndCombineFunctor unpack_functor_type; - + using ST = typename LocalMatrix::value_type; + using LO = typename LocalMap::local_ordinal_type; + using DT = typename LocalMap::device_type; + using XS = typename DT::execution_space; + using range_policy = + Kokkos::RangePolicy >; + using unpack_functor_type = + UnpackCrsMatrixAndCombineFunctor; const char prefix[] = - "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix: "; + "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::" + "unpackAndCombineIntoCrsMatrix: "; const size_t num_import_lids = static_cast(import_lids.extent(0)); if (num_import_lids == 0) { @@ -1143,18 +1146,18 @@ unpackCrsMatrixAndCombine( template void -unpackCrsMatrixAndCombineNew (const CrsMatrix& sourceMatrix, - const Kokkos::DualView::buffer_device_type>& imports, - const 
Kokkos::DualView::buffer_device_type>& numPacketsPerLID, - const Kokkos::DualView::buffer_device_type>& importLIDs, - const size_t /* constantNumPackets */, - Distributor& /* distor */, - const CombineMode combineMode) +unpackCrsMatrixAndCombineNew( + const CrsMatrix& sourceMatrix, + Kokkos::DualView::buffer_device_type> imports, + Kokkos::DualView::buffer_device_type> numPacketsPerLID, + const Kokkos::DualView::buffer_device_type>& importLIDs, + const size_t /* constantNumPackets */, + Distributor& /* distor */, + const CombineMode combineMode) { - using Tpetra::Details::castAwayConstDualView; using Kokkos::View; using crs_matrix_type = CrsMatrix; using dist_object_type = DistObject; @@ -1167,18 +1170,16 @@ unpackCrsMatrixAndCombineNew (const CrsMatrix& sourceMatrix, "crs_matrix_type::device_type and local_matrix_type::device_type " "must be the same."); - { - auto numPacketsPerLID_nc = castAwayConstDualView (numPacketsPerLID); - numPacketsPerLID_nc.sync_device (); + if (numPacketsPerLID.need_sync_device()) { + numPacketsPerLID.sync_device (); } auto num_packets_per_lid_d = numPacketsPerLID.view_device (); TEUCHOS_ASSERT( ! importLIDs.need_sync_device () ); auto import_lids_d = importLIDs.view_device (); - { - auto imports_nc = castAwayConstDualView (imports); - imports_nc.sync_device (); + if (imports.need_sync_device()) { + imports.sync_device (); } auto imports_d = imports.view_device (); @@ -1186,13 +1187,13 @@ unpackCrsMatrixAndCombineNew (const CrsMatrix& sourceMatrix, auto local_col_map = sourceMatrix.getColMap ()->getLocalMap (); typedef decltype (local_col_map) local_map_type; - // Now do the actual unpack! 
+ const bool unpack_pids = false; UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix< local_matrix_type, local_map_type, buffer_device_type > (local_matrix, local_col_map, imports_d, num_packets_per_lid_d, - import_lids_d, combineMode, false); + import_lids_d, combineMode, unpack_pids); } /// \brief Special version of Tpetra::Details::unpackCrsMatrixAndCombine @@ -1530,8 +1531,8 @@ unpackAndCombineIntoCrsArrays ( template void \ Details::unpackCrsMatrixAndCombineNew ( \ const CrsMatrix&, \ - const Kokkos::DualView::buffer_device_type>&, \ - const Kokkos::DualView::buffer_device_type>&, \ + Kokkos::DualView::buffer_device_type>, \ + Kokkos::DualView::buffer_device_type>, \ const Kokkos::DualView::buffer_device_type>&, \ const size_t, \ Distributor&, \ From f8d1b8401f0ec08c5d896541775cdaf8c325b52d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 21:45:22 -0700 Subject: [PATCH 20/49] Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack: Avoid merge @trilinos/tpetra computePaddingForCrsMatrixUnpack no longer stores the merged source + target entries for each row. Instead, it just counts the number of common entries in both. This should save memory for long rows. 
--- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 62 ++++++++++++++----- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 528ffa213741..a1c8172ca2ab 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -78,6 +78,42 @@ namespace Tpetra { namespace Details { namespace Impl { + template + size_t + countNumInCommon(SourceIterator srcBeg, + SourceIterator srcEnd, + TargetIterator tgtBeg, + TargetIterator tgtEnd) + { + size_t numInCommon = 0; + + auto srcIter = srcBeg; + auto tgtIter = tgtBeg; + while (srcIter != srcEnd && tgtIter != tgtEnd) { + tgtIter = std::lower_bound(tgtIter, tgtEnd, *srcIter); + if (tgtIter == tgtEnd) { + break; + } + if (*tgtIter == *srcIter) { + ++numInCommon; + ++srcIter; + ++tgtIter; + } + + srcIter = std::lower_bound(srcIter, srcEnd, *tgtIter); + if (srcIter == srcEnd) { + break; + } + if (*srcIter == *tgtIter) { + ++numInCommon; + ++tgtIter; + ++srcIter; + } + } + return numInCommon; + } + template Teuchos::ArrayView< typename RowGraph< @@ -5617,7 +5653,6 @@ namespace Tpetra { std::vector padding(numImports); std::vector gblColIndsReceived; std::vector gblColIndsTgt; - std::vector gblColIndsMerged; size_t srcNumDups = 0; size_t tgtNumDups = 0; @@ -5649,10 +5684,10 @@ namespace Tpetra { std::sort(gblColIndsReceived.begin(), gblColIndsReceived.end()); auto newEnd = std::unique(gblColIndsReceived.begin(), gblColIndsReceived.end()); - const size_t numEntriesUnique = + const size_t numEntriesRecvdUnique = static_cast(newEnd - gblColIndsReceived.begin()); - gblColIndsReceived.resize(numEntriesUnique); - srcNumDups += (numEntriesReceived - numEntriesUnique); + gblColIndsReceived.resize(numEntriesRecvdUnique); + srcNumDups += (numEntriesReceived - numEntriesRecvdUnique); size_t origNumTgtEnt = 0; size_t curNumTgtDups = 0; @@ -5662,18 +5697,15 @@ namespace Tpetra { 
tgtSorted, tgtMerged); tgtNumDups += curNumTgtDups; - const size_t origNumMerged = numEntriesUnique + + const size_t numInCommon = Details::Impl::countNumInCommon( + gblColIndsReceived.begin(), + gblColIndsReceived.end(), + gblColIndsTgt.begin(), + gblColIndsTgt.end()); + const size_t origNumBoth = numEntriesRecvdUnique + size_t(gblColIndsTgtView.size()); - if (gblColIndsMerged.size() != origNumMerged) { - gblColIndsMerged.resize(origNumMerged); - } - auto mergedNewEnd = - std::merge(gblColIndsReceived.begin(), gblColIndsReceived.end(), - gblColIndsTgtView.begin(), gblColIndsTgtView.end(), - gblColIndsMerged.begin()); - const size_t newNumMerged = - static_cast(mergedNewEnd - gblColIndsMerged.begin()); - mergedNumDups += (origNumMerged - newNumMerged); + const size_t newNumMerged = origNumBoth - numInCommon; + mergedNumDups += numInCommon; const size_t extraSpaceNeeded = newNumMerged >= origNumTgtEnt ? newNumMerged - origNumTgtEnt : From 391459616a4172dbb3f29a75c25ca08fbdd38482 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 4 Feb 2020 21:51:53 -0700 Subject: [PATCH 21/49] Tpetra::CrsGraph::computeCrsPadding(sames+permutes): Avoid merge @trilinos/tpetra The implementation of computeCrsPadding for sames and permutes no longer stores the merged source + target entries for each row. Instead, it just counts the number of common entries in both. This should save memory for long rows. 
--- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index a1c8172ca2ab..cdbc74279ee2 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5355,7 +5355,6 @@ namespace Tpetra { std::vector src_row_inds; std::vector tgt_row_inds; - std::vector merged_inds; size_t srcNumDups = 0; size_t tgtNumDups = 0; @@ -5398,18 +5397,17 @@ namespace Tpetra { tgt_sorted, tgt_merged); tgtNumDups += curNumTgtDups; + const size_t numInCommon = Details::Impl::countNumInCommon( + src_row_inds_view.begin(), + src_row_inds_view.end(), + tgt_row_inds_view.begin(), + tgt_row_inds_view.end()); const size_t orig_num_merged = - size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); - if (merged_inds.size() != orig_num_merged) { - merged_inds.resize(orig_num_merged); - } - auto merged_end = - std::merge(src_row_inds_view.begin(), src_row_inds_view.end(), - tgt_row_inds_view.begin(), tgt_row_inds_view.end(), - merged_inds.begin()); - const size_t new_num_merged = - static_cast(merged_end - merged_inds.begin()); - mergedNumDups += (orig_num_merged - new_num_merged); + size_t(src_row_inds_view.size()) + + size_t(tgt_row_inds_view.size()); + + const size_t new_num_merged = orig_num_merged - numInCommon; + mergedNumDups += numInCommon; const size_t how_much_padding = new_num_merged >= orig_num_tgt_entries ? 
@@ -5438,10 +5436,13 @@ namespace Tpetra { void CrsGraph:: computeCrsPaddingForPermutedIDs( - Kokkos::UnorderedMap& padding, + Kokkos::UnorderedMap& padding, const RowGraph& source, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs, const bool padAll) const { using LO = LocalOrdinal; @@ -5468,7 +5469,8 @@ namespace Tpetra { const map_type& src_row_map = * (source.getRowMap ()); using insert_result = - typename Kokkos::UnorderedMap::insert_result; + typename Kokkos::UnorderedMap::insert_result; auto permuteToLIDs_h = permuteToLIDs.view_host (); auto permuteFromLIDs_h = permuteFromLIDs.view_host (); @@ -5481,7 +5483,6 @@ namespace Tpetra { std::vector src_row_inds; std::vector tgt_row_inds; - std::vector merged_inds; size_t srcNumDups = 0; size_t tgtNumDups = 0; @@ -5516,18 +5517,16 @@ namespace Tpetra { tgt_sorted, tgt_merged); tgtNumDups += curNumTgtDups; + const size_t numInCommon = Details::Impl::countNumInCommon( + src_row_inds_view.begin(), + src_row_inds_view.end(), + tgt_row_inds_view.begin(), + tgt_row_inds_view.end()); const size_t orig_num_merged = - size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); - if (merged_inds.size() != orig_num_merged) { - merged_inds.resize(orig_num_merged); - } - auto merged_end = - std::merge(src_row_inds_view.begin(), src_row_inds_view.end(), - tgt_row_inds_view.begin(), tgt_row_inds_view.end(), - merged_inds.begin()); - const size_t new_num_merged = - static_cast(merged_end - merged_inds.begin()); - mergedNumDups += (orig_num_merged - new_num_merged); + size_t(src_row_inds_view.size()) + + size_t(tgt_row_inds_view.size()); + const size_t new_num_merged = orig_num_merged - numInCommon; + mergedNumDups += numInCommon; const size_t how_much_padding = new_num_merged >= orig_num_tgt_entries ? 
From f3585c07d40d4a4e9b0d63c2be4df2b88c85109a Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 09:57:11 -0700 Subject: [PATCH 22/49] Tpetra::CrsMatrix: Fix unused variable warning @trilinos/tpetra --- packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index a323500ec1bf..6dd39940b6db 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6530,9 +6530,9 @@ namespace Tpetra { const auto newColIndsLen = myGraph_->k_gblInds1D_.extent(0); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (newValuesLen != newColIndsLen, std::logic_error, - "After padding, values and indices should be same size, but " - "k_values1D_.extent(0)=" << newValuesLen << " != myGraph_->" - "k_gblInds1D_.extent(0)=" << newColIndsLen << "."); + ": After padding, k_values1D_.extent(0)=" << newValuesLen + << " != myGraph_->k_gblInds1D_.extent(0)=" << newColIndsLen + << suffix); } else { padCrsArrays(row_ptr_beg, row_ptr_end, myGraph_->k_lclInds1D_, @@ -6541,9 +6541,9 @@ namespace Tpetra { const auto newColIndsLen = myGraph_->k_lclInds1D_.extent(0); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (newValuesLen != newColIndsLen, std::logic_error, - "After padding, values and indices should be same size, but " - "k_values1D_.extent(0)=" << newValuesLen << " != myGraph_->" - "k_lclInds1D_.extent(0)=" << newColIndsLen << "."); + ": After padding, k_values1D_.extent(0)=" << newValuesLen + << " != myGraph_->k_lclInds1D_.extent(0)=" << newColIndsLen + << suffix); } if (refill_num_row_entries) { From ae01ff0c6a0c1d6af3b024a7638dcfc5b7132d9f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 13:28:14 -0700 Subject: [PATCH 23/49] Tpetra::CrsGraph::insertGlobalIndicesImpl: Improve error message @trilinos/tpetra If insertGlobalIndicesImpl doesn't have enough 
space to insert the input indices, it throws an exception. Improve that exception's error message so that it prints the input column indices and the graph's current column indices in the row (respecting the verbose print count threshold). --- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 9 +++ .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 74 +++++++++++-------- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 3f302f2bf136..2b9598a755bf 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -2414,6 +2414,15 @@ namespace Tpetra { /// /// This comes from Tpetra::Details::Behavior::debug("CrsGraph"). bool debug_ = getDebug(); + + //! Get initial value of verbose_ for this object. + static bool getVerbose(); + + /// \brief Whether to print verbose debugging output. + /// + /// This comes from Tpetra::Details::Behavior::verbose("CrsGraph"). 
+ bool verbose_ = getVerbose(); + }; // class CrsGraph /// \brief Nonmember function to create an empty CrsGraph given a diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index cdbc74279ee2..d1bd64bda10d 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -300,6 +300,13 @@ namespace Tpetra { return Details::Behavior::debug("CrsGraph"); } + template + bool + CrsGraph:: + getVerbose() { + return Details::Behavior::verbose("CrsGraph"); + } + template CrsGraph:: CrsGraph (const Teuchos::RCP& rowMap, @@ -1990,9 +1997,11 @@ namespace Tpetra { const size_t numInputInds, std::function fun) { + using Details::verbosePrintArray; using Kokkos::View; using Kokkos::subview; using Kokkos::MemoryUnmanaged; + using Teuchos::ArrayView; using LO = LocalOrdinal; using GO = GlobalOrdinal; const char tfecfFuncName[] = "insertGlobalIndicesImpl: "; @@ -2010,13 +2019,28 @@ namespace Tpetra { constexpr size_t ONE (1); const int myRank = this->getComm()->getRank(); std::ostringstream os; - os << "On MPI Process " << myRank << ": Not enough capacity to " - "insert " << numInputInds + + os << "Proc " << myRank << ": Not enough capacity to insert " + << numInputInds << " ind" << (numInputInds != ONE ? "ices" : "ex") << " into local row " << lclRow << ", which currently has " << rowInfo.numEntries << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y") - << " and total allocation size " << rowInfo.allocSize << "."; + << " and total allocation size " << rowInfo.allocSize + << ". 
"; + const size_t maxNumToPrint = + Details::Behavior::verbosePrintCountThreshold(); + ArrayView inputGblColIndsView(inputGblColInds, + numInputInds); + verbosePrintArray(os, inputGblColIndsView, "Input global " + "column indices", maxNumToPrint); + os << ", "; + const GO* const curGblColInds = + k_gblInds1D_.data() + rowInfo.offset1D; + ArrayView curGblColIndsView(curGblColInds, + rowInfo.numEntries); + verbosePrintArray(os, curGblColIndsView, "Current global " + "column indices", maxNumToPrint); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (true, std::runtime_error, os.str()); } @@ -2755,10 +2779,7 @@ namespace Tpetra { "Local row index " << localRow << " is not in the row Map " "on the calling process."); if (! indicesAreAllocated ()) { - // Allocating indices takes a while and only needs to be done - // once per MPI process, so it's OK to query TPETRA_VERBOSE. - const bool verbose = Details::Behavior::verbose("CrsGraph"); - allocateIndices (LocalIndices, verbose); + allocateIndices (LocalIndices, verbose_); } if (debug_) { @@ -2840,11 +2861,8 @@ namespace Tpetra { "You are not allowed to call this method if fill is not active. " "If fillComplete has been called, you must first call resumeFill " "before you may insert indices."); - if (! this->indicesAreAllocated ()) { - // Allocating indices takes a while and only needs to be done - // once per MPI process, so it's OK to query TPETRA_VERBOSE. - const bool verbose = Details::Behavior::verbose("CrsGraph"); - this->allocateIndices (GlobalIndices, verbose); + if (! indicesAreAllocated ()) { + allocateIndices (GlobalIndices, verbose_); } const LO lclRow = this->rowMap_->getLocalElement (gblRow); if (lclRow != Tpetra::Details::OrdinalTraits::invalid ()) { @@ -2932,16 +2950,13 @@ namespace Tpetra { "You are not allowed to call this method if fill is not active. " "If fillComplete has been called, you must first call resumeFill " "before you may insert indices."); - if (! 
this->indicesAreAllocated ()) { - // Allocating indices takes a while and only needs to be done - // once per MPI process, so it's OK to query TPETRA_VERBOSE. - const bool verbose = Details::Behavior::verbose("CrsGraph"); - this->allocateIndices (GlobalIndices, verbose); + if (! indicesAreAllocated ()) { + allocateIndices (GlobalIndices, verbose_); } Teuchos::ArrayView gblColInds_av (gblColInds, numGblColInds); // If we have a column Map, use it to filter the entries. - if (! this->colMap_.is_null ()) { + if (! colMap_.is_null ()) { const map_type& colMap = * (this->colMap_); LO curOffset = 0; @@ -3012,10 +3027,7 @@ namespace Tpetra { ! rowMap_->isNodeLocalElement (lrow), std::runtime_error, "Local row " << lrow << " is not in the row Map on the calling process."); if (! indicesAreAllocated ()) { - // Allocating indices takes a while and only needs to be done - // once per MPI process, so it's OK to query TPETRA_VERBOSE. - const bool verbose = Details::Behavior::verbose("CrsGraph"); - allocateIndices (LocalIndices, verbose); + allocateIndices (LocalIndices, verbose_); } // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on @@ -3514,7 +3526,7 @@ namespace Tpetra { { using std::endl; const char tfecfFuncName[] = "fillComplete: "; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; std::unique_ptr prefix; if (verbose) { @@ -5012,7 +5024,7 @@ namespace Tpetra { using this_type = CrsGraph; using row_graph_type = RowGraph; const char tfecfFuncName[] = "copyAndPermute: "; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; std::unique_ptr prefix; if (verbose) { @@ -5324,7 +5336,7 @@ namespace Tpetra { const char tfecfFuncName[] = "computeCrsPaddingForSameIds: "; std::unique_ptr prefix; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; if (verbose) { prefix = this->createPrefix("CrsGraph", "computeCrsPaddingForSameIDs"); @@ -5452,7 
+5464,7 @@ namespace Tpetra { const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds"; std::unique_ptr prefix; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; if (verbose) { prefix = this->createPrefix("CrsGraph", tfecfFuncName); std::ostringstream os; @@ -5747,7 +5759,7 @@ namespace Tpetra { const char tfecfFuncName[] = "packAndPrepare: "; ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare"); - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; std::unique_ptr prefix; if (verbose) { prefix = this->createPrefix("CrsGraph", "packAndPrepare"); @@ -5888,7 +5900,7 @@ namespace Tpetra { using device_execution_space = typename device_type::execution_space; const char tfecfFuncName[] = "packFillActive: "; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; const auto numExportLIDs = exportLIDs.size (); std::unique_ptr prefix; @@ -6106,7 +6118,7 @@ namespace Tpetra { using exports_dv_type = Kokkos::DualView; const char tfecfFuncName[] = "packFillActiveNew: "; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; const auto numExportLIDs = exportLIDs.extent (0); std::unique_ptr prefix; @@ -6372,7 +6384,7 @@ namespace Tpetra { const char tfecfFuncName[] = "unpackAndCombine: "; ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine"); - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; std::unique_ptr prefix; if (verbose) { @@ -6578,7 +6590,7 @@ namespace Tpetra { using LO = LocalOrdinal; using GO = GlobalOrdinal; const char tfecfFuncName[] = "getLocalDiagOffsets: "; - const bool verbose = Details::Behavior::verbose("CrsGraph"); + const bool verbose = verbose_; std::unique_ptr prefix; if (verbose) { From b52bcb8b91f89d2ebfeefc9fe7b883eb0de07cf2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 15:29:58 -0700 Subject: 
[PATCH 24/49] Tpetra::CrsGraph: Fix bug in getSortedMergedGlobalRow --- packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index d1bd64bda10d..3892e955f1ef 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -145,7 +145,8 @@ namespace Tpetra { std::unique(gblColInds.begin(), gblColInds.end()); const size_t newNumEntries = static_cast(newEnd - gblColInds.begin()); - numDuplicates = size_t(newNumEntries - origNumEntries); + TEUCHOS_ASSERT( origNumEntries >= newNumEntries ); + numDuplicates = size_t(origNumEntries - newNumEntries); gblColInds = ArrayView(gblColInds.data(), newNumEntries); } @@ -5686,7 +5687,7 @@ namespace Tpetra { imports_h.data() + numEntBeg); const size_t gidsBeg = numEntBeg + numEntLen; - if (gblColIndsReceived.size() < size_t(numEntriesReceived)) { + if (gblColIndsReceived.size() != size_t(numEntriesReceived)) { gblColIndsReceived.resize(numEntriesReceived); } (void) PackTraits::unpackArray(gblColIndsReceived.data(), @@ -5697,8 +5698,11 @@ namespace Tpetra { gblColIndsReceived.end()); const size_t numEntriesRecvdUnique = static_cast(newEnd - gblColIndsReceived.begin()); + TEUCHOS_ASSERT( numEntriesRecvdUnique <= size_t(numEntriesReceived) ); + const size_t curSrcNumDups = + size_t(size_t(numEntriesReceived) - numEntriesRecvdUnique); + srcNumDups += curSrcNumDups; gblColIndsReceived.resize(numEntriesRecvdUnique); - srcNumDups += (numEntriesReceived - numEntriesRecvdUnique); size_t origNumTgtEnt = 0; size_t curNumTgtDups = 0; From b134b123aa8e8e8b68e06fcffee9c30dd55ba736 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 16:05:37 -0700 Subject: [PATCH 25/49] Tpetra::CrsMatrix: Improved exception message --- packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 6dd39940b6db..664d9d1dc1b9 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7812,7 +7812,7 @@ namespace Tpetra { // rank again here. This is an error message, so the small // run-time cost doesn't matter. See #1887. std::ostringstream os; - os << "(Proc " << comm.getRank () << ") " << msg->str () << endl; + os << "Proc " << comm.getRank () << ": " << msg->str () << endl; msg = std::unique_ptr (new std::ostringstream ()); ::Tpetra::Details::gathervPrint (*msg, os.str (), comm); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC From cff70507dbf7b7789e37cfc3ac5682ccc8691733 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 15:29:03 -0700 Subject: [PATCH 26/49] Tpetra: Add unit test relating to #6663 @trilinos/tpetra --- .../tpetra/core/test/CrsMatrix/CMakeLists.txt | 10 + .../core/test/CrsMatrix/UnpackMerge.cpp | 230 ++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp diff --git a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt index 16d1dfbec792..83e98e72caba 100644 --- a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt +++ b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt @@ -380,6 +380,16 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( STANDARD_PASS_OUTPUT ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnpackMerge + SOURCES + UnpackMerge + ${TEUCHOS_STD_UNIT_TEST_MAIN} + COMM mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT + ) + SET(TIMING_INSTALLS "") INSTALL(TARGETS ${TIMING_INSTALLS} diff --git a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp new file mode 100644 index 000000000000..e174009590c7 --- /dev/null +++ b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp @@ -0,0 +1,230 @@ +/* 
+// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_TestingUtilities.hpp" +#include "Tpetra_CrsMatrix.hpp" +#include "Tpetra_Import.hpp" +#include "Tpetra_Map.hpp" +#include "Kokkos_Core.hpp" + +namespace { // (anonymous) + + // Both source and target matrices have one row on each process. + // + // Target matrix global column indices: + // Proc 0: Global row index 0: [0, 1, 2, 3, 4, 5] + // Proc 1: Global row index 1: [0, 1, 2, 3, 4, 5] + // + // Source matrix global column indices: + // Proc 0: Global row index 1: [] + // Proc 1: Global row index 0: [3, 4, 5, 6, 7, 8, 9] + // + // After Import, target should look like this: + // Proc 0: Global row index 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + // Proc 1: Global row index 1: [0, 1, 2, 3, 4, 5] + + TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( CrsMatrix, UnpackMerge, Scalar, Node ) + { + using Tpetra::TestingUtilities::getDefaultComm; + using Teuchos::ArrayView; + using Teuchos::Comm; + using Teuchos::outArg; + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::REDUCE_MIN; + using Teuchos::reduceAll; + using Teuchos::tuple; + using std::endl; + using LO = Tpetra::Map<>::local_ordinal_type; + using GO = Tpetra::Map<>::global_ordinal_type; + using crs_matrix_type = Tpetra::CrsMatrix; + using import_type = Tpetra::Import; + using map_type = Tpetra::Map; + using GST = Tpetra::global_size_t; + using STS = Teuchos::ScalarTraits; + + RCP > comm = getDefaultComm(); + const int myRank = comm->getRank(); + const int numProcs = comm->getSize(); + + out << "Test that Tpetra::CrsMatrix::unpackAndCombine into a " + "target matrix with a non-static graph merges column indices" + << endl; + Teuchos::OSTab tab1(out); + + TEST_ASSERT( numProcs == 2 ); + if (numProcs != 2) { + out << "This test requires exactly two MPI processes, but you " + "ran it with " << numProcs << " process(es)." 
<< endl; + return; + } + + const GO gblNumRows (2); + const GO indexBase (0); + std::vector srcRowMapInds; + std::vector tgtRowMapInds; + if (myRank == 0) { + srcRowMapInds = std::vector{{1}}; + tgtRowMapInds = std::vector{{0}}; + } + else if (myRank == 1) { + srcRowMapInds = std::vector{{0}}; + tgtRowMapInds = std::vector{{1}}; + } + const LO srcLclNumRows (srcRowMapInds.size()); + const LO tgtLclNumRows (tgtRowMapInds.size()); + + RCP srcRowMap = + rcp(new map_type(static_cast(gblNumRows), + srcRowMapInds.data(), srcLclNumRows, + indexBase, comm)); + RCP tgtRowMap = + rcp(new map_type(static_cast(gblNumRows), + tgtRowMapInds.data(), tgtLclNumRows, + indexBase, comm)); + + const GO gblNumCols = 10; + RCP colMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::LocallyReplicated)); + RCP domMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::GloballyDistributed)); + RCP ranMap = srcRowMap; + import_type importer(srcRowMap, tgtRowMap); + + std::vector srcGblColInds; + if (myRank == 1) { + srcGblColInds = std::vector{{3, 4, 5, 6, 7, 8, 9}}; + } + std::vector tgtGblColInds{{0, 1, 2, 3, 4, 5}}; + std::vector srcVals(srcGblColInds.size(), Scalar(1.0)); + std::vector tgtVals(tgtGblColInds.size(), Scalar(1.0)); + + std::vector expectedTgtVals; + if (myRank == 0) { + expectedTgtVals.resize(10); + for(LO k = 0; k < LO(3); ++k) { + expectedTgtVals[k] = 1.0; + } + for(LO k = LO(3); k < LO(6); ++k) { + expectedTgtVals[k] = 2.0; + } + for(LO k = LO(6); k < LO(10); ++k) { + expectedTgtVals[k] = 1.0; + } + } + else if (myRank == 1) { + expectedTgtVals.resize(6); + for(LO k = 0; k < LO(6); ++k) { + expectedTgtVals[k] = 1.0; + } + } + + for (const bool A_src_is_fill_complete : {false, true}) { + out << "A_src will" << (A_src_is_fill_complete ? "" : " NOT") + << " be fill complete." 
<< endl; + crs_matrix_type A_src(srcRowMap, colMap, srcGblColInds.size()); + crs_matrix_type A_tgt(tgtRowMap, colMap, tgtGblColInds.size()); + + for (LO lclRow = 0; lclRow < srcLclNumRows; ++lclRow) { + const GO gblRow = srcRowMap->getGlobalElement(lclRow); + A_tgt.insertGlobalValues(gblRow, + Teuchos::ArrayView(tgtGblColInds), + Teuchos::ArrayView(tgtVals)); + A_src.insertGlobalValues(gblRow, + Teuchos::ArrayView(srcGblColInds), + Teuchos::ArrayView(srcVals)); + } + if (A_src_is_fill_complete) { + A_src.fillComplete(domMap, ranMap); + } + + out << "Finished A_src.fillComplete(domMap, ranMap)" << endl; + + TEST_ASSERT( ! A_tgt.isStaticGraph() ); + + A_tgt.doImport(A_src, importer, Tpetra::INSERT); + A_tgt.fillComplete(domMap, ranMap); + + Kokkos::fence(); // since we're accessing data on host now + + Teuchos::ArrayView lclColInds; + Teuchos::ArrayView vals; + const LO lclRowToTest (0); + A_tgt.getLocalRowView(lclRowToTest, lclColInds, vals); + + const LO expectedNumEnt = myRank == 0 ? LO(10) : LO(6); + TEST_EQUALITY( LO(lclColInds.size()), expectedNumEnt ); + TEST_EQUALITY( LO(vals.size()), expectedNumEnt ); + + if (success && myRank == 0) { + for (LO k = 0; k < expectedNumEnt; ++k) { + TEST_EQUALITY( lclColInds[k], LO(k) ); + const Scalar expectedVal = expectedTgtVals[k]; + TEST_EQUALITY( vals[k], expectedVal ); + } + } + + // Test whether all processes passed the test. + int lclSuccess = success ? 
1 : 0; + int gblSuccess = 0; + reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess)); + TEST_EQUALITY_CONST( gblSuccess, 1 ); + } + } + +// +// INSTANTIATIONS +// + +#define UNIT_TEST_GROUP( SCALAR, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( CrsMatrix, UnpackMerge, SCALAR, NODE ) + + TPETRA_ETI_MANGLING_TYPEDEFS() + + TPETRA_INSTANTIATE_SN( UNIT_TEST_GROUP ) + +} // namespace (anonymous) + + From fb058d3d3e61613136b8ac52d21c5139a5d8577f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 17:46:28 -0700 Subject: [PATCH 27/49] Tpetra: Add countNumInCommon to Tpetra_Util.hpp; add unit test @trilinos/tpetra --- packages/tpetra/core/src/Tpetra_Util.hpp | 39 ++++++++ .../tpetra/core/test/Utils/CMakeLists.txt | 10 ++ .../core/test/Utils/countNumInCommon.cpp | 94 +++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 packages/tpetra/core/test/Utils/countNumInCommon.cpp diff --git a/packages/tpetra/core/src/Tpetra_Util.hpp b/packages/tpetra/core/src/Tpetra_Util.hpp index 4ab3dfac5717..3c441f59c21d 100644 --- a/packages/tpetra/core/src/Tpetra_Util.hpp +++ b/packages/tpetra/core/src/Tpetra_Util.hpp @@ -988,6 +988,45 @@ namespace Tpetra { } out << "]"; } + + /// \brief Given two sorted and merged ranges, return the number + /// of elements they have in common. 
+  ///
+  /// Both ranges must be sorted in ascending order and free of
+  /// duplicates.  Neither past-the-end iterator is ever dereferenced.
+  ///
+  /// \param srcBeg,srcEnd [in] First sorted, duplicate-free range.
+  /// \param tgtBeg,tgtEnd [in] Second sorted, duplicate-free range.
+  /// \return Number of values that occur in both ranges.
+  template<class SourceIterator, class TargetIterator>
+  size_t
+  countNumInCommon(SourceIterator srcBeg,
+                   SourceIterator srcEnd,
+                   TargetIterator tgtBeg,
+                   TargetIterator tgtEnd)
+  {
+    size_t numInCommon = 0;
+
+    auto srcIter = srcBeg;
+    auto tgtIter = tgtBeg;
+    while (srcIter != srcEnd && tgtIter != tgtEnd) {
+      // Skip target entries that are less than the current source entry.
+      tgtIter = std::lower_bound(tgtIter, tgtEnd, *srcIter);
+      if (tgtIter == tgtEnd) {
+        break;
+      }
+      if (*tgtIter == *srcIter) {
+        ++numInCommon;
+        ++srcIter;
+        ++tgtIter; // may now equal tgtEnd; the loop test handles that
+      }
+      else { // *tgtIter > *srcIter: skip source entries below *tgtIter
+        srcIter = std::lower_bound(srcIter, srcEnd, *tgtIter);
+      }
+    }
+    return numInCommon;
+  }
+
 } // namespace Details } // namespace Tpetra diff --git a/packages/tpetra/core/test/Utils/CMakeLists.txt b/packages/tpetra/core/test/Utils/CMakeLists.txt index 1d82d4047d7b..c2502d39332b 100644 --- a/packages/tpetra/core/test/Utils/CMakeLists.txt +++ b/packages/tpetra/core/test/Utils/CMakeLists.txt @@ -145,3 +145,13 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 STANDARD_PASS_OUTPUT ) + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + countNumInCommon + SOURCES + countNumInCommon + ${TEUCHOS_STD_UNIT_TEST_MAIN} + COMM serial mpi + NUM_MPI_PROCS 1 + STANDARD_PASS_OUTPUT + ) diff --git a/packages/tpetra/core/test/Utils/countNumInCommon.cpp b/packages/tpetra/core/test/Utils/countNumInCommon.cpp new file mode 100644 index 000000000000..e24889713bc7 --- /dev/null +++ b/packages/tpetra/core/test/Utils/countNumInCommon.cpp @@ -0,0 +1,94 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1.
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_Util.hpp" +#include "Teuchos_UnitTestHarness.hpp" +#include +#include + +namespace { // (anonymous) + + TEUCHOS_UNIT_TEST( Utils, CountNumInCommon ) + { + std::vector newGblColInds {{142944, 142945, 142946, 142947, 142948, 142949, 142950, 142951, 142952, 142953, 142954, 142955, 142959, 142960, 142961, 142965, 142966, 142967, 142968, 142969, 142970, 143142, 143143, 143144, 198279, 198280, 198281, 198282, 198283, 198284, 198291, 198292, 198293, 198303, 198304, 198305, 198309, 198310, 198311, 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356, 198699, 198700, 198701, 198702, 198703, 198704, 198705, 198706, 198707, 198708, 198709, 198710, 198711, 198712, 198713, 198729, 198730, 198731, 198732, 198733, 198734, 198735, 198736, 198737, 198738, 198739, 198740, 198741, 198742, 198743, 198744, 198745, 198746}}; + + std::vector curGblColInds {{166215, 166216, 166217, 166218, 166219, 166220, 166221, 166222, 166223, 166224, 166225, 166226, 166227, 166228, 166229, 166230, 166231, 166232, 166233, 166234, 166235, 166236, 166237, 166238, 166239, 166240, 166241, 166242, 166243, 166244, 166245, 166246, 166247, 198279, 198280, 198281, 198282, 198283, 198284, 198285, 198286, 198287, 198288, 198289, 198290, 198291, 198292, 198293, 198294, 198295, 198296, 198297, 198298, 198299, 198300, 198301, 198302, 198303, 198304, 198305, 198306, 198307, 198308, 198309, 198310, 198311, 198312, 198313, 198314, 198315, 198316, 198317, 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356}}; + + const size_t newGblColIndsSize (96); + const size_t curGblColIndsSize (96); + + TEST_EQUALITY( newGblColInds.size(), 
newGblColIndsSize ); + TEST_EQUALITY( curGblColInds.size(), curGblColIndsSize ); + + std::sort(newGblColInds.begin(), newGblColInds.end()); + std::sort(curGblColInds.begin(), curGblColInds.end()); + + TEST_EQUALITY( newGblColInds.size(), newGblColIndsSize ); + TEST_EQUALITY( curGblColInds.size(), curGblColIndsSize ); + + auto newIter = std::unique(newGblColInds.begin(), newGblColInds.end()); + auto curIter = std::unique(curGblColInds.begin(), curGblColInds.end()); + + TEST_EQUALITY( size_t(newIter - newGblColInds.begin()), newGblColIndsSize ); + TEST_EQUALITY( size_t(curIter - curGblColInds.begin()), curGblColIndsSize ); + + constexpr size_t expectedMergeSize = 153; + + using Tpetra::Details::countNumInCommon; + const size_t numInCommon = + countNumInCommon(newGblColInds.begin(), newIter, + curGblColInds.begin(), curIter); + const size_t mergeSize = + newGblColIndsSize + curGblColIndsSize - numInCommon; + TEST_EQUALITY( mergeSize, expectedMergeSize ); + + std::vector mergedInds(newGblColInds.size() + curGblColInds.size()); + TEST_EQUALITY( mergedInds.size(), newGblColIndsSize + curGblColIndsSize ); + + auto mergedIter = std::set_union(newGblColInds.begin(), newIter, + curGblColInds.begin(), curIter, + mergedInds.begin()); + const size_t mergeSize2 = size_t(mergedIter - mergedInds.begin()); + + TEST_EQUALITY( mergeSize2, expectedMergeSize ); + } + +} // namespace (anonymous) From 7f1248238773207bd2ab4e8b09bf3d40846da272 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 18:05:05 -0700 Subject: [PATCH 28/49] Tpetra: Improve countNumInCommon unit test --- .../core/test/Utils/countNumInCommon.cpp | 178 +++++++++++++++--- 1 file changed, 157 insertions(+), 21 deletions(-) diff --git a/packages/tpetra/core/test/Utils/countNumInCommon.cpp b/packages/tpetra/core/test/Utils/countNumInCommon.cpp index e24889713bc7..512370904ff1 100644 --- a/packages/tpetra/core/test/Utils/countNumInCommon.cpp +++ b/packages/tpetra/core/test/Utils/countNumInCommon.cpp @@ -46,14 
+46,164 @@ namespace { // (anonymous) - TEUCHOS_UNIT_TEST( Utils, CountNumInCommon ) + void + testLists(Teuchos::FancyOStream& out, + bool& success, + std::vector& list1, + std::vector& list2, + const size_t expectedNumInCommon, + const size_t expectedUnionSize) + { + std::sort(list1.begin(), list1.end()); + std::sort(list2.begin(), list2.end()); + auto iter1 = std::unique(list1.begin(), list1.end()); + auto iter2 = std::unique(list2.begin(), list2.end()); + + using Tpetra::Details::countNumInCommon; + const size_t numInCommon = + countNumInCommon(list1.begin(), iter1, + list2.begin(), iter2); + TEST_EQUALITY( numInCommon, expectedNumInCommon ); + + const size_t unionSize = + list1.size() + list2.size() - numInCommon; + TEST_EQUALITY( unionSize, expectedUnionSize ); + + std::vector unionInds(list1.size() + list2.size()); + TEST_EQUALITY( unionInds.size(), list1.size() + list2.size() ); + + auto unionIter = std::set_union(list1.begin(), iter1, + list2.begin(), iter2, + unionInds.begin()); + const size_t unionSize2 = size_t(unionIter - unionInds.begin()); + TEST_EQUALITY( unionSize2, expectedUnionSize ); + } + + TEUCHOS_UNIT_TEST( Utils, CountNumInCommon_short ) + { + { + std::vector list1; + std::vector list2; + testLists(out, success, list1, list2, 0, 0); + if (! success) { + return; + } + } + { + std::vector list1{{666}}; + std::vector list2; + testLists(out, success, list1, list2, 0, 1); + if (! success) { + return; + } + } + { + std::vector list1; + std::vector list2{{666}}; + testLists(out, success, list1, list2, 0, 1); + if (! success) { + return; + } + } + { + std::vector list1{{418}}; + std::vector list2{{418}}; + testLists(out, success, list1, list2, 1, 1); + if (! success) { + return; + } + } + { + std::vector list1{{418, 419}}; + std::vector list2{{418}}; + testLists(out, success, list1, list2, 1, 2); + if (! success) { + return; + } + } + { + std::vector list1{{418}}; + std::vector list2{{418, 419}}; + testLists(out, success, list1, list2, 1, 2); + if (! 
success) { + return; + } + } + { + std::vector list1{{417, 418}}; + std::vector list2{{418}}; + testLists(out, success, list1, list2, 1, 2); + if (! success) { + return; + } + } + { + std::vector list1{{418}}; + std::vector list2{{417, 418}}; + testLists(out, success, list1, list2, 1, 2); + if (! success) { + return; + } + } + { + std::vector list1{{417, 418, 419}}; + std::vector list2{{418}}; + testLists(out, success, list1, list2, 1, 3); + if (! success) { + return; + } + } + { + std::vector list1{{418}}; + std::vector list2{{417, 418, 419}}; + testLists(out, success, list1, list2, 1, 3); + if (! success) { + return; + } + } + { + std::vector list1{{415, 418, 421}}; + std::vector list2{{418}}; + testLists(out, success, list1, list2, 1, 3); + if (! success) { + return; + } + } + { + std::vector list1{{418}}; + std::vector list2{{415, 418, 421}}; + testLists(out, success, list1, list2, 1, 3); + if (! success) { + return; + } + } + { + std::vector list1{{415, 419, 421}}; + std::vector list2{{418}}; + testLists(out, success, list1, list2, 0, 4); + if (! success) { + return; + } + } + { + std::vector list1{{418}}; + std::vector list2{{415, 419, 421}}; + testLists(out, success, list1, list2, 0, 4); + if (! success) { + return; + } + } + } + + // This came from an application test problem. 
+ TEUCHOS_UNIT_TEST( Utils, CountNumInCommon_app ) { std::vector newGblColInds {{142944, 142945, 142946, 142947, 142948, 142949, 142950, 142951, 142952, 142953, 142954, 142955, 142959, 142960, 142961, 142965, 142966, 142967, 142968, 142969, 142970, 143142, 143143, 143144, 198279, 198280, 198281, 198282, 198283, 198284, 198291, 198292, 198293, 198303, 198304, 198305, 198309, 198310, 198311, 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356, 198699, 198700, 198701, 198702, 198703, 198704, 198705, 198706, 198707, 198708, 198709, 198710, 198711, 198712, 198713, 198729, 198730, 198731, 198732, 198733, 198734, 198735, 198736, 198737, 198738, 198739, 198740, 198741, 198742, 198743, 198744, 198745, 198746}}; std::vector curGblColInds {{166215, 166216, 166217, 166218, 166219, 166220, 166221, 166222, 166223, 166224, 166225, 166226, 166227, 166228, 166229, 166230, 166231, 166232, 166233, 166234, 166235, 166236, 166237, 166238, 166239, 166240, 166241, 166242, 166243, 166244, 166245, 166246, 166247, 198279, 198280, 198281, 198282, 198283, 198284, 198285, 198286, 198287, 198288, 198289, 198290, 198291, 198292, 198293, 198294, 198295, 198296, 198297, 198298, 198299, 198300, 198301, 198302, 198303, 198304, 198305, 198306, 198307, 198308, 198309, 198310, 198311, 198312, 198313, 198314, 198315, 198316, 198317, 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356}}; - const size_t newGblColIndsSize (96); - const size_t curGblColIndsSize (96); + constexpr size_t newGblColIndsSize (96); + constexpr size_t curGblColIndsSize (96); TEST_EQUALITY( newGblColInds.size(), newGblColIndsSize ); TEST_EQUALITY( curGblColInds.size(), curGblColIndsSize ); @@ -71,24 +221,10 @@ namespace { // (anonymous) TEST_EQUALITY( 
size_t(curIter - curGblColInds.begin()), curGblColIndsSize ); constexpr size_t expectedMergeSize = 153; - - using Tpetra::Details::countNumInCommon; - const size_t numInCommon = - countNumInCommon(newGblColInds.begin(), newIter, - curGblColInds.begin(), curIter); - const size_t mergeSize = - newGblColIndsSize + curGblColIndsSize - numInCommon; - TEST_EQUALITY( mergeSize, expectedMergeSize ); - - std::vector mergedInds(newGblColInds.size() + curGblColInds.size()); - TEST_EQUALITY( mergedInds.size(), newGblColIndsSize + curGblColIndsSize ); - - auto mergedIter = std::set_union(newGblColInds.begin(), newIter, - curGblColInds.begin(), curIter, - mergedInds.begin()); - const size_t mergeSize2 = size_t(mergedIter - mergedInds.begin()); - - TEST_EQUALITY( mergeSize2, expectedMergeSize ); + constexpr size_t expectedNumInCommon = newGblColIndsSize + + curGblColIndsSize - expectedMergeSize; + testLists(out, success, newGblColInds, curGblColInds, + expectedNumInCommon, expectedMergeSize); } } // namespace (anonymous) From aa858e552105a88236453c004967d87a4d3cddd6 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 5 Feb 2020 18:36:42 -0700 Subject: [PATCH 29/49] Tpetra::CrsGraph: Change padding to total, not increment @trilinos/tpetra This is related to #6663. 
--- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 105 ++++++------------ 1 file changed, 32 insertions(+), 73 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 3892e955f1ef..f16e703d3bab 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -65,6 +65,7 @@ #include "Tpetra_Details_packCrsGraph.hpp" #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp" #include "Tpetra_Details_determineLocalTriangularStructure.hpp" +#include "Tpetra_Util.hpp" #include #include #include @@ -78,42 +79,6 @@ namespace Tpetra { namespace Details { namespace Impl { - template - size_t - countNumInCommon(SourceIterator srcBeg, - SourceIterator srcEnd, - TargetIterator tgtBeg, - TargetIterator tgtEnd) - { - size_t numInCommon = 0; - - auto srcIter = srcBeg; - auto tgtIter = tgtBeg; - while (srcIter != srcEnd && tgtIter != tgtEnd) { - tgtIter = std::lower_bound(tgtIter, tgtEnd, *srcIter); - if (tgtIter == tgtEnd) { - break; - } - if (*tgtIter == *srcIter) { - ++numInCommon; - ++srcIter; - ++tgtIter; - } - - srcIter = std::lower_bound(srcIter, srcEnd, *tgtIter); - if (srcIter == srcEnd) { - break; - } - if (*srcIter == *tgtIter) { - ++numInCommon; - ++tgtIter; - ++srcIter; - } - } - return numInCommon; - } - template Teuchos::ArrayView< typename RowGraph< @@ -5382,7 +5347,8 @@ namespace Tpetra { insert_result result; const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid); if (padAll) { - result = padding.insert(tgt_lid, orig_num_src_entries); + const size_t orig_num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid); + result = padding.insert(tgt_lid, orig_num_src_entries + orig_num_tgt_entries); } else { size_t curNumSrcDups = 0; @@ -5392,14 +5358,7 @@ namespace Tpetra { src_sorted, src_merged); srcNumDups += curNumSrcDups; if (src_row_inds_view.size() == 0) { // nothing new to insert - result = padding.insert(tgt_lid, size_t(0)); - // FIXME 
(mfh 09 Apr 2019, 04 Feb 2020) Kokkos::UnorderedMap - // is allowed to fail even if the user did nothing wrong. We - // should actually have a retry option. I just copied this - // code over from computeCrsPadding. - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - "unable to insert padding for LID " << tgt_lid); + continue; } size_t orig_num_tgt_entries = 0; @@ -5410,30 +5369,27 @@ namespace Tpetra { tgt_sorted, tgt_merged); tgtNumDups += curNumTgtDups; - const size_t numInCommon = Details::Impl::countNumInCommon( + const size_t numInCommon = Details::countNumInCommon( src_row_inds_view.begin(), src_row_inds_view.end(), tgt_row_inds_view.begin(), tgt_row_inds_view.end()); - const size_t orig_num_merged = + const size_t orig_num_union = size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); - const size_t new_num_merged = orig_num_merged - numInCommon; + const size_t new_num_union = orig_num_union - numInCommon; mergedNumDups += numInCommon; - const size_t how_much_padding = - new_num_merged >= orig_num_tgt_entries ? - new_num_merged - orig_num_tgt_entries : - size_t(0); - result = padding.insert (tgt_lid, how_much_padding); + if (new_num_union > orig_num_tgt_entries) { + result = padding.insert (tgt_lid, new_num_union); + // Kokkos::UnorderedMap is allowed to fail even if the user did + // nothing wrong. We should actually have a retry option. + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (result.failed(), std::runtime_error, + "unable to insert padding for LID " << tgt_lid); + } } - - // Kokkos::UnorderedMap is allowed to fail even if the user did - // nothing wrong. We should actually have a retry option. 
- TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - "unable to insert padding for LID " << tgt_lid); } if (verbose) { @@ -5511,7 +5467,10 @@ namespace Tpetra { insert_result result; const LO tgt_lid = permuteToLIDs_h[i]; if (padAll) { - result = padding.insert (tgt_lid, orig_num_src_entries); + const size_t orig_num_tgt_entries = + getNumEntriesInLocalRow(tgt_lid); + result = padding.insert( + tgt_lid, orig_num_src_entries + orig_num_tgt_entries); } else { size_t curNumSrcDups = 0; @@ -5530,22 +5489,25 @@ namespace Tpetra { tgt_sorted, tgt_merged); tgtNumDups += curNumTgtDups; - const size_t numInCommon = Details::Impl::countNumInCommon( + const size_t numInCommon = Details::countNumInCommon( src_row_inds_view.begin(), src_row_inds_view.end(), tgt_row_inds_view.begin(), tgt_row_inds_view.end()); - const size_t orig_num_merged = + const size_t orig_num_union = size_t(src_row_inds_view.size()) + size_t(tgt_row_inds_view.size()); - const size_t new_num_merged = orig_num_merged - numInCommon; + const size_t new_num_union = orig_num_union - numInCommon; mergedNumDups += numInCommon; - const size_t how_much_padding = - new_num_merged >= orig_num_tgt_entries ? - new_num_merged - orig_num_tgt_entries : - size_t(0); - result = padding.insert (tgt_lid, how_much_padding); + if (new_num_union > orig_num_tgt_entries) { + result = padding.insert (tgt_lid, new_num_union); + // Kokkos::UnorderedMap is allowed to fail even if the user did + // nothing wrong. We should actually have a retry option. 
+ TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (result.failed(), std::runtime_error, + "unable to insert padding for LID " << tgt_lid); + } } // Kokkos::UnorderedMap is allowed to fail even if the user did @@ -5712,7 +5674,7 @@ namespace Tpetra { tgtSorted, tgtMerged); tgtNumDups += curNumTgtDups; - const size_t numInCommon = Details::Impl::countNumInCommon( + const size_t numInCommon = Details::countNumInCommon( gblColIndsReceived.begin(), gblColIndsReceived.end(), gblColIndsTgt.begin(), @@ -5722,10 +5684,7 @@ namespace Tpetra { const size_t newNumMerged = origNumBoth - numInCommon; mergedNumDups += numInCommon; - const size_t extraSpaceNeeded = newNumMerged >= origNumTgtEnt ? - newNumMerged - origNumTgtEnt : - size_t(0); - padding[whichImp] = extraSpaceNeeded; + padding[whichImp] = std::max(newNumMerged, origNumTgtEnt); offset += numBytes; } From d6564214a21b604acd1da64d5bcd927ea39e04b3 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 6 Feb 2020 18:30:26 -0700 Subject: [PATCH 30/49] Tpetra: Add 2nd UnpackMerge test @trilinos/tpetra This is related to #6663. --- .../core/test/CrsMatrix/UnpackMerge.cpp | 237 ++++++++++++++++-- 1 file changed, 218 insertions(+), 19 deletions(-) diff --git a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp index e174009590c7..dc7e97eea814 100644 --- a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp +++ b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp @@ -47,6 +47,18 @@ namespace { // (anonymous) + using Tpetra::TestingUtilities::getDefaultComm; + using Teuchos::ArrayView; + using Teuchos::Comm; + using Teuchos::outArg; + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::REDUCE_MIN; + using Teuchos::reduceAll; + using Teuchos::tuple; + using std::endl; + using GST = Tpetra::global_size_t; + // Both source and target matrices have one row on each process. 
// // Target matrix global column indices: @@ -61,24 +73,13 @@ namespace { // (anonymous) // Proc 0: Global row index 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] // Proc 1: Global row index 1: [0, 1, 2, 3, 4, 5] - TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( CrsMatrix, UnpackMerge, Scalar, Node ) + TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( CrsMatrix, UnpackMerge1, Scalar, Node ) { - using Tpetra::TestingUtilities::getDefaultComm; - using Teuchos::ArrayView; - using Teuchos::Comm; - using Teuchos::outArg; - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::REDUCE_MIN; - using Teuchos::reduceAll; - using Teuchos::tuple; - using std::endl; using LO = Tpetra::Map<>::local_ordinal_type; using GO = Tpetra::Map<>::global_ordinal_type; using crs_matrix_type = Tpetra::CrsMatrix; using import_type = Tpetra::Import; using map_type = Tpetra::Map; - using GST = Tpetra::global_size_t; using STS = Teuchos::ScalarTraits; RCP > comm = getDefaultComm(); @@ -102,12 +103,12 @@ namespace { // (anonymous) std::vector srcRowMapInds; std::vector tgtRowMapInds; if (myRank == 0) { - srcRowMapInds = std::vector{{1}}; - tgtRowMapInds = std::vector{{0}}; + srcRowMapInds.push_back(1); + tgtRowMapInds.push_back(0); } else if (myRank == 1) { - srcRowMapInds = std::vector{{0}}; - tgtRowMapInds = std::vector{{1}}; + srcRowMapInds.push_back(0); + tgtRowMapInds.push_back(1); } const LO srcLclNumRows (srcRowMapInds.size()); const LO tgtLclNumRows (tgtRowMapInds.size()); @@ -214,17 +215,215 @@ namespace { // (anonymous) } } + //TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( CrsMatrix, UnpackMerge2, Scalar, Node ) + TEUCHOS_UNIT_TEST( CrsMatrix, UnpackMerge2 ) + { + using Scalar = Tpetra::CrsMatrix<>::scalar_type; + using LO = Tpetra::Map<>::local_ordinal_type; + using GO = Tpetra::Map<>::global_ordinal_type; + using Node = Tpetra::Map<>::node_type; + using crs_matrix_type = Tpetra::CrsMatrix; + using import_type = Tpetra::Import; + using map_type = Tpetra::Map; + using STS = Teuchos::ScalarTraits; + int lclSuccess = 1; + int 
gblSuccess = 0; + + RCP > comm = getDefaultComm(); + const int myRank = comm->getRank(); + const int numProcs = comm->getSize(); + + out << "Regression test with a real-life example" << endl; + Teuchos::OSTab tab1(out); + + TEST_ASSERT( numProcs == 2 ); + if (numProcs != 2) { + out << "This test requires exactly two MPI processes, but you " + "ran it with " << numProcs << " process(es)." << endl; + return; + } + + const GO gblNumRows (2); + const GO indexBase (0); + std::vector srcRowMapInds; + std::vector tgtRowMapInds; + if (myRank == 0) { + srcRowMapInds.push_back(1); + tgtRowMapInds.push_back(0); + } + else if (myRank == 1) { + srcRowMapInds.push_back(0); + tgtRowMapInds.push_back(1); + } + const LO srcLclNumRows (srcRowMapInds.size()); + const LO tgtLclNumRows (tgtRowMapInds.size()); + + RCP srcRowMap = + rcp(new map_type(static_cast(gblNumRows), + srcRowMapInds.data(), srcLclNumRows, + indexBase, comm)); + RCP tgtRowMap = + rcp(new map_type(static_cast(gblNumRows), + tgtRowMapInds.data(), tgtLclNumRows, + indexBase, comm)); + // [0, ... 199,999] + const GO gblNumCols = 200000; + RCP colMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::LocallyReplicated)); + RCP domMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::GloballyDistributed)); + RCP ranMap = srcRowMap; + import_type importer(srcRowMap, tgtRowMap); + + // Input to insert: 96 entries, sent from Proc 1. 
+ std::vector srcGblColInds {{ + 142944, 142945, 142946, 142947, 142948, 142949, 142950, 142951, + 142952, 142953, 142954, 142955, 142959, 142960, 142961, 142965, + 142966, 142967, 142968, 142969, 142970, 143142, 143143, 143144, + 198279, 198280, 198281, 198282, 198283, 198284, 198291, 198292, + 198293, 198303, 198304, 198305, 198309, 198310, 198311, 198333, + 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, + 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, + 198350, 198351, 198352, 198353, 198354, 198355, 198356, 198699, + 198700, 198701, 198702, 198703, 198704, 198705, 198706, 198707, + 198708, 198709, 198710, 198711, 198712, 198713, 198729, 198730, + 198731, 198732, 198733, 198734, 198735, 198736, 198737, 198738, + 198739, 198740, 198741, 198742, 198743, 198744, 198745, 198746 + }}; + + // Current contents of Proc 0 row: 96 entries. + std::vector tgtGblColInds {{ + 166215, 166216, 166217, 166218, 166219, 166220, 166221, 166222, + 166223, 166224, 166225, 166226, 166227, 166228, 166229, 166230, + 166231, 166232, 166233, 166234, 166235, 166236, 166237, 166238, + 166239, 166240, 166241, 166242, 166243, 166244, 166245, 166246, + 166247, 198279, 198280, 198281, 198282, 198283, 198284, 198285, + 198286, 198287, 198288, 198289, 198290, 198291, 198292, 198293, + 198294, 198295, 198296, 198297, 198298, 198299, 198300, 198301, + 198302, 198303, 198304, 198305, 198306, 198307, 198308, 198309, + 198310, 198311, 198312, 198313, 198314, 198315, 198316, 198317, + 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, + 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, + 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356 + }}; + + TEST_EQUALITY( srcGblColInds.size(), size_t(96) ); + TEST_EQUALITY( tgtGblColInds.size(), size_t(96) ); + + std::vector srcCpy (srcGblColInds); + auto srcBeg = srcCpy.begin(); + auto srcEnd = srcCpy.end(); + std::sort(srcBeg, srcEnd); + srcEnd = std::unique(srcBeg, srcEnd); + + 
std::vector tgtCpy (tgtGblColInds); + auto tgtBeg = tgtCpy.begin(); + auto tgtEnd = tgtCpy.end(); + std::sort(tgtBeg, tgtEnd); + tgtEnd = std::unique(tgtBeg, tgtEnd); + + std::vector unionGblColInds(srcGblColInds.size() + + tgtGblColInds.size()); + auto unionEnd = std::set_union(srcBeg, srcEnd, tgtBeg, tgtEnd, + unionGblColInds.begin()); + unionGblColInds.resize(unionEnd - unionGblColInds.begin()); + const size_t unionSize = unionGblColInds.size(); + + out << "Number of elements in set union of column indices: " + << unionSize << endl; + + std::vector srcVals(srcGblColInds.size(), Scalar(1.0)); + std::vector tgtVals(tgtGblColInds.size(), Scalar(1.0)); + + std::vector expectedTgtVals; + if (myRank == 0) { + std::vector intersectionGblColInds + (std::min(srcGblColInds.size(), tgtGblColInds.size())); + auto intersectionEnd = + std::set_intersection(srcBeg, srcEnd, tgtBeg, tgtEnd, + intersectionGblColInds.begin()); + intersectionGblColInds.resize + (intersectionEnd - intersectionGblColInds.begin()); + + expectedTgtVals.resize(unionSize); + for(size_t k = 0; k < unionSize; ++k) { + const GO resultGid = unionGblColInds[k]; + auto it = std::lower_bound(intersectionGblColInds.begin(), + intersectionGblColInds.end(), + resultGid); + if (it != intersectionGblColInds.end() && + *it == resultGid) { + expectedTgtVals[k] = 2.0; + } + else { + expectedTgtVals[k] = 1.0; + } + } + } + + crs_matrix_type A_src(srcRowMap, colMap, srcGblColInds.size()); + crs_matrix_type A_tgt(tgtRowMap, colMap, tgtGblColInds.size()); + + for (LO lclRow = 0; lclRow < srcLclNumRows; ++lclRow) { + const GO gblRow = srcRowMap->getGlobalElement(lclRow); + A_tgt.insertGlobalValues(gblRow, + Teuchos::ArrayView(tgtGblColInds), + Teuchos::ArrayView(tgtVals)); + A_src.insertGlobalValues(gblRow, + Teuchos::ArrayView(srcGblColInds), + Teuchos::ArrayView(srcVals)); + } + A_src.fillComplete(domMap, ranMap); + + TEST_ASSERT( ! 
A_tgt.isStaticGraph() ); + + A_tgt.doImport(A_src, importer, Tpetra::INSERT); + A_tgt.fillComplete(domMap, ranMap); + + Kokkos::fence(); // since we're accessing data on host now + + if (myRank == 0) { + const GO gblRowToTest = tgtRowMap->getMinGlobalIndex(); + size_t numEnt = A_tgt.getNumEntriesInGlobalRow(gblRowToTest); + Teuchos::Array gblColInds(numEnt); + Teuchos::Array vals(numEnt); + A_tgt.getGlobalRowCopy(gblRowToTest, gblColInds(), + vals(), numEnt); + + const LO expectedNumEnt(expectedTgtVals.size()); + TEST_EQUALITY( numEnt, expectedNumEnt ); + TEST_EQUALITY( LO(gblColInds.size()), expectedNumEnt ); + TEST_EQUALITY( LO(vals.size()), expectedNumEnt ); + + if (success) { + for (LO k = 0; k < expectedNumEnt; ++k) { + TEST_EQUALITY( gblColInds[k], unionGblColInds[k] ); + const Scalar expectedVal = expectedTgtVals[k]; + TEST_EQUALITY( vals[k], expectedVal ); + } + } + } + + lclSuccess = success ? 1 : 0; + gblSuccess = 0; + reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess)); + TEST_EQUALITY_CONST( gblSuccess, 1 ); + } + // // INSTANTIATIONS // #define UNIT_TEST_GROUP( SCALAR, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( CrsMatrix, UnpackMerge, SCALAR, NODE ) + TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( CrsMatrix, UnpackMerge1, SCALAR, NODE ) + //TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( CrsMatrix, UnpackMerge2, SCALAR, NODE ) TPETRA_ETI_MANGLING_TYPEDEFS() TPETRA_INSTANTIATE_SN( UNIT_TEST_GROUP ) } // namespace (anonymous) - - From e1f86b72136057e76465cffe0698bf0412e58ee2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 10 Feb 2020 11:03:07 -0700 Subject: [PATCH 31/49] Tpetra: Add checkGlobalError function --- packages/tpetra/core/src/CMakeLists.txt | 2 +- .../src/Tpetra_Details_checkGlobalError.cpp | 99 +++++++++++++++++++ .../src/Tpetra_Details_checkGlobalError.hpp | 74 ++++++++++++++ 3 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 packages/tpetra/core/src/Tpetra_Details_checkGlobalError.cpp create mode 100644 
packages/tpetra/core/src/Tpetra_Details_checkGlobalError.hpp diff --git a/packages/tpetra/core/src/CMakeLists.txt b/packages/tpetra/core/src/CMakeLists.txt index 60acfe9ba764..a9653d0ba840 100644 --- a/packages/tpetra/core/src/CMakeLists.txt +++ b/packages/tpetra/core/src/CMakeLists.txt @@ -768,5 +768,5 @@ SET_PROPERTY( # / from this directory, or to / from the 'impl' subdirectory. That ensures # that running "make" will also rerun CMake in order to regenerate Makefiles. # -# Here's another change, and another. +# Here's another change, another, and another. # diff --git a/packages/tpetra/core/src/Tpetra_Details_checkGlobalError.cpp b/packages/tpetra/core/src/Tpetra_Details_checkGlobalError.cpp new file mode 100644 index 000000000000..5ef8e7993b0d --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_checkGlobalError.cpp @@ -0,0 +1,99 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_Details_checkGlobalError.hpp" +#include "Tpetra_Details_gathervPrint.hpp" +#include "Teuchos_CommHelpers.hpp" +#include "Teuchos_TestForException.hpp" +#include +#include + +namespace Tpetra { +namespace Details { + +void +checkGlobalError(std::ostream& globalOutputStream, + const bool localSuccess, + const char localErrorMessage[], + const char globalErrorMessageHeader[], + const Teuchos::Comm& comm) +{ + using Teuchos::outArg; + using Teuchos::REDUCE_MIN; + using Teuchos::reduceAll; + using std::endl; + + int lclGood = localSuccess ? 
1 : 0; + int gblGood = 0; + reduceAll(comm, REDUCE_MIN, lclGood, outArg(gblGood)); + if (gblGood != 1) { + const int myRank = comm.getRank(); + if (myRank == 0) { + globalOutputStream << endl << globalErrorMessageHeader + << endl; + } + + if (localSuccess || localErrorMessage == nullptr) { + Details::gathervPrint(globalOutputStream, "", comm); + } + else { + std::ostringstream lclMsg; + lclMsg << endl; + constexpr int numStars = 60; + for (int star = 0; star < numStars; ++star) { + lclMsg << '*'; + } + lclMsg << endl << "Proc " << myRank << ": " + << localErrorMessage << endl; + Details::gathervPrint(globalOutputStream, lclMsg.str(), comm); + } + +#ifdef HAVE_TEUCHOS_MPI + (void) MPI_Abort(MPI_COMM_WORLD, -1); +#else + TEUCHOS_TEST_FOR_EXCEPTION + (true, std::runtime_error, "Tpetra reports a global error."); +#endif // HAVE_TEUCHOS_MPI + } +} + +} // namespace Details +} // namespace Tpetra diff --git a/packages/tpetra/core/src/Tpetra_Details_checkGlobalError.hpp b/packages/tpetra/core/src/Tpetra_Details_checkGlobalError.hpp new file mode 100644 index 000000000000..64049760d48d --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_checkGlobalError.hpp @@ -0,0 +1,74 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER +*/ + +/// \file Tpetra_Details_checkGlobalError.hpp +/// \brief Declaration of a function that reports a global error. +/// +/// \warning This is an implementation detail of Tpetra. Users may +/// not rely on this header file or any declarations or definitions +/// in it. They may disappear or change at any time.
+ +#ifndef TPETRA_DETAILS_GLOBALERROR_HPP +#define TPETRA_DETAILS_GLOBALERROR_HPP + +#include "TpetraCore_config.h" + +namespace Teuchos { + template + class Comm; +} // namespace Teuchos + +#include + +namespace Tpetra { +namespace Details { + +void +checkGlobalError(std::ostream& globalOutputStream, + const bool localSuccess, + const char localErrorMessage[], + const char globalErrorMessageHeader[], + const Teuchos::Comm& comm); + +} // namespace Details +} // namespace Tpetra + +#endif // TPETRA_DETAILS_GLOBALERROR_HPP From 7cd4fb9448b6b41a921d40a6dcbab75c20430f4b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Sun, 9 Feb 2020 17:48:16 -0700 Subject: [PATCH 32/49] Tpetra::Crs{Graph,Matrix}: Rewrite padding @trilinos/tpetra 1. Add new class Tpetra::Details::CrsPadding. 2. Use it to store padding in CrsGraph and CrsMatrix, instead of Kokkos::UnorderedMap. 3. Remove unpackCrsGraphAndCombine(New), since they depend on the old padding code and were only used in tests, not actually in CrsGraph or CrsMatrix. 4. Change pad_crs_arrays to make it more clear when it should reallocate & repack matrix values. 
--- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 81 +- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 882 +++++++++--------- .../tpetra/core/src/Tpetra_CrsMatrix_decl.hpp | 2 +- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 105 +-- .../core/src/Tpetra_Details_CrsPadding.hpp | 259 +++++ .../core/src/Tpetra_Details_crsUtils.hpp | 408 +++++--- .../core/src/Tpetra_Details_gathervPrint.hpp | 2 - ..._Details_unpackCrsGraphAndCombine_decl.hpp | 60 -- ...a_Details_unpackCrsGraphAndCombine_def.hpp | 255 +---- .../tpetra/core/src/Tpetra_DistObject_def.hpp | 45 +- .../test/CrsGraph/CrsGraph_PackUnpack.cpp | 4 +- .../CrsGraph/CrsGraph_StaticImportExport.cpp | 1 - .../CrsGraph_UnpackIntoStaticGraph.cpp | 6 +- .../core/test/Utils/TpetraUtils_crsUtils.cpp | 17 +- 14 files changed, 1130 insertions(+), 997 deletions(-) create mode 100644 packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 2b9598a755bf..4b6c83d3b86c 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -76,17 +76,10 @@ namespace Tpetra { #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace Details { - - template - void - unpackCrsGraphAndCombine( - CrsGraph& graph, - const Teuchos::ArrayView::packet_type>& imports, - const Teuchos::ArrayView& numPacketsPerLID, - const Teuchos::ArrayView& importLIDs, - size_t constantNumPackets, - Distributor & distor, - CombineMode combineMode); + template + class CrsPadding; } // namespace Details namespace { // (anonymous) @@ -1163,44 +1156,57 @@ namespace Tpetra { const Kokkos::DualView& permuteFromLIDs) override; + using padding_type = Details::CrsPadding< + local_ordinal_type, global_ordinal_type, device_type>; + void - applyCrsPadding( - const Kokkos::UnorderedMap& padding, - const bool verbose); + applyCrsPadding(const padding_type& padding, + const bool verbose); - Kokkos::UnorderedMap + 
std::unique_ptr computeCrsPadding( - const RowGraph& source, + const RowGraph& source, const size_t numSameIDs, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs, const bool verbose) const; - Kokkos::UnorderedMap - computeCrsPadding( - const Kokkos::DualView& importLIDs, + // This actually modifies imports by sorting it. + std::unique_ptr + computeCrsPaddingForImports( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, Kokkos::DualView numPacketsPerLID, const bool verbose) const; - std::vector + std::unique_ptr computePaddingForCrsMatrixUnpack( - const Kokkos::DualView& importLIDs, + const Kokkos::DualView& importLIDs, Kokkos::DualView imports, Kokkos::DualView numPacketsPerLID, const bool verbose) const; void - computeCrsPaddingForSameIDs (Kokkos::UnorderedMap& padding, - const RowGraph& source, - const size_t numSameIDs, - const bool padAll) const; + computeCrsPaddingForSameIDs( + padding_type& padding, + const RowGraph& source, + const local_ordinal_type numSameIDs) const; + void computeCrsPaddingForPermutedIDs( - Kokkos::UnorderedMap& padding, - const RowGraph& source, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, - const bool padAll) const; + padding_type& padding, + const RowGraph& source, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs) const; virtual void packAndPrepare( @@ -1559,17 +1565,6 @@ namespace Tpetra { typename CrsGraphType::node_type> >& rangeMap, const Teuchos::RCP& params); - template - friend void - ::Tpetra::Details::unpackCrsGraphAndCombine( - CrsGraph& graph, - const Teuchos::ArrayView::packet_type>& imports, - const Teuchos::ArrayView& numPacketsPerLID, - const Teuchos::ArrayView& importLIDs, - size_t constantNumPackets, - Distributor & distor, - CombineMode combineMode); - public: /// \brief Import from this to the given destination /// 
graph, and make the result fill complete. diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index f16e703d3bab..c04b93860045 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -65,6 +65,7 @@ #include "Tpetra_Details_packCrsGraph.hpp" #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp" #include "Tpetra_Details_determineLocalTriangularStructure.hpp" +#include "Tpetra_Details_CrsPadding.hpp" #include "Tpetra_Util.hpp" #include #include @@ -79,42 +80,20 @@ namespace Tpetra { namespace Details { namespace Impl { - template - Teuchos::ArrayView< - typename RowGraph< - LocalOrdinal, GlobalOrdinal, Node>::global_ordinal_type> - getSortedMergedGlobalRow( - std::vector& gblColIndsStorage, - size_t& origNumEntries, - size_t& numDuplicates, - const RowGraph& graph, - const GlobalOrdinal gblRow, - const bool iAmSorted, - const bool iAmMerged) + template + Teuchos::ArrayView + getRowGraphGlobalRow( + std::vector& gblColIndsStorage, + const RowGraph& graph, + const GO gblRowInd) { - using Teuchos::ArrayView; - using GO = GlobalOrdinal; - - origNumEntries = graph.getNumEntriesInGlobalRow(gblRow); - if (origNumEntries > gblColIndsStorage.size()) { - gblColIndsStorage.resize(origNumEntries); - } - ArrayView gblColInds(gblColIndsStorage.data(), - origNumEntries); - graph.getGlobalRowCopy(gblRow, gblColInds, origNumEntries); - if (! iAmSorted) { - std::sort(gblColInds.begin(), gblColInds.end()); - } - if (! 
iAmMerged) { - auto newEnd = - std::unique(gblColInds.begin(), gblColInds.end()); - const size_t newNumEntries = - static_cast(newEnd - gblColInds.begin()); - TEUCHOS_ASSERT( origNumEntries >= newNumEntries ); - numDuplicates = size_t(origNumEntries - newNumEntries); - gblColInds = - ArrayView(gblColInds.data(), newNumEntries); + size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd); + if (gblColIndsStorage.size() < origNumEnt) { + gblColIndsStorage.resize(origNumEnt); } + Teuchos::ArrayView gblColInds(gblColIndsStorage.data(), + origNumEnt); + graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt); return gblColInds; } @@ -2311,15 +2290,21 @@ namespace Tpetra { "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, " "the graph is supposed to release its \"allocation specifications\" " "when it allocates its indices." << suffix); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (this->isGloballyIndexed () && - this->k_rowPtrs_.extent (0) != 0 && - (static_cast (this->k_rowPtrs_.extent (0)) != static_cast (lclNumRows + 1) || - this->k_rowPtrs_(lclNumRows) != static_cast (this->k_gblInds1D_.extent (0))), - std::logic_error, "If k_rowPtrs_ has nonzero size and " - "the graph is globally indexed, then " - "k_rowPtrs_ must have N+1 rows, and " - "k_rowPtrs_(N) must equal k_gblInds1D_.extent(0)." << suffix); + if (isGloballyIndexed() && k_rowPtrs_.extent(0) != 0) { + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (size_t(k_rowPtrs_.extent(0)) != size_t(lclNumRows + 1), + std::logic_error, "The graph is globally indexed and " + "k_rowPtrs_ has nonzero size " << k_rowPtrs_.extent(0) + << ", but that size does not equal lclNumRows+1 = " + << (lclNumRows+1) << "." 
<< suffix); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC + (k_rowPtrs_(lclNumRows) != size_t(k_gblInds1D_.extent(0)), + std::logic_error, "The graph is globally indexed and " + "k_rowPtrs_ has nonzero size " << k_rowPtrs_.extent(0) + << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")=" + << k_rowPtrs_(lclNumRows) << " != k_gblInds1D_.extent(0)=" + << k_gblInds1D_.extent(0) << "." << suffix); + } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (this->isLocallyIndexed () && this->k_rowPtrs_.extent (0) != 0 && @@ -5013,21 +4998,22 @@ namespace Tpetra { if (verbose) { std::ostringstream os; - os << *prefix << "Target is StaticProfile; do CRS padding" << endl; + os << *prefix << "Compute padding" << endl; std::cerr << os.str (); } - auto padding = computeCrsPadding (srcRowGraph, numSameIDs, + auto padding = computeCrsPadding(srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs, verbose); - this->applyCrsPadding(padding, verbose); + applyCrsPadding(*padding, verbose); // If the source object is actually a CrsGraph, we can use view // mode instead of copy mode to access the entries in each row, // if the graph is not fill complete. 
- const this_type* srcCrsGraph = dynamic_cast (&source); + const this_type* srcCrsGraph = + dynamic_cast (&source); - const map_type& srcRowMap = * (srcRowGraph.getRowMap ()); - const map_type& tgtRowMap = * (this->getRowMap ()); - const bool src_filled = srcRowGraph.isFillComplete (); + const map_type& srcRowMap = *(srcRowGraph.getRowMap()); + const map_type& tgtRowMap = *(getRowMap()); + const bool src_filled = srcRowGraph.isFillComplete(); Teuchos::Array row_copy; LO myid = 0; @@ -5103,31 +5089,32 @@ namespace Tpetra { template void CrsGraph:: - applyCrsPadding( - const Kokkos::UnorderedMap& padding, - const bool verbose) + applyCrsPadding(const padding_type& padding, + const bool verbose) { using Details::ProfilingRegion; using Details::padCrsArrays; using std::endl; + using LO = local_ordinal_type; using execution_space = typename device_type::execution_space; using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; using indices_type = t_GlobalOrdinal_1D; using local_indices_type = typename local_graph_type::entries_type::non_const_type; - using range_policy = Kokkos::RangePolicy>; - ProfilingRegion regionCAP ("Tpetra::CrsGraph::applyCrsPadding"); + using range_policy = + Kokkos::RangePolicy>; + const char tfecfFuncName[] = "applyCrsPadding"; + ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding"); std::unique_ptr prefix; if (verbose) { - prefix = this->createPrefix("CrsGraph", "applyCrsPadding"); + prefix = this->createPrefix("CrsGraph", tfecfFuncName); std::ostringstream os; - os << *prefix << "padding.size(): " << padding.size() - << ", indicesAreAllocated: " - << (indicesAreAllocated() ? "true" : "false") << endl; - std::cerr << os.str (); + os << *prefix << "padding: "; + padding.print(os); + os << endl; + std::cerr << os.str(); } const int myRank = ! verbose ? 
-1 : [&] () { auto map = this->getMap(); @@ -5141,14 +5128,21 @@ namespace Tpetra { return comm->getRank(); } (); - if (padding.size() == 0) { - return; - } + // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate + // row_ptrs_beg or allocate row_ptrs_end unless the allocation + // size needs to increase. That should be the job of + // padCrsArrays. // Assume global indexing we don't have any indices yet - if (! this->indicesAreAllocated()) { + if (! indicesAreAllocated()) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Call allocateIndices" << endl; + std::cerr << os.str(); + } allocateIndices(GlobalIndices, verbose); } + TEUCHOS_ASSERT( indicesAreAllocated() ); // Making copies here because k_rowPtrs_ has a const type. Otherwise, we // would use it directly. @@ -5159,109 +5153,93 @@ namespace Tpetra { << k_rowPtrs_.extent(0) << endl; std::cerr << os.str(); } - row_ptrs_type row_ptrs_beg("row_ptrs_beg", this->k_rowPtrs_.extent(0)); - Kokkos::deep_copy(row_ptrs_beg, this->k_rowPtrs_); + using Kokkos::view_alloc; + using Kokkos::WithoutInitializing; + row_ptrs_type row_ptrs_beg( + view_alloc("row_ptrs_beg", WithoutInitializing), + k_rowPtrs_.extent(0)); + Kokkos::deep_copy(row_ptrs_beg, k_rowPtrs_); - const size_t N = (row_ptrs_beg.extent(0) == 0 ? 0 : row_ptrs_beg.extent(0) - 1); + const size_t N = row_ptrs_beg.extent(0) == 0 ? 
size_t(0) : + size_t(row_ptrs_beg.extent(0) - 1); if (verbose) { std::ostringstream os; os << *prefix << "Allocate row_ptrs_end: " << N << endl; std::cerr << os.str(); } - row_ptrs_type row_ptrs_end("row_ptrs_end", N); + row_ptrs_type row_ptrs_end( + view_alloc("row_ptrs_end", WithoutInitializing), N); - bool refill_num_row_entries = false; - if (this->k_numRowEntries_.extent(0) > 0) { - // Case 1: Unpacked storage - refill_num_row_entries = true; + const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0; + if (refill_num_row_entries) { // Case 1: Unpacked storage + // We can't assume correct *this capture until C++17, and it's + // likely more efficient just to capture what we need anyway. auto num_row_entries = this->k_numRowEntries_; - Kokkos::parallel_for("Fill end row pointers", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i); - } - ); - - } else { - // mfh If packed storage, don't need row_ptrs_end to be separate allocation; - // could just have it alias row_ptrs_beg+1. 
- // Case 2: Packed storage - Kokkos::parallel_for("Fill end row pointers", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - row_ptrs_end(i) = row_ptrs_beg(i+1); - } - ); + Kokkos::parallel_for + ("Fill end row pointers", range_policy(0, N), + KOKKOS_LAMBDA (const size_t i) { + row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i); + }); } - - if(this->isGloballyIndexed()) { - if (verbose) { - std::ostringstream os; - os << *prefix << "Allocate (copy of) global column indices: " - << k_gblInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - indices_type indices("indices", this->k_gblInds1D_.extent(0)); - Kokkos::deep_copy(indices, this->k_gblInds1D_); - using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptrs_beg, - row_ptrs_end, indices, padding, myRank, verbose); - if (verbose) { - std::ostringstream os; - os << *prefix << "Reassign k_gblInds1D_; old size: " - << k_gblInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - this->k_gblInds1D_ = indices; + else { + // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed + // storage, we don't need row_ptr_end to be separate allocation; + // could just have it alias row_ptr_beg+1. 
+ Kokkos::parallel_for + ("Fill end row pointers", range_policy(0, N), + KOKKOS_LAMBDA (const size_t i) { + row_ptrs_end(i) = row_ptrs_beg(i+1); + }); + } + + if (isGloballyIndexed()) { + padCrsArrays(row_ptrs_beg, row_ptrs_end, k_gblInds1D_, + padding, myRank, verbose); + TEUCHOS_ASSERT( padding.increase() == 0 || + k_gblInds1D_.extent(0) != 0 ); } else { - if (verbose) { - std::ostringstream os; - os << *prefix << "Allocate (copy of) local column indices: " - << k_lclInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - local_indices_type indices("indices", this->k_lclInds1D_.extent(0)); - Kokkos::deep_copy(indices, this->k_lclInds1D_); - using padding_type = Kokkos::UnorderedMap; - padCrsArrays(row_ptrs_beg, - row_ptrs_end, indices, padding, myRank, verbose); - if (verbose) { - std::ostringstream os; - os << *prefix << "Reassign k_lclInds1D_; old size: " - << k_lclInds1D_.extent(0) << endl; - std::cerr << os.str(); - } - this->k_lclInds1D_ = indices; + padCrsArrays(row_ptrs_beg, row_ptrs_end, k_lclInds1D_, + padding, myRank, verbose); + TEUCHOS_ASSERT( padding.increase() == 0 || + k_lclInds1D_.extent(0) != 0 ); } if (refill_num_row_entries) { auto num_row_entries = this->k_numRowEntries_; - Kokkos::parallel_for("Fill num entries", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i); - } - ); + Kokkos::parallel_for + ("Fill num entries", range_policy(0, N), + KOKKOS_LAMBDA (const size_t i) { + num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i); + }); } if (verbose) { std::ostringstream os; os << *prefix << "Reassign k_rowPtrs_; old size: " - << k_rowPtrs_.extent(0) << endl; + << k_rowPtrs_.extent(0) << ", new size: " + << row_ptrs_beg.extent(0) << endl; std::cerr << os.str(); + TEUCHOS_ASSERT( k_rowPtrs_.extent(0) == row_ptrs_beg.extent(0) ); } this->k_rowPtrs_ = row_ptrs_beg; } template - Kokkos::UnorderedMap + std::unique_ptr< + typename CrsGraph::padding_type + > CrsGraph:: - computeCrsPadding 
(const RowGraph& source, - const size_t numSameIDs, - const Kokkos::DualView& permuteToLIDs, - const Kokkos::DualView& permuteFromLIDs, - const bool verbose) const + computeCrsPadding( + const RowGraph& source, + const size_t numSameIDs, + const Kokkos::DualView& permuteToLIDs, + const Kokkos::DualView& permuteFromLIDs, + const bool verbose) const { + using LO = local_ordinal_type; using std::endl; - using LO = LocalOrdinal; - using padding_type = Kokkos::UnorderedMap; std::unique_ptr prefix; if (verbose) { @@ -5274,124 +5252,79 @@ namespace Tpetra { std::cerr << os.str(); } - padding_type padding (numSameIDs + permuteFromLIDs.extent (0)); - - computeCrsPaddingForSameIDs(padding, source, numSameIDs, false); - computeCrsPaddingForPermutedIDs(padding, source, permuteToLIDs, permuteFromLIDs, false); - - Kokkos::fence (); // Make sure device sees changes made by host - TEUCHOS_TEST_FOR_EXCEPTION - (padding.failed_insert(), std::runtime_error, - "failed to insert one or more indices in to padding map"); + std::unique_ptr padding( + new padding_type(padding_type::create_from_sames_and_permutes, + numSameIDs, permuteFromLIDs.extent(0))); + // We're accessing data on host, so make sure all device + // computations on the graphs' data are done. + // + // NOTE (mfh 08 Feb 2020) If we ever get rid of this fence, look + // carefully in computeCrsPaddingFor{Same,Permuted}IDs to see if + // we need a fence there. 
+ Kokkos::fence(); + + computeCrsPaddingForSameIDs(*padding, source, + static_cast(numSameIDs)); + computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs, + permuteFromLIDs); return padding; } template void CrsGraph:: - computeCrsPaddingForSameIDs (Kokkos::UnorderedMap& padding, - const RowGraph& source, - const size_t numSameIDs, - const bool padAll) const + computeCrsPaddingForSameIDs( + padding_type& padding, + const RowGraph& source, + const local_ordinal_type numSameIDs) const { - using LO = LocalOrdinal; - using GO = GlobalOrdinal; - using Details::Impl::getSortedMergedGlobalRow; + using LO = local_ordinal_type; + using GO = global_ordinal_type; + using Details::Impl::getRowGraphGlobalRow; using std::endl; - const char tfecfFuncName[] = "computeCrsPaddingForSameIds: "; + const char tfecfFuncName[] = "computeCrsPaddingForSameIds"; std::unique_ptr prefix; const bool verbose = verbose_; if (verbose) { - prefix = - this->createPrefix("CrsGraph", "computeCrsPaddingForSameIDs"); + prefix = this->createPrefix("CrsGraph", tfecfFuncName); std::ostringstream os; - os << *prefix << "numSameIDs: " << numSameIDs - << ", padAll: " << (padAll ? "true" : "false") << endl; + os << *prefix << "numSameIDs: " << numSameIDs << endl; std::cerr << os.str(); } - if (! numSameIDs) { + if (numSameIDs == 0) { return; } - Kokkos::fence (); - - using insert_result = - typename Kokkos::UnorderedMap::insert_result; - - // Compute extra capacity needed to accommodate incoming data - const map_type& src_row_map = * (source.getRowMap ()); - + const map_type& srcRowMap = *(source.getRowMap()); + const map_type& tgtRowMap = *rowMap_; using this_type = CrsGraph; const this_type* srcCrs = dynamic_cast(&source); - const bool src_sorted = srcCrs == nullptr ? false : srcCrs->isSorted(); - const bool src_merged = srcCrs == nullptr ? 
false : srcCrs->isMerged(); - const bool tgt_sorted = this->isSorted(); - const bool tgt_merged = this->isMerged(); - - std::vector src_row_inds; - std::vector tgt_row_inds; + const bool src_is_unique = + srcCrs == nullptr ? false : srcCrs->isMerged(); + const bool tgt_is_unique = this->isMerged(); + std::vector srcGblColIndsScratch; + std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; size_t mergedNumDups = 0; - - for (LO tgt_lid = 0; tgt_lid < static_cast (numSameIDs); ++tgt_lid) { - const GO src_gid = src_row_map.getGlobalElement(tgt_lid); - size_t orig_num_src_entries = source.getNumEntriesInGlobalRow(src_gid); - if (orig_num_src_entries == 0) { - continue; - } - insert_result result; - const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid); - if (padAll) { - const size_t orig_num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid); - result = padding.insert(tgt_lid, orig_num_src_entries + orig_num_tgt_entries); - } - else { - size_t curNumSrcDups = 0; - Teuchos::ArrayView src_row_inds_view = - getSortedMergedGlobalRow(src_row_inds, orig_num_src_entries, - curNumSrcDups, source, src_gid, - src_sorted, src_merged); - srcNumDups += curNumSrcDups; - if (src_row_inds_view.size() == 0) { // nothing new to insert - continue; - } - - size_t orig_num_tgt_entries = 0; - size_t curNumTgtDups = 0; - Teuchos::ArrayView tgt_row_inds_view = - getSortedMergedGlobalRow(tgt_row_inds, orig_num_tgt_entries, - curNumTgtDups, *this, tgt_gid, - tgt_sorted, tgt_merged); - tgtNumDups += curNumTgtDups; - - const size_t numInCommon = Details::countNumInCommon( - src_row_inds_view.begin(), - src_row_inds_view.end(), - tgt_row_inds_view.begin(), - tgt_row_inds_view.end()); - const size_t orig_num_union = - size_t(src_row_inds_view.size()) + - size_t(tgt_row_inds_view.size()); - - const size_t new_num_union = orig_num_union - numInCommon; - mergedNumDups += numInCommon; - - if (new_num_union > orig_num_tgt_entries) { - result = padding.insert (tgt_lid, 
new_num_union); - // Kokkos::UnorderedMap is allowed to fail even if the user did - // nothing wrong. We should actually have a retry option. - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - "unable to insert padding for LID " << tgt_lid); - } - } + for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) { + const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd); + const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd); + auto srcGblColInds = getRowGraphGlobalRow( + srcGblColIndsScratch, source, srcGblRowInd); + auto tgtGblColInds = getRowGraphGlobalRow( + tgtGblColIndsScratch, *this, tgtGblRowInd); + padding.update_same(tgtNumDups, srcNumDups, mergedNumDups, + lclRowInd, tgtGblColInds.getRawPtr(), + tgtGblColInds.size(), tgt_is_unique, + srcGblColInds.getRawPtr(), + srcGblColInds.size(), src_is_unique); } - if (verbose) { std::ostringstream os; os << *prefix << "Done: srcNumDups: " << srcNumDups @@ -5405,18 +5338,17 @@ namespace Tpetra { void CrsGraph:: computeCrsPaddingForPermutedIDs( - Kokkos::UnorderedMap& padding, - const RowGraph& source, + padding_type& padding, + const RowGraph& source, const Kokkos::DualView& permuteToLIDs, const Kokkos::DualView& permuteFromLIDs, - const bool padAll) const + buffer_device_type>& permuteFromLIDs) const { - using LO = LocalOrdinal; - using GO = GlobalOrdinal; - using Details::Impl::getSortedMergedGlobalRow; + using LO = local_ordinal_type; + using GO = global_ordinal_type; + using Details::Impl::getRowGraphGlobalRow; using std::endl; const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds"; @@ -5428,93 +5360,48 @@ namespace Tpetra { os << *prefix << "permuteToLIDs.extent(0): " << permuteToLIDs.extent(0) << ", permuteFromLIDs.extent(0): " - << permuteFromLIDs.extent(0) - << ", padAll: " << (padAll ? 
"true" : "false") << endl; + << permuteFromLIDs.extent(0) << endl; std::cerr << os.str(); } - Kokkos::fence (); - - const map_type& src_row_map = * (source.getRowMap ()); - - using insert_result = - typename Kokkos::UnorderedMap::insert_result; - auto permuteToLIDs_h = permuteToLIDs.view_host (); - auto permuteFromLIDs_h = permuteFromLIDs.view_host (); + if (permuteToLIDs.extent(0) == 0) { + return; + } + const map_type& srcRowMap = *(source.getRowMap()); + const map_type& tgtRowMap = *rowMap_; using this_type = CrsGraph; const this_type* srcCrs = dynamic_cast(&source); - const bool src_sorted = srcCrs == nullptr ? false : srcCrs->isSorted(); - const bool src_merged = srcCrs == nullptr ? false : srcCrs->isMerged(); - const bool tgt_sorted = this->isSorted(); - const bool tgt_merged = this->isMerged(); + const bool src_is_unique = + srcCrs == nullptr ? false : srcCrs->isMerged(); + const bool tgt_is_unique = this->isMerged(); - std::vector src_row_inds; - std::vector tgt_row_inds; + TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() ); + auto permuteToLIDs_h = permuteToLIDs.view_host(); + TEUCHOS_ASSERT( ! 
permuteFromLIDs.need_sync_host() ); + auto permuteFromLIDs_h = permuteFromLIDs.view_host(); + std::vector srcGblColIndsScratch; + std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; size_t mergedNumDups = 0; - const LO numPermutes = static_cast(permuteToLIDs_h.extent(0)); - for (LO i = 0; i < numPermutes; ++i) { - const GO src_gid = src_row_map.getGlobalElement(permuteFromLIDs_h[i]); - auto orig_num_src_entries = source.getNumEntriesInGlobalRow(src_gid); - if (orig_num_src_entries == 0) { - continue; - } - insert_result result; - const LO tgt_lid = permuteToLIDs_h[i]; - if (padAll) { - const size_t orig_num_tgt_entries = - getNumEntriesInLocalRow(tgt_lid); - result = padding.insert( - tgt_lid, orig_num_src_entries + orig_num_tgt_entries); - } - else { - size_t curNumSrcDups = 0; - Teuchos::ArrayView src_row_inds_view = - getSortedMergedGlobalRow(src_row_inds, orig_num_src_entries, - curNumSrcDups, source, src_gid, - src_sorted, src_merged); - srcNumDups += curNumSrcDups; - - const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid); - size_t orig_num_tgt_entries = 0; - size_t curNumTgtDups = 0; - Teuchos::ArrayView tgt_row_inds_view = - getSortedMergedGlobalRow(tgt_row_inds, orig_num_tgt_entries, - curNumTgtDups, *this, tgt_gid, - tgt_sorted, tgt_merged); - tgtNumDups += curNumTgtDups; - - const size_t numInCommon = Details::countNumInCommon( - src_row_inds_view.begin(), - src_row_inds_view.end(), - tgt_row_inds_view.begin(), - tgt_row_inds_view.end()); - const size_t orig_num_union = - size_t(src_row_inds_view.size()) + - size_t(tgt_row_inds_view.size()); - const size_t new_num_union = orig_num_union - numInCommon; - mergedNumDups += numInCommon; - - if (new_num_union > orig_num_tgt_entries) { - result = padding.insert (tgt_lid, new_num_union); - // Kokkos::UnorderedMap is allowed to fail even if the user did - // nothing wrong. We should actually have a retry option. 
- TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - "unable to insert padding for LID " << tgt_lid); - } - } - - // Kokkos::UnorderedMap is allowed to fail even if the user did - // nothing wrong. We should actually have a retry option. - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - "unable to insert padding for LID " << tgt_lid); + for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) { + const LO srcLclRowInd = permuteFromLIDs_h[whichPermute]; + const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd); + auto srcGblColInds = getRowGraphGlobalRow( + srcGblColIndsScratch, source, srcGblRowInd); + const LO tgtLclRowInd = permuteToLIDs_h[whichPermute]; + const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd); + auto tgtGblColInds = getRowGraphGlobalRow( + tgtGblColIndsScratch, *this, tgtGblRowInd); + padding.update_permute(tgtNumDups, srcNumDups, mergedNumDups, + whichPermute, tgtLclRowInd, + tgtGblColInds.getRawPtr(), + tgtGblColInds.size(), tgt_is_unique, + srcGblColInds.getRawPtr(), + srcGblColInds.size(), src_is_unique); } if (verbose) { @@ -5527,164 +5414,198 @@ namespace Tpetra { } template - Kokkos::UnorderedMap + std::unique_ptr< + typename CrsGraph::padding_type + > CrsGraph:: - computeCrsPadding (const Kokkos::DualView& importLIDs, - Kokkos::DualView numPacketsPerLID, - const bool verbose) const + computeCrsPaddingForImports( + const Kokkos::DualView& importLIDs, + Kokkos::DualView imports, + Kokkos::DualView numPacketsPerLID, + const bool verbose) const { + using Details::Impl::getRowGraphGlobalRow; using std::endl; - const char tfecfFuncName[] = "computeCrsPadding: "; + using LO = local_ordinal_type; + using GO = global_ordinal_type; + const char tfecfFuncName[] = "computeCrsPaddingForImports"; std::unique_ptr prefix; if (verbose) { - prefix = - this->createPrefix("CrsGraph", "computeCrsPadding(imports)"); + prefix = this->createPrefix("CrsGraph", 
tfecfFuncName); std::ostringstream os; - os << *prefix << "{importLIDs.extent(0): " + os << *prefix << "importLIDs.extent(0): " << importLIDs.extent(0) + << ", imports.extent(0): " + << imports.extent(0) << ", numPacketsPerLID.extent(0): " - << numPacketsPerLID.extent(0) << "}" - << endl; + << numPacketsPerLID.extent(0) << endl; std::cerr << os.str(); } - // Creating padding for each new incoming index - Kokkos::fence (); // Make sure device sees changes made by host - auto numEnt = static_cast (importLIDs.extent (0)); - - auto importLIDs_h = importLIDs.view_host (); + const LO numImports = static_cast(importLIDs.extent(0)); + std::unique_ptr padding( + new padding_type(padding_type::create_from_imports, + numImports)); + Kokkos::fence(); // Make sure device sees changes made by host + if (imports.need_sync_host()) { + imports.sync_host(); + } + auto imports_h = imports.view_host(); if (numPacketsPerLID.need_sync_host ()) { - numPacketsPerLID.sync_host (); + numPacketsPerLID.sync_host(); } - auto numPacketsPerLID_h = numPacketsPerLID.view_host (); + auto numPacketsPerLID_h = numPacketsPerLID.view_host(); - // without unpacking the import/export buffer, we don't know how many of the - // numPacketsPerLID[i] LIDs exist in the target. Below, it is assumed that - // none do, and padding is requested for all. - // - // Use tmp_padding since Kokkos::UnorderedMap does not allow re-insertion - std::map tmp_padding; - for (size_t i = 0; i < numEnt; ++i) - tmp_padding[importLIDs_h[i]] += numPacketsPerLID_h[i]; - - using padding_type = Kokkos::UnorderedMap; - padding_type padding (importLIDs.extent (0)); - for (auto&& item : tmp_padding) { - auto result = padding.insert (item.first, item.second); - // FIXME (mfh 09 Apr 2019) See note in other computeCrsPaddingoverload. - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - "unable to insert padding for LID " << item.first); + TEUCHOS_ASSERT( ! 
importLIDs.need_sync_host() ); + auto importLIDs_h = importLIDs.view_host(); + + const map_type& tgtRowMap = *rowMap_; + // Always merge source column indices, since isMerged() is + // per-process state, and we don't know its value on other + // processes that sent us data. + constexpr bool src_is_unique = false; + const bool tgt_is_unique = isMerged(); + + std::vector tgtGblColIndsScratch; + size_t srcNumDups = 0; + size_t tgtNumDups = 0; + size_t mergedNumDups = 0; + size_t offset = 0; + for (LO whichImport = 0; whichImport < numImports; ++whichImport) { + // CrsGraph packs just global column indices, while CrsMatrix + // packs bytes (first the number of entries in the row, then the + // global column indices, then other stuff like the matrix + // values in that row). + const LO origSrcNumEnt = + static_cast(numPacketsPerLID_h[whichImport]); + GO* const srcGblColInds = imports_h.data() + offset; + + const LO tgtLclRowInd = importLIDs_h[whichImport]; + const GO tgtGblRowInd = + tgtRowMap.getGlobalElement(tgtLclRowInd); + auto tgtGblColInds = getRowGraphGlobalRow( + tgtGblColIndsScratch, *this, tgtGblRowInd); + const size_t origTgtNumEnt(tgtGblColInds.size()); + + padding->update_import(tgtNumDups, srcNumDups, mergedNumDups, + whichImport, tgtLclRowInd, + tgtGblColInds.getRawPtr(), + origTgtNumEnt, tgt_is_unique, + srcGblColInds, + origSrcNumEnt, src_is_unique); + offset += origSrcNumEnt; } - TEUCHOS_TEST_FOR_EXCEPTION - (padding.failed_insert(), std::runtime_error, - "failed to insert one or more indices in to padding map"); + if (verbose) { + std::ostringstream os; + os << *prefix << "Done: srcNumDups: " << srcNumDups + << ", tgtNumDups: " << tgtNumDups + << ", mergedNumDups: " << mergedNumDups << endl; + std::cerr << os.str(); + } return padding; } template - std::vector + std::unique_ptr< + typename CrsGraph::padding_type + > CrsGraph:: computePaddingForCrsMatrixUnpack( - const Kokkos::DualView& importLIDs, + const Kokkos::DualView& importLIDs, Kokkos::DualView 
imports, Kokkos::DualView numPacketsPerLID, const bool verbose) const { - using LO = local_ordinal_type; - using GO = global_ordinal_type; - using Details::Impl::getSortedMergedGlobalRow; + using Details::Impl::getRowGraphGlobalRow; using Details::PackTraits; using std::endl; + using LO = local_ordinal_type; + using GO = global_ordinal_type; const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack"; std::unique_ptr prefix; if (verbose) { prefix = this->createPrefix("CrsGraph", tfecfFuncName); std::ostringstream os; - os << *prefix << "Start" << endl; + os << *prefix << "importLIDs.extent(0): " + << importLIDs.extent(0) + << ", imports.extent(0): " + << imports.extent(0) + << ", numPacketsPerLID.extent(0): " + << numPacketsPerLID.extent(0) << endl; std::cerr << os.str(); } - Kokkos::fence (); // Make sure host sees changes made by device + const LO numImports = static_cast(importLIDs.extent(0)); + std::unique_ptr padding( + new padding_type(padding_type::create_from_imports, + numImports)); + Kokkos::fence(); // Make sure host sees changes made by device if (imports.need_sync_host()) { imports.sync_host(); } + auto imports_h = imports.view_host(); if (numPacketsPerLID.need_sync_host ()) { numPacketsPerLID.sync_host(); } - - auto importLIDs_h = importLIDs.view_host(); - auto imports_h = imports.view_host(); auto numPacketsPerLID_h = numPacketsPerLID.view_host(); - const LO numImports = static_cast(importLIDs.extent(0)); - const bool tgtSorted = isSorted(); - const bool tgtMerged = isMerged(); + TEUCHOS_ASSERT( ! importLIDs.need_sync_host() ); + auto importLIDs_h = importLIDs.view_host(); - std::vector padding(numImports); - std::vector gblColIndsReceived; - std::vector gblColIndsTgt; + const map_type& tgtRowMap = *rowMap_; + // Always merge source column indices, since isMerged() is + // per-process state, and we don't know its value on other + // processes that sent us data. 
+ constexpr bool src_is_unique = false; + const bool tgt_is_unique = isMerged(); + std::vector srcGblColIndsScratch; + std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; size_t mergedNumDups = 0; - size_t offset = 0; - for (LO whichImp = 0; whichImp < numImports; ++whichImp) { - const LO lclRowInd = importLIDs_h[whichImp]; - const GO gblRowInd = rowMap_->getGlobalElement(lclRowInd); - - const size_t numBytes = numPacketsPerLID_h[whichImp]; + for (LO whichImport = 0; whichImport < numImports; ++whichImport) { + // CrsGraph packs just global column indices, while CrsMatrix + // packs bytes (first the number of entries in the row, then the + // global column indices, then other stuff like the matrix + // values in that row). + const size_t numBytes = numPacketsPerLID_h[whichImport]; if (numBytes == 0) { - continue; + continue; // special case: no entries to unpack for this row } - LO numEntriesReceived = 0; + LO origSrcNumEnt = 0; const size_t numEntBeg = offset; const size_t numEntLen = - PackTraits::packValueCount(numEntriesReceived); - PackTraits::unpackValue(numEntriesReceived, + PackTraits::packValueCount(origSrcNumEnt); + PackTraits::unpackValue(origSrcNumEnt, imports_h.data() + numEntBeg); const size_t gidsBeg = numEntBeg + numEntLen; - - if (gblColIndsReceived.size() != size_t(numEntriesReceived)) { - gblColIndsReceived.resize(numEntriesReceived); - } - (void) PackTraits::unpackArray(gblColIndsReceived.data(), - imports_h.data() + gidsBeg, - numEntriesReceived); - std::sort(gblColIndsReceived.begin(), gblColIndsReceived.end()); - auto newEnd = std::unique(gblColIndsReceived.begin(), - gblColIndsReceived.end()); - const size_t numEntriesRecvdUnique = - static_cast(newEnd - gblColIndsReceived.begin()); - TEUCHOS_ASSERT( numEntriesRecvdUnique <= size_t(numEntriesReceived) ); - const size_t curSrcNumDups = - size_t(size_t(numEntriesReceived) - numEntriesRecvdUnique); - srcNumDups += curSrcNumDups; - 
gblColIndsReceived.resize(numEntriesRecvdUnique); - - size_t origNumTgtEnt = 0; - size_t curNumTgtDups = 0; - Teuchos::ArrayView gblColIndsTgtView = - getSortedMergedGlobalRow(gblColIndsTgt, origNumTgtEnt, - curNumTgtDups, *this, gblRowInd, - tgtSorted, tgtMerged); - tgtNumDups += curNumTgtDups; - - const size_t numInCommon = Details::countNumInCommon( - gblColIndsReceived.begin(), - gblColIndsReceived.end(), - gblColIndsTgt.begin(), - gblColIndsTgt.end()); - const size_t origNumBoth = numEntriesRecvdUnique + - size_t(gblColIndsTgtView.size()); - const size_t newNumMerged = origNumBoth - numInCommon; - mergedNumDups += numInCommon; - - padding[whichImp] = std::max(newNumMerged, origNumTgtEnt); + if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) { + srcGblColIndsScratch.resize(origSrcNumEnt); + } + GO* const srcGblColInds = srcGblColIndsScratch.data(); + PackTraits::unpackArray(srcGblColInds, + imports_h.data() + gidsBeg, + origSrcNumEnt); + const LO tgtLclRowInd = importLIDs_h[whichImport]; + const GO tgtGblRowInd = + tgtRowMap.getGlobalElement(tgtLclRowInd); + auto tgtGblColInds = getRowGraphGlobalRow( + tgtGblColIndsScratch, *this, tgtGblRowInd); + const size_t origNumTgtEnt(tgtGblColInds.size()); + + padding->update_import(tgtNumDups, srcNumDups, mergedNumDups, + whichImport, tgtLclRowInd, + tgtGblColInds.getRawPtr(), + origNumTgtEnt, tgt_is_unique, + srcGblColInds, + origSrcNumEnt, src_is_unique); offset += numBytes; } @@ -6344,20 +6265,28 @@ namespace Tpetra { using std::endl; using LO = local_ordinal_type; using GO = global_ordinal_type; - const char tfecfFuncName[] = "unpackAndCombine: "; + const char tfecfFuncName[] = "unpackAndCombine"; ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine"); const bool verbose = verbose_; std::unique_ptr prefix; if (verbose) { - prefix = this->createPrefix("CrsGraph", "unpackAndCombine"); + prefix = this->createPrefix("CrsGraph", tfecfFuncName); std::ostringstream os; - os << *prefix << endl; + os << 
*prefix << "Start" << endl; std::cerr << os.str (); } - auto padding = computeCrsPadding (importLIDs, numPacketsPerLID, verbose); - applyCrsPadding(padding, verbose); + { + auto padding = computeCrsPaddingForImports( + importLIDs, imports, numPacketsPerLID, verbose); + applyCrsPadding(*padding, verbose); + if (verbose) { + std::ostringstream os; + os << *prefix << "Done computing & applying padding" << endl; + std::cerr << os.str (); + } + } // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly // reasonable meaning, whether or not the matrix is fill complete. @@ -6378,71 +6307,110 @@ namespace Tpetra { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (importLIDs.extent (0) != numPacketsPerLID.extent (0), - std::runtime_error, "importLIDs.extent(0) = " + std::runtime_error, ": importLIDs.extent(0) = " << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0) << "."); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (isFillComplete (), std::runtime_error, - "Import or Export operations are not allowed on the destination " - "CrsGraph if it is fill complete."); + ": Import or Export operations are not allowed on a target " + "CrsGraph that is fillComplete."); - const size_t numImportLIDs = static_cast (importLIDs.extent (0)); - if (numPacketsPerLID.need_sync_host ()) { - numPacketsPerLID.sync_host (); + const size_t numImportLIDs(importLIDs.extent(0)); + if (numPacketsPerLID.need_sync_host()) { + numPacketsPerLID.sync_host(); } - auto numPacketsPerLID_h = numPacketsPerLID.view_host (); + auto numPacketsPerLID_h = numPacketsPerLID.view_host(); + if (imports.need_sync_host()) { + imports.sync_host(); + } + auto imports_h = imports.view_host(); + TEUCHOS_ASSERT( ! 
importLIDs.need_sync_host() ); + auto importLIDs_h = importLIDs.view_host(); // If we're inserting in local indices, let's pre-allocate Teuchos::Array lclColInds; - if (this->isLocallyIndexed ()) { + if (isLocallyIndexed()) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Preallocate local indices scratch" << endl; + std::cerr << os.str(); + } size_t maxNumInserts = 0; for (size_t i = 0; i < numImportLIDs; ++i) { maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]); } + if (verbose) { + std::ostringstream os; + os << *prefix << "Local indices scratch size: " + << maxNumInserts << endl; + std::cerr << os.str(); + } lclColInds.resize (maxNumInserts); } - - auto importLIDs_h = importLIDs.view_host (); - if (imports.need_sync_host ()) { - imports.sync_host (); - } - auto imports_h = imports.view_host (); - - const map_type& rowMap = * (this->rowMap_); - size_t importsOffset = 0; - for (size_t i = 0; i < numImportLIDs; ++i) { - const LO lclRow = importLIDs_h[i]; - const GO gblRow = rowMap.getGlobalElement (lclRow); - const LO numEnt = numPacketsPerLID_h[i]; - const GO* const gblColInds = (numEnt == 0) ? nullptr : - &imports_h[importsOffset]; - if (! this->isLocallyIndexed ()) { - if (gblRow == Tpetra::Details::OrdinalTraits::invalid ()) { - // This row is not in the row Map on the calling process. - this->insertGlobalIndicesIntoNonownedRows (gblRow, gblColInds, numEnt); + else { + if (verbose) { + std::ostringstream os; + os << *prefix; + if (isGloballyIndexed()) { + os << "Graph is globally indexed"; } else { - this->insertGlobalIndicesFiltered (lclRow, gblColInds, numEnt); + os << "Graph is neither locally nor globally indexed"; } + os << endl; + std::cerr << os.str(); } - else { - for (LO j = 0; j < numEnt; j++) { - lclColInds[j] = this->colMap_->getLocalElement (gblColInds[j]); + } + + TEUCHOS_ASSERT( ! 
rowMap_.is_null() ); + const map_type& rowMap = *rowMap_; + + try { + size_t importsOffset = 0; + for (size_t i = 0; i < numImportLIDs; ++i) { + if (verbose) { + std::ostringstream os; + os << *prefix << "i=" << i << ", numImportLIDs=" + << numImportLIDs << endl; + std::cerr << os.str(); } + // We can only unpack into owned rows, since we only have + // local row indices. + const LO lclRow = importLIDs_h[i]; + const GO gblRow = rowMap.getGlobalElement(lclRow); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (gblRow == Tpetra::Details::OrdinalTraits::invalid (), - std::runtime_error, - "cannot insert into unowned rows if isLocallyIndexed()."); - this->insertLocalIndices (lclRow, numEnt, lclColInds.data ()); + (gblRow == Teuchos::OrdinalTraits::invalid(), + std::logic_error, "importLIDs[i=" << i << "]=" + << lclRow << " is not in the row Map on the calling " + "process."); + const LO numEnt = numPacketsPerLID_h[i]; + const GO* const gblColInds = (numEnt == 0) ? nullptr : + imports_h.data() + importsOffset; + if (! isLocallyIndexed()) { + insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt); + } + else { + // FIXME (mfh 09 Feb 2020) Now would be a good time to do + // column Map filtering. 
+ for (LO j = 0; j < numEnt; j++) { + lclColInds[j] = colMap_->getLocalElement(gblColInds[j]); + } + insertLocalIndices(lclRow, numEnt, lclColInds.data()); + } + importsOffset += numEnt; } - importsOffset += numEnt; } - + catch (std::exception& e) { + TEUCHOS_TEST_FOR_EXCEPTION + (true, std::runtime_error, + "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an " + "exception: " << endl << e.what()); + } if (verbose) { std::ostringstream os; os << *prefix << "Done" << endl; - std::cerr << os.str (); + std::cerr << os.str(); } } diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp index 5268a13aca3d..342c05de0962 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_decl.hpp @@ -3435,7 +3435,7 @@ namespace Tpetra { void applyCrsPadding( - const Kokkos::UnorderedMap& padding, + const typename crs_graph_type::padding_type& padding, const bool verbose); private: diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 664d9d1dc1b9..b25a6b7463fd 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6436,15 +6436,18 @@ namespace Tpetra { void CrsMatrix:: applyCrsPadding( - const Kokkos::UnorderedMap& padding, + const typename crs_graph_type::padding_type& padding, const bool verbose) { using Details::ProfilingRegion; using Details::padCrsArrays; using std::endl; + using LO = local_ordinal_type; using execution_space = typename device_type::execution_space; - using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type; - using range_policy = Kokkos::RangePolicy>; + using row_ptrs_type = + typename local_graph_type::row_map_type::non_const_type; + using range_policy = + Kokkos::RangePolicy>; const char tfecfFuncName[] = "applyCrsPadding"; const char suffix[] = ". 
Please report this bug to the Tpetra developers."; @@ -6454,7 +6457,9 @@ namespace Tpetra { if (verbose) { prefix = this->createPrefix("CrsMatrix", tfecfFuncName); std::ostringstream os; - os << *prefix << "padding.size()=" << padding.size() << endl; + os << *prefix << "padding: "; + padding.print(os); + os << endl; std::cerr << os.str(); } const int myRank = ! verbose ? -1 : [&] () { @@ -6471,12 +6476,18 @@ namespace Tpetra { // NOTE (mfh 29 Jan 2020) This allocates the values array. if (! myGraph_->indicesAreAllocated()) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Call allocateIndices" << endl; + std::cerr << os.str(); + } allocateValues(GlobalIndices, GraphNotYetAllocated, verbose); } - if (padding.size() == 0) { - return; - } + // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate + // row_ptrs_beg or allocate row_ptrs_end unless the allocation + // size needs to increase. That should be the job of + // padCrsArrays. // Making copies here because k_rowPtrs_ has a const type. Otherwise, we // would use it directly. @@ -6487,7 +6498,11 @@ namespace Tpetra { << myGraph_->k_rowPtrs_.extent(0) << endl; std::cerr << os.str(); } - row_ptrs_type row_ptr_beg("row_ptr_beg", myGraph_->k_rowPtrs_.extent(0)); + using Kokkos::view_alloc; + using Kokkos::WithoutInitializing; + row_ptrs_type row_ptr_beg( + view_alloc("row_ptr_beg", WithoutInitializing), + myGraph_->k_rowPtrs_.extent(0)); Kokkos::deep_copy(row_ptr_beg, myGraph_->k_rowPtrs_); const size_t N = row_ptr_beg.extent(0) == 0 ? 
size_t(0) : @@ -6497,25 +6512,26 @@ namespace Tpetra { os << *prefix << "Allocate row_ptrs_end: " << N << endl; std::cerr << os.str(); } - row_ptrs_type row_ptr_end("row_ptr_end", N); + row_ptrs_type row_ptr_end( + view_alloc("row_ptr_end", WithoutInitializing), N); - bool refill_num_row_entries = false; - if (myGraph_->k_numRowEntries_.extent(0) > 0) { - // Case 1: Unpacked storage - refill_num_row_entries = true; + const bool refill_num_row_entries = + myGraph_->k_numRowEntries_.extent(0) != 0; + + if (refill_num_row_entries) { // unpacked storage + // We can't assume correct *this capture until C++17, and it's + // likely more efficient just to capture what we need anyway. auto num_row_entries = myGraph_->k_numRowEntries_; Kokkos::parallel_for ("Fill end row pointers", range_policy(0, N), KOKKOS_LAMBDA (const size_t i) { row_ptr_end(i) = row_ptr_beg(i) + num_row_entries(i); }); - - } else { - // FIXME (mfh 04 Feb 2020) If packed storage, don't need - // row_ptr_end to be separate allocation; could just have it - // alias row_ptr_beg+1. - // - // Case 2: Packed storage + } + else { + // FIXME (mfh 04 Feb 2020) Fix padCrsArrays so that if packed + // storage, we don't need row_ptr_end to be separate allocation; + // could just have it alias row_ptr_beg+1. 
Kokkos::parallel_for ("Fill end row pointers", range_policy(0, N), KOKKOS_LAMBDA (const size_t i) { @@ -6557,10 +6573,12 @@ namespace Tpetra { if (verbose) { std::ostringstream os; - os << *prefix << "Assign myGraph_->k_rowPtrs_: " - << "old=" << myGraph_->k_rowPtrs_.extent(0) - << ", new=" << row_ptr_beg.extent(0) << endl; + os << *prefix << "Assign myGraph_->k_rowPtrs_; " + << "old size: " << myGraph_->k_rowPtrs_.extent(0) + << ", new size: " << row_ptr_beg.extent(0) << endl; std::cerr << os.str(); + TEUCHOS_ASSERT( myGraph_->k_rowPtrs_.extent(0) == + row_ptr_beg.extent(0) ); } myGraph_->k_rowPtrs_ = row_ptr_beg; } @@ -6753,8 +6771,8 @@ namespace Tpetra { auto padding = myGraph_->computeCrsPadding(srcGraph, numSameIDs, permuteToLIDs_dv, permuteFromLIDs_dv, verbose); - if (padding.size() != 0) { - applyCrsPadding(padding, verbose); + if (padding->increase() != 0) { + applyCrsPadding(*padding, verbose); } } const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed (); @@ -6791,8 +6809,8 @@ namespace Tpetra { // copy. Really it's the GIDs that have to be copied (because // they have to be converted from LIDs). size_t checkRowLength = 0; - srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength); - + srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, + checkRowLength); if (debug) { TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC (rowLength != checkRowLength, std::logic_error, ": For " @@ -7880,34 +7898,13 @@ namespace Tpetra { distor, combineMode); } else { - std::vector paddingVec = - myGraph_->computePaddingForCrsMatrixUnpack( - importLIDs, imports, numPacketsPerLID, verbose); - using padding_type = - Kokkos::UnorderedMap; - const LO numImports = static_cast(importLIDs.extent(0)); - padding_type padding (numImports); - - // padding gets pre-filled on devic, but we're modifying it on - // host here, so we need to fence to ensure that device is done. 
- Kokkos::fence(); - - if (paddingVec.size() != 0) { - auto importLIDs_h = importLIDs.view_host(); - for (LO whichImp = 0; whichImp < numImports; ++whichImp) { - const LO lclRowInd = importLIDs_h[whichImp]; - auto result = - padding.insert(lclRowInd, paddingVec[whichImp]); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (result.failed(), std::runtime_error, - ": Unable to insert padding for LID " << lclRowInd); + { + auto padding = + myGraph_->computePaddingForCrsMatrixUnpack( + importLIDs, imports, numPacketsPerLID, verbose); + if (padding->increase() != 0) { + applyCrsPadding(*padding, verbose); } - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (padding.failed_insert(), std::runtime_error, - ": Failed to insert one or more indices into padding map"); - } - if (padding.size() != 0) { - applyCrsPadding(padding, verbose); } unpackAndCombineImplNonStatic(importLIDs, imports, numPacketsPerLID, diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp new file mode 100644 index 000000000000..203f8285f612 --- /dev/null +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -0,0 +1,259 @@ +#ifndef TPETRA_DETAILS_CRSPADDING_HPP +#define TPETRA_DETAILS_CRSPADDING_HPP + +#include "Tpetra_Details_Behavior.hpp" +#include "Tpetra_Util.hpp" +#include +#include +#include + +namespace Tpetra { + namespace Details { + + template + class CrsPadding { + private: + using LO = LocalOrdinal; + using GO = GlobalOrdinal; + + enum class Phase { + SAME, + PERMUTE, + IMPORT + }; + + public: + struct create_from_sames_and_permutes_tag {}; + static constexpr create_from_sames_and_permutes_tag + create_from_sames_and_permutes {}; + CrsPadding(create_from_sames_and_permutes_tag /* tag */, + const size_t /* numSameIDs */, + const size_t /* numPermutes */) + {} + + struct create_from_imports_tag {}; + static constexpr create_from_imports_tag create_from_imports {}; + CrsPadding(create_from_imports_tag /* tag */, + const size_t /* 
numImports */) + {} + + void + update_same( + size_t& tgtNumDups, // accumulator + size_t& srcNumDups, // accumulator + size_t& unionNumDups, // accumulator + const LO targetLocalIndex, + GO tgtGblColInds[], + const size_t origNumTgtEnt, + const bool tgtIsUnique, + GO srcGblColInds[], + const size_t origNumSrcEnt, + const bool srcIsUnique) + { + const LO whichSame = targetLocalIndex; + update_impl(Phase::SAME, + tgtNumDups, srcNumDups, unionNumDups, + whichSame, targetLocalIndex, + tgtGblColInds, origNumTgtEnt, tgtIsUnique, + srcGblColInds, origNumSrcEnt, srcIsUnique); + } + + void + update_permute( + size_t& tgtNumDups, // accumulator + size_t& srcNumDups, // accumulator + size_t& unionNumDups, // accumulator + const LO whichPermute, // index in permuteFrom/To + const LO targetLocalIndex, + GO tgtGblColInds[], + const size_t origNumTgtEnt, + const bool tgtIsUnique, + GO srcGblColInds[], + const size_t origNumSrcEnt, + const bool srcIsUnique) + { + update_impl(Phase::PERMUTE, + tgtNumDups, srcNumDups, unionNumDups, + whichPermute, targetLocalIndex, + tgtGblColInds, origNumTgtEnt, tgtIsUnique, + srcGblColInds, origNumSrcEnt, srcIsUnique); + } + + void + update_import( + size_t& tgtNumDups, // accumulator + size_t& srcNumDups, // accumulator + size_t& unionNumDups, // accumulator + const LO whichImport, + const LO targetLocalIndex, + GO tgtGblColInds[], + const size_t origNumTgtEnt, + const bool tgtIsUnique, + GO srcGblColInds[], + const size_t origNumSrcEnt, + const bool srcIsUnique) + { + update_impl(Phase::IMPORT, + tgtNumDups, srcNumDups, unionNumDups, + whichImport, targetLocalIndex, + tgtGblColInds, origNumTgtEnt, tgtIsUnique, + srcGblColInds, origNumSrcEnt, srcIsUnique); + } + + void print(std::ostream& out) const { + out << "increase: " << increase() << ", "; + const size_t maxNumToPrint = + Details::Behavior::verbosePrintCountThreshold(); + const size_t size = entries_.size(); + out << "entries: ["; + size_t k = 0; + for (const auto& keyval : entries_) { + 
if (k > maxNumToPrint) { + out << "..."; + break; + } + out << "(" << keyval.first << ", "; + Details::verbosePrintArray(out, keyval.second, + "Global column indices", maxNumToPrint); + out << ")"; + if (k + size_t(1) < size) { + out << ", "; + } + ++k; + } + out << "]"; + } + + /// \brief Increase (increment in the number of entries) in + /// required allocation size. + size_t increase() const { + return increase_; + } + + struct Result { + size_t allocSize; + bool found; + }; + + Result + get_result(const LO targetLocalIndex) const + { + auto it = entries_.find(targetLocalIndex); + if (it == entries_.end()) { + return {0, false}; + } + else { + return {it->second.size(), true}; + } + } + + private: + void + update_impl( + const Phase phase, + size_t& tgtNumDups, + size_t& srcNumDups, + size_t& unionNumDups, + const LO whichImport, + const LO targetLocalIndex, + GO tgtGblColInds[], + const size_t origNumTgtEnt, + const bool tgtIsUnique, + GO srcGblColInds[], + const size_t origNumSrcEnt, + const bool srcIsUnique) + { + // FIXME (08 Feb 2020) We only need to sort and unique + // tgtGblColInds if we haven't already seen it before. + size_t newNumTgtEnt = origNumTgtEnt; + auto tgtEnd = tgtGblColInds + origNumTgtEnt; + std::sort(tgtGblColInds, tgtEnd); + if (! tgtIsUnique) { + tgtEnd = std::unique(tgtGblColInds, tgtEnd); + newNumTgtEnt = size_t(tgtEnd - tgtGblColInds); + } + tgtNumDups += (origNumTgtEnt - newNumTgtEnt); + + size_t newNumSrcEnt = origNumSrcEnt; + auto srcEnd = srcGblColInds + origNumSrcEnt; + std::sort(srcGblColInds, srcEnd); + if (! 
srcIsUnique) { + srcEnd = std::unique(srcGblColInds, srcEnd); + newNumSrcEnt = size_t(srcEnd - srcGblColInds); + } + srcNumDups += (origNumSrcEnt - newNumSrcEnt); + + size_t unionNumEnt = 0; + merge_with_current_state(phase, unionNumEnt, + whichImport, targetLocalIndex, + tgtGblColInds, newNumTgtEnt, + srcGblColInds, newNumSrcEnt); + unionNumDups += (newNumTgtEnt + newNumSrcEnt - unionNumEnt); + if (unionNumEnt > origNumTgtEnt) { + increase_ += (unionNumEnt - origNumTgtEnt); + } + } + + std::vector& + get_union_col_inds(const Phase /* phase */, + const LO /* whichIndex */, + const LO tgtLclRowInd) + { + return entries_[tgtLclRowInd]; + } + + void + merge_with_current_state( + const Phase phase, + size_t& unionNumEnt, + const LO whichIndex, + const LO tgtLclRowInd, + const GO tgtColInds[], // sorted & merged + const size_t numTgtEnt, + const GO srcColInds[], // sorted & merged + const size_t numSrcEnt) + { + std::vector& unionColInds = + get_union_col_inds(phase, whichIndex, tgtLclRowInd); + + if (unionColInds.size() == 0) { + auto tgtEnd = tgtColInds + numTgtEnt; + auto srcEnd = srcColInds + numSrcEnt; + const size_t numInCommon = Details::countNumInCommon( + srcColInds, srcEnd, tgtColInds, tgtEnd); + unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; + unionColInds.resize(unionNumEnt); + (void) std::set_union(tgtColInds, tgtEnd, + srcColInds, srcEnd, + unionColInds.begin()); + } + else { + // We've already seen the target graph/matrix row before, so + // we need not even look at tgtColInds. 
+ const size_t oldUnionSize = unionColInds.size(); + + const size_t maxUnionSize = numSrcEnt + unionColInds.size(); + if (scratchColInds_.size() < maxUnionSize) { + scratchColInds_.resize(maxUnionSize); + } + auto scratchEnd = std::set_union( + srcColInds, srcColInds + numSrcEnt, + unionColInds.begin(), unionColInds.end(), + scratchColInds_.begin()); + unionNumEnt = size_t(scratchEnd - scratchColInds_.begin()); + unionColInds.resize(unionNumEnt); + std::copy(scratchColInds_.begin(), scratchColInds_.end(), + unionColInds.begin()); + } + } + + // imports may overlap with sames and/or permutes, so it makes + // sense to store them all in one map. + std::map > entries_; + std::vector scratchColInds_; + size_t increase_ = 0; + }; + } // namespace Details +} // namespace Tpetra + +#endif // TPETRA_DETAILS_CRSPADDING_HPP diff --git a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp index 51f006f0a5a7..23bcb27ec993 100644 --- a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp @@ -44,7 +44,8 @@ #include "TpetraCore_config.h" #include "Kokkos_Core.hpp" -#include "Kokkos_UnorderedMap.hpp" +#include "Tpetra_Details_Behavior.hpp" +#include "Tpetra_Details_CrsPadding.hpp" #include #include @@ -61,162 +62,337 @@ namespace impl { template ViewType -uninitialized_view(const std::string& name, const size_t& size) +make_uninitialized_view( + const std::string& name, + const size_t size, + const bool verbose, + const std::string* const prefix) { - return ViewType (Kokkos::view_alloc(name, Kokkos::WithoutInitializing), size); + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate Kokkos::View " << name + << ": " << size << std::endl; + std::cerr << os.str(); + } + using Kokkos::view_alloc; + using Kokkos::WithoutInitializing; + return ViewType(view_alloc(name, WithoutInitializing), size); +} + +template +ViewType +make_initialized_view( + const 
std::string& name, + const size_t size, + const bool verbose, + const std::string* const prefix) +{ + if (verbose) { + std::ostringstream os; + os << *prefix << "Allocate & initialize Kokkos::View " + << name << ": " << size << std::endl; + std::cerr << os.str(); + } + return ViewType(name, size); } +template +void +assign_to_view(OutViewType& out, + const InViewType& in, + const char viewName[], + const bool verbose, + const std::string* const prefix) +{ + if (verbose) { + std::ostringstream os; + os << *prefix << "Assign to Kokkos::View " << viewName + << ": Old size: " << out.extent(0) + << ", New size: " << in.extent(0) << std::endl; + std::cerr << os.str(); + } + out = in; +} + +template +auto create_mirror_view( + const MemorySpace& memSpace, + const ViewType& view, + const bool verbose, + const std::string* const prefix) -> + decltype(Kokkos::create_mirror_view(memSpace, view)) +{ + if (verbose) { + std::ostringstream os; + os << *prefix << "Create mirror view: " + << "view.extent(0): " << view.extent(0) << std::endl; + std::cerr << os.str(); + } + return Kokkos::create_mirror_view(memSpace, view); +} + +enum class PadCrsAction { + INDICES_ONLY, + INDICES_AND_VALUES +}; + /// \brief Implementation of padCrsArrays +/// +/// \param row_ptr_beg [in] Offset to beginning of each row. +/// \param row_ptr_end [in] Offset to end of each row. +/// +/// Each row lclRow has row_ptr_end[lclRow] - row_ptr_beg[lclRow] +/// entries. Offsets row_ptr_end[lclRow] to +/// row_ptr_beg[lclRow+1] - 1 (inclusive) are extra space. 
template void pad_crs_arrays( - const RowPtr& row_ptr_beg, - const RowPtr& row_ptr_end, - Indices& indices, - Values& values, - const Padding& padding, - const int my_rank, - const bool verbose) + const PadCrsAction action, + const RowPtr& row_ptr_beg, + const RowPtr& row_ptr_end, + Indices& indices, + Values& values, + const Padding& padding, + const int my_rank, + const bool verbose) { + using Kokkos::view_alloc; + using Kokkos::WithoutInitializing; + using execution_space = typename Indices::execution_space; + using range_type = Kokkos::RangePolicy; + using std::endl; std::unique_ptr prefix; + + const size_t maxNumToPrint = verbose ? + Behavior::verbosePrintCountThreshold() : size_t(0); if (verbose) { std::ostringstream os; os << "Proc " << my_rank << ": Tpetra::...::pad_crs_arrays: "; prefix = std::unique_ptr(new std::string(os.str())); - os << "padding.size()=" << padding.size() << endl; + os << "Start" << endl; std::cerr << os.str(); } - if (padding.size() == 0 || row_ptr_beg.size() == 0) { - // Nothing to do - return; + if (verbose) { + std::ostringstream os; + os << *prefix << "On input: "; + + auto row_ptr_beg_h = + Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_beg); + Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg); + verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg before scan", + maxNumToPrint); + os << ", "; + auto row_ptr_end_h = + Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_end); + Kokkos::deep_copy(row_ptr_end_h, row_ptr_end); + verbosePrintArray(os, row_ptr_end_h, "row_ptr_end before scan", + maxNumToPrint); + os << ", indices.extent(0): " << indices.extent(0) + << ", values.extent(0): " << values.extent(0) + << ", padding: "; + padding.print(os); + os << endl; + std::cerr << os.str(); } - auto pad_values = values.extent(0) == indices.extent(0); + if (row_ptr_beg.size() == 0) { + if (verbose) { + std::ostringstream os; + os << *prefix << "Done; local matrix has no rows" << endl; + std::cerr << os.str(); + } + return; // nothing 
to do + } - // Determine if the indices array is large enough - auto num_row = row_ptr_beg.size() - 1; - auto policy = Kokkos::RangePolicy(0, num_row); + // const size_t additional_size_needed = padding.increase(); + // if (additional_size_needed == 0 || row_ptr_beg.size() == 0) { + // if (verbose) { + // std::ostringstream os; + // os << *prefix << "Done; no padding needed" << endl; + // std::cerr << os.str(); + // } + // return; // nothing to do + // } + + // If we get this far, then we need reallocation. + + const size_t lclNumRows(row_ptr_beg.size() - 1); + RowPtr newAllocPerRow = + make_uninitialized_view("newAllocPerRow", lclNumRows, + verbose, prefix.get()); if (verbose) { std::ostringstream os; - os << *prefix << "Allocate entries_this_row: " << num_row << endl; + os << *prefix << "Fill newAllocPerRow & compute increase" << endl; std::cerr << os.str(); } - RowPtr entries_this_row("entries_this_row", num_row); - Kokkos::deep_copy(entries_this_row, 0); - size_t additional_size_needed = 0; - Kokkos::parallel_reduce("Determine additional size needed", policy, - KOKKOS_LAMBDA(const int i, size_t& ladditional_size_needed) { - - auto allocated_this_row = row_ptr_beg(i+1) - row_ptr_beg(i); - auto used_this_row = row_ptr_end(i) - row_ptr_beg(i); - auto free_this_row = allocated_this_row - used_this_row; - entries_this_row(i) = allocated_this_row; - - auto k = padding.find(static_cast(i)); - if (padding.valid_at(k)) { - // Additional padding was requested for this LID - auto num_ent = padding.value_at(k); - auto n = (num_ent > free_this_row) ? 
num_ent - free_this_row : 0; - entries_this_row(i) += n; - ladditional_size_needed += n; - } - }, additional_size_needed); + size_t increase = 0; + { + Kokkos::HostSpace hostSpace; + auto row_ptr_end_h = create_mirror_view( + hostSpace, row_ptr_end, verbose, prefix.get()); + Kokkos::deep_copy(row_ptr_end_h, row_ptr_end); + auto row_ptr_beg_h = create_mirror_view( + hostSpace, row_ptr_beg, verbose, prefix.get()); + Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg); + + auto newAllocPerRow_h = create_mirror_view( + hostSpace, newAllocPerRow, verbose, prefix.get()); + Kokkos::parallel_reduce + ("Tpetra::CrsGraph: Compute new allocation size per row", + range_type(0, lclNumRows), + [&] (const size_t lclRowInd, size_t& lclIncrease) { + const size_t start = row_ptr_beg_h[lclRowInd]; + const size_t end = row_ptr_beg_h[lclRowInd+1]; + TEUCHOS_ASSERT( end >= start ); + const size_t oldAllocSize = end - start; + + // This is not a pack routine. Do not shrink! Shrinking now + // to fit the number of entries would ignore users' hint for + // the max number of entries in each row. Also, CrsPadding + // only counts entries and ignores any available free space. 
+ + auto result = padding.get_result(lclRowInd); + if (result.found && result.allocSize > oldAllocSize) { + lclIncrease += (result.allocSize - oldAllocSize); + newAllocPerRow_h[lclRowInd] = result.allocSize; + } + else { + newAllocPerRow_h[lclRowInd] = oldAllocSize; + } + }, increase); + + if (verbose) { + std::ostringstream os; + os << *prefix << "increase: " << increase << ", "; + verbosePrintArray(os, newAllocPerRow_h, "newAllocPerRow", + maxNumToPrint); + os << endl; + std::cerr << os.str(); + } - if (additional_size_needed == 0) - return; + if (increase == 0) { + return; + } + Kokkos::deep_copy(newAllocPerRow, newAllocPerRow_h); + } - using ptrs_value_type = typename RowPtr::non_const_value_type; using inds_value_type = typename Indices::non_const_value_type; using vals_value_type = typename Values::non_const_value_type; - // The indices array must be resized and the row_ptr arrays shuffled - if (verbose) { - std::ostringstream os; - os << *prefix << "Allocate indices_new: " - << (indices.size() + additional_size_needed) << endl; - std::cerr << os.str(); + const size_t newIndsSize = size_t(indices.size()) + increase; + auto indices_new = make_uninitialized_view( + "Tpetra::CrsGraph column indices", newIndsSize, verbose, + prefix.get()); + + Values values_new; + if (action == PadCrsAction::INDICES_AND_VALUES) { + const size_t newValsSize = newIndsSize; + // NOTE (mfh 10 Feb 2020) If we don't initialize values_new here, + // then the CrsMatrix tests fail. + values_new = make_initialized_view( + "Tpetra::CrsMatrix values", newValsSize, verbose, prefix.get()); } - auto indices_new = uninitialized_view("ind new", indices.size()+additional_size_needed); + if (verbose) { - const size_t new_size = pad_values ? 
- size_t(values.size()) + additional_size_needed : size_t(0); std::ostringstream os; - os << *prefix << "Allocate values_new: " << new_size << endl; + os << *prefix << "Repack" << endl; std::cerr << os.str(); } - auto values_new = uninitialized_view("val new", pad_values ? values.size()+additional_size_needed : 0); - Kokkos::deep_copy(values_new, vals_value_type(0.0)); - - // mfh: Not so fussy about this not being a kernel initially, - // since we're adding a new feature upon which existing code does not rely, - // namely Export/Import to a StaticProfile CrsGraph. However, watch out - // for fence()ing relating to UVM. - auto this_row_beg = row_ptr_beg(0); - auto this_row_end = row_ptr_end(0); - using range = Kokkos::pair; - for (typename RowPtr::size_type i=0; i oldRange( + row_beg, row_beg + numEnt); + const Kokkos::pair newRange( + newRowBeg, newRowBeg + numEnt); + auto oldColInds = subview(indices, oldRange); + auto newColInds = subview(indices_new, newRange); + // memcpy works fine on device; the next step is to + // introduce two-level parallelism and use team copy. + memcpy(newColInds.data(), oldColInds.data(), + numEnt * sizeof(inds_value_type)); + if (action == PadCrsAction::INDICES_AND_VALUES) { + auto oldVals = subview(values, oldRange); + auto newVals = subview(values_new, newRange); + memcpy(newVals.data(), oldVals.data(), + numEnt * sizeof(vals_value_type)); + } + } + // It's the final pass, so we can modify these arrays. 
+ row_ptr_beg[lclRow] = newRowBeg; + if (lclRow < lclNumRows) { + row_ptr_end[lclRow] = newRowBeg + numEnt; + } + } + newRowBeg += newRowAllocSize; + }); - // And then the values - if (pad_values) { - auto values_old_subview = subview(values, range(this_row_beg, this_row_beg+used_this_row)); - auto values_new_subview = subview(values_new, range(row_ptr_beg(i), row_ptr_beg(i)+used_this_row)); - memcpy(values_new_subview.data(), values_old_subview.data(), used_this_row * sizeof(vals_value_type)); - } + if (verbose) { + std::ostringstream os; - // Before modifying the row_ptr arrays, save current beg, end for next iteration - this_row_beg = row_ptr_beg(i+1); - this_row_end = row_ptr_end(i+1); + os << *prefix; + auto row_ptr_beg_h = + Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_beg); + Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg); + verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg after scan", + maxNumToPrint); + os << endl; + + os << *prefix; + auto row_ptr_end_h = + Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_end); + Kokkos::deep_copy(row_ptr_end_h, row_ptr_end); + verbosePrintArray(os, row_ptr_end_h, "row_ptr_end after scan", + maxNumToPrint); + os << endl; - auto used_next_row = row_ptr_end(i+1) - row_ptr_beg(i+1); - row_ptr_beg(i+1) = row_ptr_beg(i) + entries_this_row(i); - row_ptr_end(i+1) = row_ptr_beg(i+1) + used_next_row; + std::cerr << os.str(); } - { - // Copy indices/values for last row - row_ptr_beg(num_row) = indices_new.size(); - auto n = num_row - 1; - auto used_this_row = row_ptr_end(n) - row_ptr_beg(n); - - { - auto indices_old_subview = subview(indices, range(this_row_beg, this_row_beg+used_this_row)); - auto indices_new_subview = subview(indices_new, range(row_ptr_beg(n), row_ptr_beg(n)+used_this_row)); - memcpy(indices_new_subview.data(), indices_old_subview.data(), used_this_row * sizeof(inds_value_type)); - } - - if (pad_values) { - auto values_old_subview = subview(values, range(this_row_beg, 
this_row_beg+used_this_row)); - auto values_new_subview = subview(values_new, range(row_ptr_beg(n), row_ptr_beg(n)+used_this_row)); - memcpy(values_new_subview.data(), values_old_subview.data(), used_this_row * sizeof(vals_value_type)); - } - } + assign_to_view(indices, indices_new, + "Tpetra::CrsGraph column indices", + verbose, prefix.get()); + assign_to_view(values, values_new, + "Tpetra::CrsMatrix values", + verbose, prefix.get()); if (verbose) { + Kokkos::HostSpace hostSpace; + auto indices_h = create_mirror_view(hostSpace, indices); + Kokkos::deep_copy(indices_h, indices); + auto values_h = create_mirror_view(hostSpace, values); + Kokkos::deep_copy(values_h, values); std::ostringstream os; - os << *prefix << "Assign to indices: old=" << indices.size() - << ", new=" << indices_new.size() << endl; - std::cerr << os.str(); + os << "On output: "; + verbosePrintArray(os, indices_h, "indices", maxNumToPrint); + os << ", "; + verbosePrintArray(os, values_h, "values", maxNumToPrint); + os << ", padding: "; + padding.print(os); + os << endl; } - indices = indices_new; + if (verbose) { std::ostringstream os; - os << *prefix << "Assign to values: old=" << values.size() - << ", new=" << values_new.size() << endl; + os << *prefix << "Done" << endl; std::cerr << os.str(); } - values = values_new; } /// \brief Implementation of insertCrsIndices @@ -333,10 +509,6 @@ find_crs_indices( /// column index (in the indices array) of row i. /// \param [in/out] indices - array containing columns indices of nonzeros in /// CRS representation. -/// \param [in] padding - Kookos::UnorderedMap. padding[i] is the amount of free -/// space required for row i. If the distance between row_ptr_end[i] and -/// row_ptr_beg[i] does not accommodate padding[i], we resize and shift -/// indices to accommodate. 
/// template void @@ -351,8 +523,9 @@ padCrsArrays( using impl::pad_crs_arrays; // send empty values array Indices values; - pad_crs_arrays(rowPtrBeg, - rowPtrEnd, indices, values, padding, my_rank, verbose); + pad_crs_arrays( + impl::PadCrsAction::INDICES_ONLY, rowPtrBeg, rowPtrEnd, + indices, values, padding, my_rank, verbose); } template @@ -367,8 +540,9 @@ padCrsArrays( const bool verbose) { using impl::pad_crs_arrays; - pad_crs_arrays(rowPtrBeg, - rowPtrEnd, indices, values, padding, my_rank, verbose); + pad_crs_arrays( + impl::PadCrsAction::INDICES_AND_VALUES, rowPtrBeg, rowPtrEnd, + indices, values, padding, my_rank, verbose); } /// \brief Insert new indices in to current list of indices diff --git a/packages/tpetra/core/src/Tpetra_Details_gathervPrint.hpp b/packages/tpetra/core/src/Tpetra_Details_gathervPrint.hpp index 3c9a9ca54cb8..d80f94d728a3 100644 --- a/packages/tpetra/core/src/Tpetra_Details_gathervPrint.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_gathervPrint.hpp @@ -35,8 +35,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER */ diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_decl.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_decl.hpp index e1bb534c3fcb..40e20c3cd5e3 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_decl.hpp @@ -88,66 +88,6 @@ class Distributor; // namespace Details { -/// \brief Unpack the imported column indices and combine -/// into graph. -/// -/// \tparam LO The type of local indices. See the -/// documentation of Map for requirements. -/// \tparam GO The type of global indices. 
See the -/// documentation of Map for requirements. -/// \tparam NT The Node type. See the documentation of Map -/// for requirements. -/// -/// \param sourceGraph [in] the CrsGraph source -/// -/// \param imports [in] Input pack buffer -/// -/// \param numPacketsPerLID [out] Entry k gives the number of bytes -/// packed for row exportLIDs[k] of the local graph. -/// -/// \param importLIDs [in] Local indices of the rows to pack. -/// -/// \param constantNumPackets [out] Setting this to zero tells the caller -/// to expect a possibly /// different ("nonconstant") number of packets per local index -/// (i.e., a possibly different number of entries per row). -/// -/// \param distor [in] The distributor (not used) -/// -/// \param combineMode [in] the mode to use for combining -/// -/// This is the public interface to the unpack and combine machinery and -/// converts passed Teuchos::ArrayView objects to Kokkos::View objects (and -/// copies back in to the Teuchos::ArrayView objects, if needed). When -/// CrsGraph migrates fully to adopting Kokkos::DualView objects for its storage -/// of data, this procedure could be bypassed. -template -void -unpackCrsGraphAndCombine( - CrsGraph& sourceGraph, - const Teuchos::ArrayView::packet_type>& imports, - const Teuchos::ArrayView& numPacketsPerLID, - const Teuchos::ArrayView& importLIDs, - size_t constantNumPackets, - Distributor & distor, - CombineMode combineMode); - -template -void -unpackCrsGraphAndCombineNew( - CrsGraph& sourceGraph, - const Kokkos::DualView::packet_type*, - typename CrsGraph::buffer_device_type>& - imports, - const Kokkos::DualView::buffer_device_type>& - numPacketsPerLID, - const Kokkos::DualView::buffer_device_type>& - importLIDs, - const size_t constantNumPackets, - Distributor & distor, - const CombineMode combineMode); - /// \brief Special version of Tpetra::Details::unpackCrsGraphAndCombine /// that also unpacks owning process ranks. 
/// diff --git a/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp b/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp index c7b151ff40d3..cc704e7082ee 100644 --- a/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_unpackCrsGraphAndCombine_def.hpp @@ -325,16 +325,21 @@ computeCrsPadding(const NumPackets& num_packets_per_lid, /// local graph /// /// This is a higher level interface to the UnpackAndCombineFunctor -template +template void unpackAndCombine (const RowView& row_ptrs_beg, const RowView& row_ptrs_end, IndicesView& indices, - const Kokkos::View& imports, - const Kokkos::View& num_packets_per_lid, - const Kokkos::View& import_lids, + const Kokkos::View& imports, + const Kokkos::View& num_packets_per_lid, + const Kokkos::View& import_lids, + const typename CrsGraph::padding_type& padding, const bool unpack_pids, const int myRank, const bool verbose) @@ -344,11 +349,13 @@ unpackAndCombine using NumPacketsView = Kokkos::View; using LO = LocalOrdinal; + using GO = GlobalOrdinal; + using device_type = typename Node::device_type; using execution_space = typename BufferDevice::execution_space; using range_policy = Kokkos::RangePolicy>; using unpack_functor_type = - UnpackAndCombineFunctor; + UnpackAndCombineFunctor; const char prefix[] = "Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine: "; @@ -359,14 +366,9 @@ unpackAndCombine return; } - using device_type = typename IndicesView::device_type; - // Resize row pointers and indices to accommodate incoming data - auto padding = - computeCrsPadding - (num_packets_per_lid, import_lids, unpack_pids); - padCrsArrays (row_ptrs_beg, - row_ptrs_end, indices, padding, myRank, verbose); + padCrsArrays(row_ptrs_beg, row_ptrs_end, indices, padding, + myRank, verbose); // Get the offsets Kokkos::View offsets("offsets", num_import_lids+1); @@ -835,212 +837,6 @@ unpackAndCombineIntoCrsArrays( } // 
namespace UnpackAndCombineCrsGraphImpl -/// \brief Unpack the imported column indices and combine into graph. -/// -/// \tparam LO The type of local indices. See the -/// documentation of Map for requirements. -/// \tparam GO The type of global indices. See the -/// documentation of Map for requirements. -/// \tparam Node The Kokkos Node type. See the documentation of Map -/// for requirements. -/// -/// \param sourceGraph [in] the CrsGraph source -/// -/// \param imports [in] Input pack buffer -/// -/// \param numPacketsPerLID [out] Entry k gives the number of bytes -/// packed for row exportLIDs[k] of the local graph. -/// -/// \param importLIDs [in] Local indices of the rows to pack. -/// -/// \param constantNumPackets [out] Setting this to zero tells the caller -/// to expect a possibly /// different ("nonconstant") number of packets per local index -/// (i.e., a possibly different number of entries per row). -/// -/// \param distor [in] The distributor (not used) -/// -/// \param combineMode [in] the mode to use for combining indices. This value -/// is not checked. Any incoming indices are just inserted in to the graph. -/// graphs is -/// -/// This is the public interface to the unpack and combine machinery and -/// converts passed Teuchos::ArrayView objects to Kokkos::View objects (and -/// copies back in to the Teuchos::ArrayView objects, if needed). When -/// CrsGraph migrates fully to adopting Kokkos::DualView objects for its storage -/// of data, this procedure could be bypassed. 
-template -void -unpackCrsGraphAndCombine( - CrsGraph& graph, - const Teuchos::ArrayView::packet_type>& imports, - const Teuchos::ArrayView& numPacketsPerLID, - const Teuchos::ArrayView& importLIDs, - size_t /* constantNumPackets */, - Distributor & /* distor */, - CombineMode /* combineMode */) -{ - - TEUCHOS_TEST_FOR_EXCEPTION(!graph.isGloballyIndexed(), std::invalid_argument, - "Graph must be globally indexed!"); - - - using Kokkos::View; - using UnpackAndCombineCrsGraphImpl::unpackAndCombine; - using graph_type = CrsGraph; - using device_type = typename Node::device_type; - using buffer_device_type = typename graph_type::buffer_device_type; - using execution_space = typename device_type::execution_space; - using range_policy = Kokkos::RangePolicy>; - using row_ptrs_type = typename graph_type::local_graph_type::row_map_type::non_const_type; - using indices_type = typename graph_type::t_GlobalOrdinal_1D; - - // Convert all Teuchos::Array to Kokkos::View. - - buffer_device_type bufferOutputDevice; - - // numPacketsPerLID, importLIDs, and imports are input, so we have to copy - // them to device. Since unpacking is done directly in to the local graph - // (lclGraph), no copying needs to be performed after unpacking. 
- auto imports_d = - create_mirror_view_from_raw_host_array(bufferOutputDevice, - imports.getRawPtr(), imports.size(), - true, "imports"); - - auto num_packets_per_lid_d = - create_mirror_view_from_raw_host_array(bufferOutputDevice, - numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(), - true, "num_packets_per_lid"); - - auto import_lids_d = - create_mirror_view_from_raw_host_array(bufferOutputDevice, - importLIDs.getRawPtr(), importLIDs.size(), - true, "import_lids"); - - // We are OK using the protected data directly (k_*) because this function is - // a friend of CrsGraph - indices_type indices("indices", graph.k_gblInds1D_.extent(0)); - Kokkos::deep_copy(indices, graph.k_gblInds1D_); - - row_ptrs_type row_ptrs_beg("row_ptrs_beg", graph.k_rowPtrs_.extent(0)); - Kokkos::deep_copy(row_ptrs_beg, graph.k_rowPtrs_); - - const size_t N = (row_ptrs_beg.extent(0) == 0 ? 0 : row_ptrs_beg.extent(0) - 1); - row_ptrs_type row_ptrs_end("row_ptrs_end", N); - - bool refill_num_row_entries = false; - if (graph.k_numRowEntries_.extent(0) > 0) { - // Case 1: Packed storage - refill_num_row_entries = true; - auto num_row_entries = graph.k_numRowEntries_; - Kokkos::parallel_for("Fill end row pointers", range_policy(0, N), - KOKKOS_LAMBDA(const size_t i){ - row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i); - }); - - } else { - // mfh If packed storage, don't need row_ptrs_end to be separate allocation; - // could just have it alias row_ptrs_beg+1. - - // Case 2: Packed storage - Kokkos::parallel_for("Fill end row pointers", - range_policy(0, N), KOKKOS_LAMBDA(const size_t i){ - row_ptrs_end(i) = row_ptrs_beg(i+1); - }); - } - - // Now do the actual unpack! - const bool verbose = ::Tpetra::Details::Behavior::verbose("CrsGraph"); - const int myRank = ! verbose ? 
-1 : [&] () { - auto map = graph.getMap(); - if (map.is_null()) { - return -1; - } - auto comm = map->getComm(); - if (comm.is_null()) { - return -1; - } - return comm->getRank(); - } (); - - unpackAndCombine - (row_ptrs_beg, row_ptrs_end, indices, imports_d, - num_packets_per_lid_d, import_lids_d, false, myRank, verbose); - - // mfh Later, permit graph to be locally indexed, and check whether - // incoming column indices are in the column Map. If not, error. - if (refill_num_row_entries) { - Kokkos::parallel_for("Fill num entries", - range_policy(0, N), KOKKOS_LAMBDA(const size_t i){ - graph.k_numRowEntries_(i) = row_ptrs_end(i) - row_ptrs_beg(i); - }); - } - graph.k_rowPtrs_ = row_ptrs_beg; - graph.k_gblInds1D_ = indices; - - return; -} - -template -void -unpackCrsGraphAndCombineNew( - CrsGraph& /* sourceGraph */, - const Kokkos::DualView::packet_type*, - typename CrsGraph::buffer_device_type>& /* imports */, - const Kokkos::DualView::buffer_device_type>& /* numPacketsPerLID */, - const Kokkos::DualView::buffer_device_type>& /* importLIDs */, - const size_t /* constantNumPackets */, - Distributor& /* distor */, - const CombineMode /* combineMode */) -{ - TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "METHOD NOT COMPLETE"); -#if 0 - using UnpackAndCombineCrsGraphImpl::unpackAndCombine; - using Tpetra::Details::castAwayConstDualView; - using Kokkos::View; - using device_type = typename Node::device_type; - using graph_type = CrsGraph; - using packet_type = typename graph_type::packet_type; - using local_graph_type = typename graph_type::local_graph_type; - using buffer_device_type = typename graph_type::buffer_device_type; - using buffer_memory_space = typename buffer_device_type::memory_space; - using memory_space = typename device_type::memory_space; - - using row_ptrs_type = typename graph_type::local_graph_type::row_map_type::non_const_type; - using execution_space = typename device_type::execution_space; - using indices_type = Kokkos::View; - - 
static_assert(std::is_same::value, - "Node::device_type and LocalGraph::device_type must be " - "the same."); - - { - auto numPacketsPerLID_nc = castAwayConstDualView(numPacketsPerLID); - numPacketsPerLID_nc.sync_device (); - } - auto num_packets_per_lid_d = numPacketsPerLID.view_device (); - - TEUCHOS_ASSERT( ! importLIDs.need_sync_device () ); - auto import_lids_d = importLIDs.view_device (); - - { - auto imports_nc = castAwayConstDualView(imports); - imports_nc.sync_device (); - } - auto imports_d = imports.view_device (); - - // Now do the actual unpack! - // TJF: Should be grabbed from the Graph - indices_type indices; - row_ptrs_type row_ptrs_beg; - row_ptrs_type row_ptrs_end; - unpackAndCombine( - row_ptrs_beg, row_ptrs_end, indices, imports_d, - num_packets_per_lid_d, import_lids_d, false); -#endif // 0 -} - /// \brief Special version of Tpetra::Details::unpackCrsGraphAndCombine /// that also unpacks owning process ranks. /// @@ -1303,27 +1099,6 @@ unpackAndCombineIntoCrsArrays( #define TPETRA_DETAILS_UNPACKCRSGRAPHANDCOMBINE_INSTANT( LO, GO, NT ) \ template void \ - Details::unpackCrsGraphAndCombine( \ - CrsGraph&, \ - const Teuchos::ArrayView::packet_type>&, \ - const Teuchos::ArrayView&, \ - const Teuchos::ArrayView&, \ - size_t, \ - Distributor&, \ - CombineMode); \ - template void \ - Details::unpackCrsGraphAndCombineNew( \ - CrsGraph&, \ - const Kokkos::DualView::packet_type*, \ - CrsGraph::buffer_device_type>&, \ - const Kokkos::DualView::buffer_device_type>&, \ - const Kokkos::DualView::buffer_device_type>&, \ - const size_t, \ - Distributor&, \ - const CombineMode); \ - template void \ Details::unpackAndCombineIntoCrsArrays( \ const CrsGraph &, \ const Teuchos::ArrayView&, \ diff --git a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp index a33f2e429f88..b72d577ad679 100644 --- a/packages/tpetra/core/src/Tpetra_DistObject_def.hpp +++ b/packages/tpetra/core/src/Tpetra_DistObject_def.hpp @@ 
-51,7 +51,9 @@ #include "Tpetra_Distributor.hpp" #include "Tpetra_Details_reallocDualViewIfNeeded.hpp" #include "Tpetra_Details_Behavior.hpp" +#include "Tpetra_Details_checkGlobalError.hpp" #include "Tpetra_Details_Profiling.hpp" +#include "Teuchos_CommHelpers.hpp" #include "Teuchos_TypeNameTraits.hpp" #include #include @@ -678,7 +680,7 @@ namespace Tpetra { using Details::Behavior; using ::Tpetra::Details::dualViewStatusToString; using ::Tpetra::Details::getArrayViewFromDualView; - using ::Tpetra::Details::ProfilingRegion; + using Details::ProfilingRegion; using Kokkos::Compat::getArrayView; using Kokkos::Compat::getConstArrayView; using Kokkos::Compat::getKokkosViewDeepCopy; @@ -695,6 +697,7 @@ namespace Tpetra { Teuchos::TimeMonitor doXferMon (*doXferTimer_); #endif // HAVE_TPETRA_TRANSFER_TIMERS + const bool debug = Behavior::debug("DistObject"); const bool verbose = Behavior::verbose("DistObject"); // Prefix for verbose output. Use a pointer, so we don't pay for // string construction unless needed. We set this below. @@ -1164,16 +1167,36 @@ namespace Tpetra { Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_); #endif // HAVE_TPETRA_TRANSFER_TIMERS - // NOTE (mfh 26 Apr 2016) We don't actually need to sync the - // input DualViews, but they DO need to be most recently - // updated in the same memory space. - // - // FIXME (mfh 26 Apr 2016) Check that all input DualViews - // were most recently updated in the same memory space, and - // sync them to the same place (based on commOnHost) if not. 
- this->unpackAndCombine (remoteLIDs, this->imports_, - this->numImportPacketsPerLID_, - constantNumPackets, distor, CM); + if (debug) { + std::ostringstream lclErrStrm; + bool lclSuccess = false; + try { + this->unpackAndCombine (remoteLIDs, this->imports_, + this->numImportPacketsPerLID_, + constantNumPackets, distor, CM); + lclSuccess = true; + } + catch (std::exception& e) { + lclErrStrm << "unpackAndCombine threw an exception: " + << endl << e.what(); + } + catch (...) { + lclErrStrm << "unpackAndCombine threw an exception " + "not a subclass of std::exception."; + } + const char gblErrMsgHeader[] = "Tpetra::DistObject::" + "doTransferNew threw an exception in unpackAndCombine on " + "one or more processes in the DistObject's communicator."; + auto comm = getMap()->getComm(); + Details::checkGlobalError(std::cerr, lclSuccess, + lclErrStrm.str().c_str(), + gblErrMsgHeader, *comm); + } + else { + this->unpackAndCombine (remoteLIDs, this->imports_, + this->numImportPacketsPerLID_, + constantNumPackets, distor, CM); + } } // if (needCommunication) } // if (CM != ZERO) diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_PackUnpack.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_PackUnpack.cpp index 6ec4cf9de4b1..52d331374e21 100644 --- a/packages/tpetra/core/test/CrsGraph/CrsGraph_PackUnpack.cpp +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_PackUnpack.cpp @@ -69,7 +69,6 @@ using Teuchos::Comm; using Teuchos::outArg; using Tpetra::Details::gathervPrint; using Tpetra::Details::packCrsGraph; -using Tpetra::Details::unpackCrsGraphAndCombine; using std::endl; template @@ -199,6 +198,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(CrsGraph, PackThenUnpackAndCombine, LO, GO, NT out << "Building second graph" << endl; RCP B = rcp(new crs_graph_type(row_map, col_map, A->getNodeNumEntries())); +#if 0 out << "Calling unpackCrsGraphAndCombine" << endl; { @@ -290,7 +290,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(CrsGraph, PackThenUnpackAndCombine, LO, GO, NT return; // no point 
in continuing } } - +#endif // 0 } // PackWithError sends intentionally bad inputs to pack/unpack to make sure diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_StaticImportExport.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_StaticImportExport.cpp index 2a7203bd529b..6591bf3fc383 100644 --- a/packages/tpetra/core/test/CrsGraph/CrsGraph_StaticImportExport.cpp +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_StaticImportExport.cpp @@ -68,7 +68,6 @@ using Teuchos::Comm; using Teuchos::outArg; using Tpetra::Details::gathervPrint; using Tpetra::Details::packCrsGraph; -using Tpetra::Details::unpackCrsGraphAndCombine; // Create and return a simple example CrsMatrix, with row distribution // over the given Map. diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_UnpackIntoStaticGraph.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_UnpackIntoStaticGraph.cpp index 972638ee8c11..e72d7ad42c72 100644 --- a/packages/tpetra/core/test/CrsGraph/CrsGraph_UnpackIntoStaticGraph.cpp +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_UnpackIntoStaticGraph.cpp @@ -68,7 +68,6 @@ using Teuchos::Comm; using Teuchos::outArg; using Tpetra::Details::gathervPrint; using Tpetra::Details::packCrsGraph; -using Tpetra::Details::unpackCrsGraphAndCombine; using std::endl; @@ -171,6 +170,9 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(CrsGraph, PackThenUnpackAndCombine, LO, GO, NT } } + return; +#if 0 + // Now unpack in to the static graph out << "Calling unpackCrsGraphAndCombine" << endl; @@ -283,7 +285,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(CrsGraph, PackThenUnpackAndCombine, LO, GO, NT return; // no point in continuing } } - +#endif // 0 } diff --git a/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp b/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp index d3c43d0f84a6..1e112468de9a 100644 --- a/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp +++ b/packages/tpetra/core/test/Utils/TpetraUtils_crsUtils.cpp @@ -59,7 +59,7 @@ using Teuchos::CommandLineProcessor; using 
Tpetra::Details::padCrsArrays; using Tpetra::Details::insertCrsIndices; using Tpetra::Details::findCrsIndices; -using Tpetra::Details::impl::uninitialized_view; +using Tpetra::Details::impl::make_uninitialized_view; using std::vector; namespace { @@ -75,6 +75,7 @@ namespace { // TEUCHOS_UNIT_TEST(CrsGraph, ResizeRowPointersAndIndices_1) { +#if 0 using device_type = typename Tpetra::Map<>::device_type; using execution_space = typename device_type::execution_space; using ordinal_type = size_t; @@ -84,14 +85,14 @@ TEUCHOS_UNIT_TEST(CrsGraph, ResizeRowPointersAndIndices_1) ordinal_type num_row = 4; ordinal_type num_indices_per_row = 5; ordinal_type num_indices = num_indices_per_row * num_row; - auto row_ptrs_beg = uninitialized_view("beg", num_row+1); + auto row_ptrs_beg = make_uninitialized_view("beg", num_row+1); // this assumes UVM for (ordinal_type i=0; i("end", num_row); + auto row_ptrs_end = make_uninitialized_view("end", num_row); for (ordinal_type i=0; i("indices", num_indices); + auto indices = make_uninitialized_view("indices", num_indices); for (ordinal_type i=0; i("import lids", num_row); - auto num_packets_per_lid = uninitialized_view("num packets", num_row); + auto import_lids = make_uninitialized_view("import lids", num_row); + auto num_packets_per_lid = make_uninitialized_view("num packets", num_row); for (ordinal_type i=0; i::device_type device_type; using execution_space = typename device_type::execution_space; using ordinal_type = size_t; @@ -221,7 +224,7 @@ TEUCHOS_UNIT_TEST(CrsGraph, ResizeRowPointersAndIndices_2) // Row 2 TEST_ASSERT(indices(11) == 7); TEST_ASSERT(indices(12) == 8); - +#endif // 0 } template From 704b84610a39f5f490907e2a807a9dc08eb12410 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 10 Feb 2020 18:02:03 -0700 Subject: [PATCH 33/49] Tpetra: Remove CrsPadding::increase method --- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 4 ---- .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 8 ++------ 
.../core/src/Tpetra_Details_CrsPadding.hpp | 17 ++++++----------- .../tpetra/core/src/Tpetra_Details_crsUtils.hpp | 12 ------------ 4 files changed, 8 insertions(+), 33 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index c04b93860045..a069a72313a5 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5195,14 +5195,10 @@ namespace Tpetra { if (isGloballyIndexed()) { padCrsArrays(row_ptrs_beg, row_ptrs_end, k_gblInds1D_, padding, myRank, verbose); - TEUCHOS_ASSERT( padding.increase() == 0 || - k_gblInds1D_.extent(0) != 0 ); } else { padCrsArrays(row_ptrs_beg, row_ptrs_end, k_lclInds1D_, padding, myRank, verbose); - TEUCHOS_ASSERT( padding.increase() == 0 || - k_lclInds1D_.extent(0) != 0 ); } if (refill_num_row_entries) { diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index b25a6b7463fd..237c60537adc 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -6771,9 +6771,7 @@ namespace Tpetra { auto padding = myGraph_->computeCrsPadding(srcGraph, numSameIDs, permuteToLIDs_dv, permuteFromLIDs_dv, verbose); - if (padding->increase() != 0) { - applyCrsPadding(*padding, verbose); - } + applyCrsPadding(*padding, verbose); } const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed (); // @@ -7902,9 +7900,7 @@ namespace Tpetra { auto padding = myGraph_->computePaddingForCrsMatrixUnpack( importLIDs, imports, numPacketsPerLID, verbose); - if (padding->increase() != 0) { - applyCrsPadding(*padding, verbose); - } + applyCrsPadding(*padding, verbose); } unpackAndCombineImplNonStatic(importLIDs, imports, numPacketsPerLID, diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index 203f8285f612..93f939de1ce5 100644 --- 
a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -101,7 +101,6 @@ namespace Tpetra { } void print(std::ostream& out) const { - out << "increase: " << increase() << ", "; const size_t maxNumToPrint = Details::Behavior::verbosePrintCountThreshold(); const size_t size = entries_.size(); @@ -124,12 +123,6 @@ namespace Tpetra { out << "]"; } - /// \brief Increase (increment in the number of entries) in - /// required allocation size. - size_t increase() const { - return increase_; - } - struct Result { size_t allocSize; bool found; @@ -189,9 +182,12 @@ namespace Tpetra { tgtGblColInds, newNumTgtEnt, srcGblColInds, newNumSrcEnt); unionNumDups += (newNumTgtEnt + newNumSrcEnt - unionNumEnt); - if (unionNumEnt > origNumTgtEnt) { - increase_ += (unionNumEnt - origNumTgtEnt); - } + + // FIXME (mfh 10 Feb 2020) If unionNumEnt <= origNumTgtEnt, + // then don't even store the row. This should save space for + // a common case in which the calling process receives few + // rows. Remember that CrsPadding only accounts for entries; + // it ignores any free space at the end of each row. } std::vector& @@ -251,7 +247,6 @@ namespace Tpetra { // sense to store them all in one map. 
std::map > entries_; std::vector scratchColInds_; - size_t increase_ = 0; }; } // namespace Details } // namespace Tpetra diff --git a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp index 23bcb27ec993..e6514c0fe625 100644 --- a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp @@ -206,18 +206,6 @@ pad_crs_arrays( return; // nothing to do } - // const size_t additional_size_needed = padding.increase(); - // if (additional_size_needed == 0 || row_ptr_beg.size() == 0) { - // if (verbose) { - // std::ostringstream os; - // os << *prefix << "Done; no padding needed" << endl; - // std::cerr << os.str(); - // } - // return; // nothing to do - // } - - // If we get this far, then we need reallocation. - const size_t lclNumRows(row_ptr_beg.size() - 1); RowPtr newAllocPerRow = make_uninitialized_view("newAllocPerRow", lclNumRows, From f9390f6fd979326e782e4105e49ab38bfb3029b8 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Mon, 10 Feb 2020 20:44:17 -0700 Subject: [PATCH 34/49] Tpetra: CrsPadding now only stores source inds not in tgt @trilinos/tpetra Tpetra::Details::CrsPadding now only stores those source indices that are not already in the target, for each row. This avoids duplicating storage of the target matrix, and thus optimizes for cases like doExport of a shared graph into an owning graph (a common case for PDE discretizations). NOTE: This means that CrsPadding stores increments, not total allocation sizes. CrsGraph and CrsMatrix do the right thing with the increments. 
--- .../core/src/Tpetra_Details_CrsPadding.hpp | 118 ++++++++++++------ .../core/src/Tpetra_Details_crsUtils.hpp | 9 +- 2 files changed, 85 insertions(+), 42 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index 93f939de1ce5..73f515ffe5fb 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -124,7 +124,7 @@ namespace Tpetra { } struct Result { - size_t allocSize; + size_t numInSrcNotInTgt; bool found; }; @@ -182,18 +182,12 @@ namespace Tpetra { tgtGblColInds, newNumTgtEnt, srcGblColInds, newNumSrcEnt); unionNumDups += (newNumTgtEnt + newNumSrcEnt - unionNumEnt); - - // FIXME (mfh 10 Feb 2020) If unionNumEnt <= origNumTgtEnt, - // then don't even store the row. This should save space for - // a common case in which the calling process receives few - // rows. Remember that CrsPadding only accounts for entries; - // it ignores any free space at the end of each row. 
} std::vector& - get_union_col_inds(const Phase /* phase */, - const LO /* whichIndex */, - const LO tgtLclRowInd) + get_difference_col_inds(const Phase /* phase */, + const LO /* whichIndex */, + const LO tgtLclRowInd) { return entries_[tgtLclRowInd]; } @@ -209,37 +203,83 @@ namespace Tpetra { const GO srcColInds[], // sorted & merged const size_t numSrcEnt) { - std::vector& unionColInds = - get_union_col_inds(phase, whichIndex, tgtLclRowInd); - - if (unionColInds.size() == 0) { - auto tgtEnd = tgtColInds + numTgtEnt; - auto srcEnd = srcColInds + numSrcEnt; - const size_t numInCommon = Details::countNumInCommon( - srcColInds, srcEnd, tgtColInds, tgtEnd); - unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; - unionColInds.resize(unionNumEnt); - (void) std::set_union(tgtColInds, tgtEnd, - srcColInds, srcEnd, - unionColInds.begin()); - } - else { - // We've already seen the target graph/matrix row before, so - // we need not even look at tgtColInds. - const size_t oldUnionSize = unionColInds.size(); + using Details::countNumInCommon; + // We only need to accumulate those source indices that are + // not already target indices. This is because we always have + // the target indices on input to this function, so there's no + // need to store them here again. That still could be a lot + // to store, but it's better than duplicating target matrix + // storage. + // + // This means that consumers of this data structure need to + // treat entries_[tgtLclRowInd].size() as an increment, not as + // the required new allocation size itself. 
+ // + // We store + // + // difference(union(incoming source indices, + // already stored source indices), + // target indices) + + auto tgtEnd = tgtColInds + numTgtEnt; + auto srcEnd = srcColInds + numSrcEnt; + const size_t numInCommon = countNumInCommon( + srcColInds, srcEnd, tgtColInds, tgtEnd); + TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); + unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; + + if (unionNumEnt > numTgtEnt) { + TEUCHOS_ASSERT( numSrcEnt != 0 ); + + // At least one input source index isn't in the target. + std::vector& diffColInds = + get_difference_col_inds(phase, whichIndex, tgtLclRowInd); + const size_t oldDiffNumEnt = diffColInds.size(); + + if (oldDiffNumEnt == 0) { + TEUCHOS_ASSERT( numSrcEnt >= numInCommon ); + const size_t newDiffNumEnt = numSrcEnt - numInCommon; + diffColInds.resize(newDiffNumEnt); + auto diffEnd = std::set_difference(srcColInds, srcEnd, + tgtColInds, tgtEnd, + diffColInds.begin()); + const size_t newLen(diffEnd - diffColInds.begin()); + TEUCHOS_ASSERT( newLen == newDiffNumEnt ); + } + else { + TEUCHOS_ASSERT( diffColInds.data() != nullptr ); + + // scratch = union(srcColInds, diffColInds) + const size_t unionSize = numSrcEnt + oldDiffNumEnt - + countNumInCommon(srcColInds, srcEnd, + diffColInds.begin(), + diffColInds.end()); + if (scratchColInds_.size() < unionSize) { + scratchColInds_.resize(unionSize); + } + auto unionBeg = scratchColInds_.begin(); + + auto unionEnd = std::set_union( + srcColInds, srcEnd, + diffColInds.begin(), diffColInds.end(), + unionBeg); + const size_t newUnionLen(unionEnd - unionBeg); + TEUCHOS_ASSERT( newUnionLen == unionSize ); + + // diffColInds = difference(scratch, tgtColInds) + const size_t unionTgtInCommon = countNumInCommon( + unionBeg, unionEnd, tgtColInds, tgtEnd); + TEUCHOS_ASSERT( unionSize >= unionTgtInCommon ); - const size_t maxUnionSize = numSrcEnt + unionColInds.size(); - if (scratchColInds_.size() < maxUnionSize) { - scratchColInds_.resize(maxUnionSize); + const 
size_t newDiffNumEnt = unionSize - unionTgtInCommon; + TEUCHOS_ASSERT( newDiffNumEnt >= oldDiffNumEnt ); + diffColInds.resize(newDiffNumEnt); + auto diffEnd = std::set_difference(unionBeg, unionEnd, + tgtColInds, tgtEnd, + diffColInds.begin()); + const size_t diffLen(diffEnd - diffColInds.begin()); + TEUCHOS_ASSERT( diffLen == newDiffNumEnt ); } - auto scratchEnd = std::set_union( - srcColInds, srcColInds + numSrcEnt, - unionColInds.begin(), unionColInds.end(), - scratchColInds_.begin()); - unionNumEnt = size_t(scratchEnd - scratchColInds_.begin()); - unionColInds.resize(unionNumEnt); - std::copy(scratchColInds_.begin(), scratchColInds_.end(), - unionColInds.begin()); } } diff --git a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp index e6514c0fe625..f70727907644 100644 --- a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp @@ -235,6 +235,8 @@ pad_crs_arrays( const size_t end = row_ptr_beg_h[lclRowInd+1]; TEUCHOS_ASSERT( end >= start ); const size_t oldAllocSize = end - start; + const size_t oldNumEnt = row_ptr_end_h[lclRowInd] - start; + TEUCHOS_ASSERT( oldNumEnt <= oldAllocSize ); // This is not a pack routine. Do not shrink! Shrinking now // to fit the number of entries would ignore users' hint for @@ -242,9 +244,10 @@ pad_crs_arrays( // only counts entries and ignores any available free space. 
auto result = padding.get_result(lclRowInd); - if (result.found && result.allocSize > oldAllocSize) { - lclIncrease += (result.allocSize - oldAllocSize); - newAllocPerRow_h[lclRowInd] = result.allocSize; + const size_t newNumEnt = oldNumEnt + result.numInSrcNotInTgt; + if (newNumEnt > oldAllocSize) { + lclIncrease += (newNumEnt - oldAllocSize); + newAllocPerRow_h[lclRowInd] = newNumEnt; } else { newAllocPerRow_h[lclRowInd] = oldAllocSize; From 3948f9eca70bcc5f578c248c72bab3a454ae12c4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 11 Feb 2020 13:19:52 -0700 Subject: [PATCH 35/49] Tpetra: Fix CUDA build error in pad_crs_arrays @trilinos/tpetra That was supposed to be a host parallel loop, but I was using a device RangePolicy. This commit fixes that. I also removed the DeviceType template parameter from CrsPadding. CrsPadding was not using that template parameter, and the class is an implementation detail anyway, so we can always put it back if we need it. I don't like to give the impression that a class is using Kokkos and/or running on device when it's not. 
--- .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 5 ++-- .../core/src/Tpetra_Details_CrsPadding.hpp | 12 ++++++++- .../core/src/Tpetra_Details_crsUtils.hpp | 25 +++++++++---------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 4b6c83d3b86c..d18a94c1eb06 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -77,8 +77,7 @@ namespace Tpetra { #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace Details { template + class GlobalOrdinal> class CrsPadding; } // namespace Details @@ -1157,7 +1156,7 @@ namespace Tpetra { buffer_device_type>& permuteFromLIDs) override; using padding_type = Details::CrsPadding< - local_ordinal_type, global_ordinal_type, device_type>; + local_ordinal_type, global_ordinal_type>; void applyCrsPadding(const padding_type& padding, diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index 73f515ffe5fb..ab73f727f1a9 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -10,7 +10,10 @@ namespace Tpetra { namespace Details { - template + /// \brief Keep track of how much more space a CrsGraph or + /// CrsMatrix needs, when the graph or matrix is the target of a + /// doExport or doImport. + template class CrsPadding { private: using LO = LocalOrdinal; @@ -128,6 +131,13 @@ namespace Tpetra { bool found; }; + /// \brief For a given target matrix local row index, return the + /// number of unique source column indices to merge into that + /// row encountered thus far that are not already in the row, + /// and whether we've seen that row already. + /// + /// This method relies only on const methods of std::map, and + /// thus should be thread safe (on host). 
Result get_result(const LO targetLocalIndex) const { diff --git a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp index f70727907644..e056d733474f 100644 --- a/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_crsUtils.hpp @@ -158,9 +158,6 @@ pad_crs_arrays( { using Kokkos::view_alloc; using Kokkos::WithoutInitializing; - using execution_space = typename Indices::execution_space; - using range_type = Kokkos::RangePolicy; - using std::endl; std::unique_ptr prefix; @@ -173,19 +170,19 @@ pad_crs_arrays( os << "Start" << endl; std::cerr << os.str(); } + Kokkos::HostSpace hostSpace; if (verbose) { std::ostringstream os; os << *prefix << "On input: "; - auto row_ptr_beg_h = - Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_beg); + Kokkos::create_mirror_view(hostSpace, row_ptr_beg); Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg); verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg before scan", maxNumToPrint); os << ", "; auto row_ptr_end_h = - Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_end); + Kokkos::create_mirror_view(hostSpace, row_ptr_end); Kokkos::deep_copy(row_ptr_end_h, row_ptr_end); verbosePrintArray(os, row_ptr_end_h, "row_ptr_end before scan", maxNumToPrint); @@ -217,7 +214,6 @@ pad_crs_arrays( } size_t increase = 0; { - Kokkos::HostSpace hostSpace; auto row_ptr_end_h = create_mirror_view( hostSpace, row_ptr_end, verbose, prefix.get()); Kokkos::deep_copy(row_ptr_end_h, row_ptr_end); @@ -227,9 +223,11 @@ pad_crs_arrays( auto newAllocPerRow_h = create_mirror_view( hostSpace, newAllocPerRow, verbose, prefix.get()); + using host_range_type = Kokkos::RangePolicy< + Kokkos::DefaultHostExecutionSpace, size_t>; Kokkos::parallel_reduce ("Tpetra::CrsGraph: Compute new allocation size per row", - range_type(0, lclNumRows), + host_range_type(0, lclNumRows), [&] (const size_t lclRowInd, size_t& lclIncrease) { const size_t start = 
row_ptr_beg_h[lclRowInd]; const size_t end = row_ptr_beg_h[lclRowInd+1]; @@ -291,6 +289,8 @@ pad_crs_arrays( os << *prefix << "Repack" << endl; std::cerr << os.str(); } + using execution_space = typename Indices::execution_space; + using range_type = Kokkos::RangePolicy; Kokkos::parallel_scan( "Tpetra::CrsGraph or CrsMatrix repack", range_type(0, lclNumRows+1), @@ -339,7 +339,7 @@ pad_crs_arrays( os << *prefix; auto row_ptr_beg_h = - Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_beg); + Kokkos::create_mirror_view(hostSpace, row_ptr_beg); Kokkos::deep_copy(row_ptr_beg_h, row_ptr_beg); verbosePrintArray(os, row_ptr_beg_h, "row_ptr_beg after scan", maxNumToPrint); @@ -347,7 +347,7 @@ pad_crs_arrays( os << *prefix; auto row_ptr_end_h = - Kokkos::create_mirror_view(Kokkos::HostSpace(), row_ptr_end); + Kokkos::create_mirror_view(hostSpace, row_ptr_end); Kokkos::deep_copy(row_ptr_end_h, row_ptr_end); verbosePrintArray(os, row_ptr_end_h, "row_ptr_end after scan", maxNumToPrint); @@ -364,10 +364,9 @@ pad_crs_arrays( verbose, prefix.get()); if (verbose) { - Kokkos::HostSpace hostSpace; - auto indices_h = create_mirror_view(hostSpace, indices); + auto indices_h = Kokkos::create_mirror_view(hostSpace, indices); Kokkos::deep_copy(indices_h, indices); - auto values_h = create_mirror_view(hostSpace, values); + auto values_h = Kokkos::create_mirror_view(hostSpace, values); Kokkos::deep_copy(values_h, values); std::ostringstream os; os << "On output: "; From f4c5e6fc5ce68f26887ac3f5733526fcb6aeb61d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 11 Feb 2020 13:28:10 -0700 Subject: [PATCH 36/49] Tpetra: Fix build warnings in countNumInCommon test @trilinos/tpetra --- .../core/test/Utils/countNumInCommon.cpp | 47 ++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/packages/tpetra/core/test/Utils/countNumInCommon.cpp b/packages/tpetra/core/test/Utils/countNumInCommon.cpp index 512370904ff1..169001aa2ca6 100644 --- 
a/packages/tpetra/core/test/Utils/countNumInCommon.cpp +++ b/packages/tpetra/core/test/Utils/countNumInCommon.cpp @@ -90,7 +90,9 @@ namespace { // (anonymous) } } { - std::vector list1{{666}}; + std::vector list1; + list1.push_back(666); + // std::vector list1{{666}}; // warns; don't do this std::vector list2; testLists(out, success, list1, list2, 0, 1); if (! success) { @@ -99,15 +101,18 @@ namespace { // (anonymous) } { std::vector list1; - std::vector list2{{666}}; + std::vector list2; + list2.push_back(666); testLists(out, success, list1, list2, 0, 1); if (! success) { return; } } { - std::vector list1{{418}}; - std::vector list2{{418}}; + std::vector list1; + list1.push_back(418); + std::vector list2; + list2.push_back(418); testLists(out, success, list1, list2, 1, 1); if (! success) { return; @@ -115,14 +120,16 @@ namespace { // (anonymous) } { std::vector list1{{418, 419}}; - std::vector list2{{418}}; + std::vector list2; + list2.push_back(418); testLists(out, success, list1, list2, 1, 2); if (! success) { return; } } { - std::vector list1{{418}}; + std::vector list1; + list1.push_back(418); std::vector list2{{418, 419}}; testLists(out, success, list1, list2, 1, 2); if (! success) { @@ -131,14 +138,16 @@ namespace { // (anonymous) } { std::vector list1{{417, 418}}; - std::vector list2{{418}}; + std::vector list2; + list2.push_back(418); testLists(out, success, list1, list2, 1, 2); if (! success) { return; } } { - std::vector list1{{418}}; + std::vector list1; + list1.push_back(418); std::vector list2{{417, 418}}; testLists(out, success, list1, list2, 1, 2); if (! success) { @@ -147,14 +156,16 @@ namespace { // (anonymous) } { std::vector list1{{417, 418, 419}}; - std::vector list2{{418}}; + std::vector list2; + list2.push_back(418); testLists(out, success, list1, list2, 1, 3); if (! 
success) { return; } } { - std::vector list1{{418}}; + std::vector list1; + list1.push_back(418); std::vector list2{{417, 418, 419}}; testLists(out, success, list1, list2, 1, 3); if (! success) { @@ -163,14 +174,16 @@ namespace { // (anonymous) } { std::vector list1{{415, 418, 421}}; - std::vector list2{{418}}; + std::vector list2; + list2.push_back(418); testLists(out, success, list1, list2, 1, 3); if (! success) { return; } } { - std::vector list1{{418}}; + std::vector list1; + list1.push_back(418); std::vector list2{{415, 418, 421}}; testLists(out, success, list1, list2, 1, 3); if (! success) { @@ -179,14 +192,16 @@ namespace { // (anonymous) } { std::vector list1{{415, 419, 421}}; - std::vector list2{{418}}; + std::vector list2; + list2.push_back(418); testLists(out, success, list1, list2, 0, 4); if (! success) { return; } } { - std::vector list1{{418}}; + std::vector list1; + list1.push_back(418); std::vector list2{{415, 419, 421}}; testLists(out, success, list1, list2, 0, 4); if (! 
success) { @@ -202,8 +217,8 @@ namespace { // (anonymous) std::vector curGblColInds {{166215, 166216, 166217, 166218, 166219, 166220, 166221, 166222, 166223, 166224, 166225, 166226, 166227, 166228, 166229, 166230, 166231, 166232, 166233, 166234, 166235, 166236, 166237, 166238, 166239, 166240, 166241, 166242, 166243, 166244, 166245, 166246, 166247, 198279, 198280, 198281, 198282, 198283, 198284, 198285, 198286, 198287, 198288, 198289, 198290, 198291, 198292, 198293, 198294, 198295, 198296, 198297, 198298, 198299, 198300, 198301, 198302, 198303, 198304, 198305, 198306, 198307, 198308, 198309, 198310, 198311, 198312, 198313, 198314, 198315, 198316, 198317, 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356}}; - constexpr size_t newGblColIndsSize (96); - constexpr size_t curGblColIndsSize (96); + constexpr size_t newGblColIndsSize(96); + constexpr size_t curGblColIndsSize(96); TEST_EQUALITY( newGblColInds.size(), newGblColIndsSize ); TEST_EQUALITY( curGblColInds.size(), curGblColIndsSize ); From b7e784e90c29db4b4dbb079479906e35344affaa Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 11 Feb 2020 21:21:29 -0700 Subject: [PATCH 37/49] Tpetra: Add CrsGraph test exercising CrsPadding; fix build warning --- .../tpetra/core/test/CrsGraph/CMakeLists.txt | 9 + .../tpetra/core/test/CrsGraph/UnpackMerge.cpp | 345 ++++++++++++++++++ .../tpetra/core/test/CrsMatrix/CMakeLists.txt | 2 +- .../core/test/CrsMatrix/UnpackMerge.cpp | 7 +- 4 files changed, 359 insertions(+), 4 deletions(-) create mode 100644 packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp diff --git a/packages/tpetra/core/test/CrsGraph/CMakeLists.txt b/packages/tpetra/core/test/CrsGraph/CMakeLists.txt index c4a023ed16c3..400b6fbf5c40 100644 --- a/packages/tpetra/core/test/CrsGraph/CMakeLists.txt +++ b/packages/tpetra/core/test/CrsGraph/CMakeLists.txt @@ -137,3 +137,12 @@ 
TRIBITS_ADD_EXECUTABLE_AND_TEST( STANDARD_PASS_OUTPUT ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + CrsGraph_UnpackMerge + SOURCES + UnpackMerge + ${TEUCHOS_STD_UNIT_TEST_MAIN} + COMM mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT + ) diff --git a/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp b/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp new file mode 100644 index 000000000000..37af38aa99e7 --- /dev/null +++ b/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp @@ -0,0 +1,345 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_TestingUtilities.hpp" +#include "Tpetra_CrsGraph.hpp" +#include "Tpetra_Import.hpp" +#include "Tpetra_Map.hpp" +#include "Kokkos_Core.hpp" + +namespace { // (anonymous) + + using Tpetra::TestingUtilities::getDefaultComm; + using Teuchos::ArrayView; + using Teuchos::Comm; + using Teuchos::outArg; + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::REDUCE_MIN; + using Teuchos::reduceAll; + using Teuchos::tuple; + using std::endl; + using GST = Tpetra::global_size_t; + + // Both source and target graphs have one row on each process. 
+ // + // Target graph's global column indices: + // Proc 0: Global row index 0: [0, 1, 2, 3, 4, 5] + // Proc 1: Global row index 1: [0, 1, 2, 3, 4, 5] + // + // Source graph's global column indices: + // Proc 0: Global row index 1: [] + // Proc 1: Global row index 0: [3, 4, 5, 6, 7, 8, 9] + // + // After Import, target should look like this: + // Proc 0: Global row index 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + // Proc 1: Global row index 1: [0, 1, 2, 3, 4, 5] + + TEUCHOS_UNIT_TEST( CrsGraph, UnpackMerge1 ) + { + using LO = Tpetra::Map<>::local_ordinal_type; + using GO = Tpetra::Map<>::global_ordinal_type; + using crs_graph_type = Tpetra::CrsGraph; + using import_type = Tpetra::Import; + using map_type = Tpetra::Map; + + RCP > comm = getDefaultComm(); + const int myRank = comm->getRank(); + const int numProcs = comm->getSize(); + + out << "Test that Tpetra::CrsGraph::unpackAndCombine into a " + "target graph merges column indices" << endl; + Teuchos::OSTab tab1(out); + + TEST_ASSERT( numProcs == 2 ); + if (numProcs != 2) { + out << "This test requires exactly two MPI processes, but you " + "ran it with " << numProcs << " process(es)." 
<< endl; + return; + } + + const GO gblNumRows (2); + const GO indexBase (0); + std::vector srcRowMapInds; + std::vector tgtRowMapInds; + if (myRank == 0) { + srcRowMapInds.push_back(1); + tgtRowMapInds.push_back(0); + } + else if (myRank == 1) { + srcRowMapInds.push_back(0); + tgtRowMapInds.push_back(1); + } + const LO srcLclNumRows (srcRowMapInds.size()); + const LO tgtLclNumRows (tgtRowMapInds.size()); + + RCP srcRowMap = + rcp(new map_type(static_cast(gblNumRows), + srcRowMapInds.data(), srcLclNumRows, + indexBase, comm)); + RCP tgtRowMap = + rcp(new map_type(static_cast(gblNumRows), + tgtRowMapInds.data(), tgtLclNumRows, + indexBase, comm)); + + const GO gblNumCols = 10; + RCP colMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::LocallyReplicated)); + RCP domMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::GloballyDistributed)); + RCP ranMap = srcRowMap; + import_type importer(srcRowMap, tgtRowMap); + + std::vector srcGblColInds; + if (myRank == 1) { + srcGblColInds = std::vector{{3, 4, 5, 6, 7, 8, 9}}; + } + std::vector tgtGblColInds{{0, 1, 2, 3, 4, 5}}; + + for (const bool A_src_is_fill_complete : {false, true}) { + out << "A_src will" << (A_src_is_fill_complete ? "" : " NOT") + << " be fill complete." 
<< endl; + crs_graph_type A_src(srcRowMap, colMap, srcGblColInds.size()); + crs_graph_type A_tgt(tgtRowMap, colMap, tgtGblColInds.size()); + + for (LO lclRow = 0; lclRow < srcLclNumRows; ++lclRow) { + const GO gblRow = srcRowMap->getGlobalElement(lclRow); + A_tgt.insertGlobalIndices( + gblRow, Teuchos::ArrayView(tgtGblColInds)); + A_src.insertGlobalIndices( + gblRow, Teuchos::ArrayView(srcGblColInds)); + } + if (A_src_is_fill_complete) { + A_src.fillComplete(domMap, ranMap); + } + + out << "Finished A_src.fillComplete(domMap, ranMap)" << endl; + + A_tgt.doImport(A_src, importer, Tpetra::INSERT); + A_tgt.fillComplete(domMap, ranMap); + + Kokkos::fence(); // since we're accessing data on host now + + Teuchos::ArrayView lclColInds; + const LO lclRowToTest (0); + A_tgt.getLocalRowView(lclRowToTest, lclColInds); + + const LO expectedNumEnt = myRank == 0 ? LO(10) : LO(6); + TEST_EQUALITY( LO(lclColInds.size()), expectedNumEnt ); + + if (success && myRank == 0) { + for (LO k = 0; k < expectedNumEnt; ++k) { + TEST_EQUALITY( lclColInds[k], LO(k) ); + } + } + + // Test whether all processes passed the test. + int lclSuccess = success ? 
1 : 0; + int gblSuccess = 0; + reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess)); + TEST_EQUALITY_CONST( gblSuccess, 1 ); + } + } + + TEUCHOS_UNIT_TEST( CrsGraph, UnpackMerge2 ) + { + using LO = Tpetra::Map<>::local_ordinal_type; + using GO = Tpetra::Map<>::global_ordinal_type; + using Node = Tpetra::Map<>::node_type; + using crs_graph_type = Tpetra::CrsGraph; + using import_type = Tpetra::Import; + using map_type = Tpetra::Map; + int lclSuccess = 1; + int gblSuccess = 0; + + RCP > comm = getDefaultComm(); + const int myRank = comm->getRank(); + const int numProcs = comm->getSize(); + + out << "Regression test with a real-life example" << endl; + Teuchos::OSTab tab1(out); + + TEST_ASSERT( numProcs == 2 ); + if (numProcs != 2) { + out << "This test requires exactly two MPI processes, but you " + "ran it with " << numProcs << " process(es)." << endl; + return; + } + + const GO gblNumRows (2); + const GO indexBase (0); + std::vector srcRowMapInds; + std::vector tgtRowMapInds; + if (myRank == 0) { + srcRowMapInds.push_back(1); + tgtRowMapInds.push_back(0); + } + else if (myRank == 1) { + srcRowMapInds.push_back(0); + tgtRowMapInds.push_back(1); + } + const LO srcLclNumRows (srcRowMapInds.size()); + const LO tgtLclNumRows (tgtRowMapInds.size()); + + RCP srcRowMap = + rcp(new map_type(static_cast(gblNumRows), + srcRowMapInds.data(), srcLclNumRows, + indexBase, comm)); + RCP tgtRowMap = + rcp(new map_type(static_cast(gblNumRows), + tgtRowMapInds.data(), tgtLclNumRows, + indexBase, comm)); + // [0, ... 199,999] + const GO gblNumCols = 200000; + RCP colMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::LocallyReplicated)); + RCP domMap = + rcp(new map_type(static_cast(gblNumCols), + indexBase, comm, + Tpetra::GloballyDistributed)); + RCP ranMap = srcRowMap; + import_type importer(srcRowMap, tgtRowMap); + + // Input to insert: 96 entries, sent from Proc 1. 
+ std::vector srcGblColInds {{ + 142944, 142945, 142946, 142947, 142948, 142949, 142950, 142951, + 142952, 142953, 142954, 142955, 142959, 142960, 142961, 142965, + 142966, 142967, 142968, 142969, 142970, 143142, 143143, 143144, + 198279, 198280, 198281, 198282, 198283, 198284, 198291, 198292, + 198293, 198303, 198304, 198305, 198309, 198310, 198311, 198333, + 198334, 198335, 198336, 198337, 198338, 198339, 198340, 198341, + 198342, 198343, 198344, 198345, 198346, 198347, 198348, 198349, + 198350, 198351, 198352, 198353, 198354, 198355, 198356, 198699, + 198700, 198701, 198702, 198703, 198704, 198705, 198706, 198707, + 198708, 198709, 198710, 198711, 198712, 198713, 198729, 198730, + 198731, 198732, 198733, 198734, 198735, 198736, 198737, 198738, + 198739, 198740, 198741, 198742, 198743, 198744, 198745, 198746 + }}; + + // Current contents of Proc 0 row: 96 entries. + std::vector tgtGblColInds {{ + 166215, 166216, 166217, 166218, 166219, 166220, 166221, 166222, + 166223, 166224, 166225, 166226, 166227, 166228, 166229, 166230, + 166231, 166232, 166233, 166234, 166235, 166236, 166237, 166238, + 166239, 166240, 166241, 166242, 166243, 166244, 166245, 166246, + 166247, 198279, 198280, 198281, 198282, 198283, 198284, 198285, + 198286, 198287, 198288, 198289, 198290, 198291, 198292, 198293, + 198294, 198295, 198296, 198297, 198298, 198299, 198300, 198301, + 198302, 198303, 198304, 198305, 198306, 198307, 198308, 198309, + 198310, 198311, 198312, 198313, 198314, 198315, 198316, 198317, + 198333, 198334, 198335, 198336, 198337, 198338, 198339, 198340, + 198341, 198342, 198343, 198344, 198345, 198346, 198347, 198348, + 198349, 198350, 198351, 198352, 198353, 198354, 198355, 198356 + }}; + + TEST_EQUALITY( srcGblColInds.size(), size_t(96) ); + TEST_EQUALITY( tgtGblColInds.size(), size_t(96) ); + + std::vector srcCpy (srcGblColInds); + auto srcBeg = srcCpy.begin(); + auto srcEnd = srcCpy.end(); + std::sort(srcBeg, srcEnd); + srcEnd = std::unique(srcBeg, srcEnd); + + 
std::vector tgtCpy (tgtGblColInds); + auto tgtBeg = tgtCpy.begin(); + auto tgtEnd = tgtCpy.end(); + std::sort(tgtBeg, tgtEnd); + tgtEnd = std::unique(tgtBeg, tgtEnd); + + std::vector unionGblColInds(srcGblColInds.size() + + tgtGblColInds.size()); + auto unionEnd = std::set_union(srcBeg, srcEnd, tgtBeg, tgtEnd, + unionGblColInds.begin()); + unionGblColInds.resize(unionEnd - unionGblColInds.begin()); + const size_t unionSize = unionGblColInds.size(); + + out << "Number of elements in set union of column indices: " + << unionSize << endl; + + crs_graph_type A_src(srcRowMap, colMap, srcGblColInds.size()); + crs_graph_type A_tgt(tgtRowMap, colMap, tgtGblColInds.size()); + + for (LO lclRow = 0; lclRow < srcLclNumRows; ++lclRow) { + const GO gblRow = srcRowMap->getGlobalElement(lclRow); + A_tgt.insertGlobalIndices( + gblRow, Teuchos::ArrayView(tgtGblColInds)); + A_src.insertGlobalIndices( + gblRow, Teuchos::ArrayView(srcGblColInds)); + } + A_src.fillComplete(domMap, ranMap); + + A_tgt.doImport(A_src, importer, Tpetra::INSERT); + A_tgt.fillComplete(domMap, ranMap); + + Kokkos::fence(); // since we're accessing data on host now + + if (myRank == 0) { + const GO gblRowToTest = tgtRowMap->getMinGlobalIndex(); + size_t numEnt = A_tgt.getNumEntriesInGlobalRow(gblRowToTest); + Teuchos::Array gblColInds(numEnt); + A_tgt.getGlobalRowCopy(gblRowToTest, gblColInds(), numEnt); + + const LO expectedNumEnt(unionGblColInds.size()); + TEST_EQUALITY( size_t(numEnt), expectedNumEnt ); + TEST_EQUALITY( size_t(gblColInds.size()), + size_t(expectedNumEnt) ); + + if (success) { + for (LO k = 0; k < expectedNumEnt; ++k) { + TEST_EQUALITY( gblColInds[k], unionGblColInds[k] ); + } + } + } + + lclSuccess = success ? 
1 : 0; + gblSuccess = 0; + reduceAll(*comm, REDUCE_MIN, lclSuccess, outArg(gblSuccess)); + TEST_EQUALITY_CONST( gblSuccess, 1 ); + } + +} // namespace (anonymous) diff --git a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt index 83e98e72caba..f6fd956796b1 100644 --- a/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt +++ b/packages/tpetra/core/test/CrsMatrix/CMakeLists.txt @@ -381,7 +381,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( ) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnpackMerge + CrsMatrix_UnpackMerge SOURCES UnpackMerge ${TEUCHOS_STD_UNIT_TEST_MAIN} diff --git a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp index dc7e97eea814..dbea7832aed9 100644 --- a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp +++ b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp @@ -395,9 +395,10 @@ namespace { // (anonymous) vals(), numEnt); const LO expectedNumEnt(expectedTgtVals.size()); - TEST_EQUALITY( numEnt, expectedNumEnt ); - TEST_EQUALITY( LO(gblColInds.size()), expectedNumEnt ); - TEST_EQUALITY( LO(vals.size()), expectedNumEnt ); + TEST_EQUALITY( size_t(numEnt), expectedNumEnt ); + TEST_EQUALITY( size_t(gblColInds.size()), + size_t(expectedNumEnt) ); + TEST_EQUALITY( size_t(vals.size()), size_t(expectedNumEnt) ); if (success) { for (LO k = 0; k < expectedNumEnt; ++k) { From 09c57dc0cf364f8a8a46a01e9c0665742b7422c9 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Tue, 11 Feb 2020 21:22:10 -0700 Subject: [PATCH 38/49] Tpetra: Fix gemm test to use Tpetra::ScopeGuard, not Tpetra::Map The test used to use Tpetra::Map to ensure Kokkos initialization. Change it to use Tpetra::ScopeGuard instead. Also, add a debug option, so that the test doesn't print quite so much output unless you explicitly ask for it. 
--- packages/tpetra/core/test/Blas/CMakeLists.txt | 1 - packages/tpetra/core/test/Blas/gemm.cpp | 82 ++++++++++--------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/packages/tpetra/core/test/Blas/CMakeLists.txt b/packages/tpetra/core/test/Blas/CMakeLists.txt index 259f5e38eeed..c1396ff54422 100644 --- a/packages/tpetra/core/test/Blas/CMakeLists.txt +++ b/packages/tpetra/core/test/Blas/CMakeLists.txt @@ -12,7 +12,6 @@ TRIBITS_ADD_EXECUTABLE( gemm SOURCES gemm - ${TEUCHOS_STD_UNIT_TEST_MAIN} COMM serial mpi ) diff --git a/packages/tpetra/core/test/Blas/gemm.cpp b/packages/tpetra/core/test/Blas/gemm.cpp index 5233c1e84fe9..ba6acf1c6c58 100644 --- a/packages/tpetra/core/test/Blas/gemm.cpp +++ b/packages/tpetra/core/test/Blas/gemm.cpp @@ -35,14 +35,13 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER */ #include "Tpetra_TestingUtilities.hpp" -#include "Tpetra_Map.hpp" +#include "Tpetra_Details_Behavior.hpp" +#include "Tpetra_Core.hpp" #include "Teuchos_BLAS.hpp" #include "Kokkos_Core.hpp" #include "Kokkos_Random.hpp" @@ -65,7 +64,6 @@ namespace { clp.setOption ("M", &M, "First matrix dimension M"); } - template::value> struct MachinePrecision @@ -227,7 +225,7 @@ namespace { } } - const LO A2_stride = A2.stride(1); + const LO A2_stride = A2.stride(1); const LO B2_stride = B2.stride(1); const LO C2_stride = C2.stride(1); @@ -373,10 +371,11 @@ namespace { const bool isTrans_B = transOptIsTrans[transInd_B]; const bool isConj_B = transOptIsConj[transInd_B]; - testGemmVsTeuchosBlasForOneTransComb - (out, success, randPool, m, n, k, - trans_A, isTrans_A, isConj_A, - trans_B, isTrans_B, isConj_B); + testGemmVsTeuchosBlasForOneTransComb< + EntryType, CoeffType, DeviceType>( + out, success, randPool, m, n, k, + trans_A, 
isTrans_A, isConj_A, + trans_B, isTrans_B, isConj_B); if (! success) { out << "At least one test FAILED; abandoning the others." << endl; return; @@ -391,40 +390,37 @@ namespace { TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( Blas, Gemm, SCALAR ) { - typedef SCALAR entry_type; - typedef SCALAR coeff_type; - typedef Tpetra::Map<> map_type; - typedef map_type::device_type device_type; - - Teuchos::OSTab tab0 (out); - out << "Test \"KokkosBlas::gemm\"" << endl; - Teuchos::OSTab tab1 (out); - - auto comm = Tpetra::TestingUtilities::getDefaultComm (); - // Creating a Map instance takes care of Kokkos initialization and - // finalization automatically. - Tpetra::Map<> map (comm->getSize (), 1, 0, comm); - - auto randPool = preparePseudorandomNumberGenerator (); + using entry_type = SCALAR; + using coeff_type = SCALAR; + using execution_space = Kokkos::DefaultExecutionSpace; + using memory_space = execution_space::memory_space; + using device_type = Kokkos::Device; + const bool debug = Tpetra::Details::Behavior::debug("gemm"); + + Teuchos::RCP fancyOutPtr = debug ? + Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cerr)) : + Teuchos::rcpFromRef(out); + Teuchos::FancyOStream& fancyOut = *fancyOutPtr; + + fancyOut << "Test \"KokkosBlas::gemm\"" << endl; + Teuchos::OSTab tab1(fancyOut); + + auto randPool = preparePseudorandomNumberGenerator(); const LO n_vals[] = {1, 2, 5, 13}; const LO k_vals[] = {1, 2, 5, 13}; - if (comm->getRank() == 0) std::cout << std::endl; + fancyOut << endl; LO m = M; - { - for (LO n : n_vals) { - for (LO k : k_vals) { - if (comm->getRank() == 0) - std::cout << "Testing m,n,k = " << m << "," << n << "," << k - << std::endl; - testGemmVsTeuchosBlas (out, - success, - randPool, - m, n, k); - if (! success) { - out << "At least one test FAILED; abandoning the others." 
<< endl; - return; - } + for (LO n : n_vals) { + for (LO k : k_vals) { + fancyOut << "Testing m,n,k = " << m << "," << n << "," << k + << endl; + testGemmVsTeuchosBlas( + fancyOut, success, randPool, m, n, k); + if (! success) { + fancyOut << "At least one test FAILED; abandoning the others." + << endl; + return; } } } @@ -444,3 +440,11 @@ namespace { } // namespace (anonymous) +int +main(int argc, char* argv[]) +{ + Tpetra::ScopeGuard tpetraScope(&argc, &argv); + const int errCode = + Teuchos::UnitTestRepository::runUnitTestsFromMain (argc, argv); + return errCode; +} From a5d6564a8cc902a8311eadf3ee7b7e2cd038319d Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 12 Feb 2020 09:31:19 -0700 Subject: [PATCH 39/49] Tpetra: Fix build warning in test @trilinos/tpetra --- packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp index dbea7832aed9..d8c8e160c571 100644 --- a/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp +++ b/packages/tpetra/core/test/CrsMatrix/UnpackMerge.cpp @@ -395,7 +395,7 @@ namespace { // (anonymous) vals(), numEnt); const LO expectedNumEnt(expectedTgtVals.size()); - TEST_EQUALITY( size_t(numEnt), expectedNumEnt ); + TEST_EQUALITY( size_t(numEnt), size_t(expectedNumEnt) ); TEST_EQUALITY( size_t(gblColInds.size()), size_t(expectedNumEnt) ); TEST_EQUALITY( size_t(vals.size()), size_t(expectedNumEnt) ); From 6fd509d2e58c7bd4522ff35bfafac095dfb9f3e2 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 12 Feb 2020 14:56:53 -0700 Subject: [PATCH 40/49] Tpetra: Fix build warning in test --- packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp b/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp index 37af38aa99e7..2931c549b262 100644 --- 
a/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp +++ b/packages/tpetra/core/test/CrsGraph/UnpackMerge.cpp @@ -325,7 +325,7 @@ namespace { // (anonymous) A_tgt.getGlobalRowCopy(gblRowToTest, gblColInds(), numEnt); const LO expectedNumEnt(unionGblColInds.size()); - TEST_EQUALITY( size_t(numEnt), expectedNumEnt ); + TEST_EQUALITY( size_t(numEnt), size_t(expectedNumEnt) ); TEST_EQUALITY( size_t(gblColInds.size()), size_t(expectedNumEnt) ); From 8beeb3c22e7e824e3256ffcb564c59c93f03da43 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 12 Feb 2020 14:58:58 -0700 Subject: [PATCH 41/49] Tpetra: Fix CrsPadding set difference sizes --- .../core/src/Tpetra_Details_CrsPadding.hpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index ab73f727f1a9..4992b67304ca 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -248,17 +248,15 @@ namespace Tpetra { if (oldDiffNumEnt == 0) { TEUCHOS_ASSERT( numSrcEnt >= numInCommon ); - const size_t newDiffNumEnt = numSrcEnt - numInCommon; - diffColInds.resize(newDiffNumEnt); + diffColInds.resize(numSrcEnt); auto diffEnd = std::set_difference(srcColInds, srcEnd, tgtColInds, tgtEnd, diffColInds.begin()); const size_t newLen(diffEnd - diffColInds.begin()); - TEUCHOS_ASSERT( newLen == newDiffNumEnt ); + TEUCHOS_ASSERT( newLen <= numSrcEnt ); + diffColInds.resize(newLen); } else { - TEUCHOS_ASSERT( diffColInds.data() != nullptr ); - // scratch = union(srcColInds, diffColInds) const size_t unionSize = numSrcEnt + oldDiffNumEnt - countNumInCommon(srcColInds, srcEnd, @@ -268,7 +266,6 @@ namespace Tpetra { scratchColInds_.resize(unionSize); } auto unionBeg = scratchColInds_.begin(); - auto unionEnd = std::set_union( srcColInds, srcEnd, diffColInds.begin(), diffColInds.end(), @@ -281,14 +278,13 @@ namespace Tpetra { 
unionBeg, unionEnd, tgtColInds, tgtEnd); TEUCHOS_ASSERT( unionSize >= unionTgtInCommon ); - const size_t newDiffNumEnt = unionSize - unionTgtInCommon; - TEUCHOS_ASSERT( newDiffNumEnt >= oldDiffNumEnt ); - diffColInds.resize(newDiffNumEnt); + diffColInds.resize(unionSize); auto diffEnd = std::set_difference(unionBeg, unionEnd, tgtColInds, tgtEnd, diffColInds.begin()); const size_t diffLen(diffEnd - diffColInds.begin()); - TEUCHOS_ASSERT( diffLen == newDiffNumEnt ); + TEUCHOS_ASSERT( diffLen <= unionSize ); + diffColInds.resize(diffLen); } } } From 4489c4dfb822c7a2e89c6b8746f37be943abc062 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 13:15:21 -0700 Subject: [PATCH 42/49] Tpetra: Add more verbose output to CrsGraph & Distributor @trilinos/tpetra Also, get rid of some "#ifdef HAVE_TPETRA_DEBUG ... #endif" from Distributor; instead, use the run-time value of Behavior::Debug("Distributor") to decide whether to run debug checks. --- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 20 +++ .../tpetra/core/src/Tpetra_CrsMatrix_def.hpp | 37 ++++- .../tpetra/core/src/Tpetra_Distributor.cpp | 154 ++++++++++-------- 3 files changed, 142 insertions(+), 69 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index a069a72313a5..e4144436db78 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5535,8 +5535,11 @@ namespace Tpetra { << numPacketsPerLID.extent(0) << endl; std::cerr << os.str(); } + const bool extraVerbose = + verbose && Details::Behavior::verbose("CrsPadding"); const LO numImports = static_cast(importLIDs.extent(0)); + TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports ); std::unique_ptr padding( new padding_type(padding_type::create_from_imports, numImports)); @@ -5572,6 +5575,13 @@ namespace Tpetra { // global column indices, then other stuff like the matrix // values in that row). 
const size_t numBytes = numPacketsPerLID_h[whichImport]; + if (extraVerbose) { + std::ostringstream os; + os << *prefix << "whichImport=" << whichImport + << ", numImports=" << numImports + << ", numBytes=" << numBytes << endl; + std::cerr << os.str(); + } if (numBytes == 0) { continue; // special case: no entries to unpack for this row } @@ -5579,8 +5589,18 @@ namespace Tpetra { const size_t numEntBeg = offset; const size_t numEntLen = PackTraits::packValueCount(origSrcNumEnt); + TEUCHOS_ASSERT( numBytes >= numEntLen ); + TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen ); PackTraits::unpackValue(origSrcNumEnt, imports_h.data() + numEntBeg); + if (extraVerbose) { + std::ostringstream os; + os << *prefix << "whichImport=" << whichImport + << ": origSrcNumEnt=" << origSrcNumEnt << endl; + std::cerr << os.str(); + } + TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) ); + TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) ); const size_t gidsBeg = numEntBeg + numEntLen; if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) { srcGblColIndsScratch.resize(origSrcNumEnt); diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index 237c60537adc..e2ed4f1e7a87 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -7879,7 +7879,9 @@ namespace Tpetra { if (verbose) { prefix = this->createPrefix("CrsMatrix", tfecfFuncName); std::ostringstream os; - os << *prefix << "importLIDs.extent(0): " + os << *prefix << "isStaticGraph(): " + << (isStaticGraph() ? 
"true" : "false") + << ", importLIDs.extent(0): " << importLIDs.extent(0) << ", imports.extent(0): " << imports.extent(0) @@ -7897,16 +7899,45 @@ namespace Tpetra { } else { { - auto padding = - myGraph_->computePaddingForCrsMatrixUnpack( + using padding_type = typename crs_graph_type::padding_type; + std::unique_ptr padding; + try { + padding = myGraph_->computePaddingForCrsMatrixUnpack( importLIDs, imports, numPacketsPerLID, verbose); + } + catch (std::exception& e) { + const auto rowMap = getRowMap(); + const auto comm = rowMap.is_null() ? Teuchos::null : + rowMap->getComm(); + const int myRank = comm.is_null() ? -1 : comm->getRank(); + TEUCHOS_TEST_FOR_EXCEPTION + (true, std::runtime_error, "Proc " << myRank << ": " + "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack " + "threw an exception: " << e.what()); + } + if (verbose) { + std::ostringstream os; + os << *prefix << "Call applyCrsPadding" << endl; + std::cerr << os.str(); + } applyCrsPadding(*padding, verbose); } + if (verbose) { + std::ostringstream os; + os << *prefix << "Call unpackAndCombineImplNonStatic" << endl; + std::cerr << os.str(); + } unpackAndCombineImplNonStatic(importLIDs, imports, numPacketsPerLID, constantNumPackets, distor, combineMode); } + + if (verbose) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } template diff --git a/packages/tpetra/core/src/Tpetra_Distributor.cpp b/packages/tpetra/core/src/Tpetra_Distributor.cpp index ce4c3344c9bf..82e53881f637 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.cpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.cpp @@ -33,13 +33,13 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER #include "Tpetra_Distributor.hpp" +#include "Tpetra_Details_Behavior.hpp" #include "Tpetra_Details_gathervPrint.hpp" +#include "Tpetra_Util.hpp" #include "Tpetra_Details_makeValidVerboseStream.hpp" #include "Teuchos_StandardParameterEntryValidators.hpp" #include "Teuchos_VerboseObjectParameterListHelpers.hpp" @@ -773,11 +773,11 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": computeReceives: "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << "{selfMessage_: " << (selfMessage_ ? "true" : "false") - << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + os << "Proc " << myRank << ": Tpetra::Distributor::computeReceives: "; + prefix = std::unique_ptr(new std::string(os.str())); + os << "selfMessage_: " << (selfMessage_ ? "true" : "false") + << ", tag: " << tag << endl; + std::cerr << os.str(); } // toProcsFromMe[i] == the number of messages sent by this process @@ -809,7 +809,7 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; os << *prefix << "Reduce & scatter" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // Compute the number of receives that this process needs to @@ -920,7 +920,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << "Post " << actualNumReceives << " irecv" << (actualNumReceives != size_t (1) ? "s" : "") << endl; - *out_ << os.str (); + std::cerr << os.str(); } // Post the (nonblocking) receives. @@ -936,7 +936,7 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; os << *prefix << "Posted any-proc irecv w/ tag " << tag << endl; - *out_ << os.str (); + std::cerr << os.str(); } } @@ -944,7 +944,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << "Post " << numSends_ << " send" << (numSends_ != size_t (1) ? 
"s" : "") << endl; - *out_ << os.str (); + std::cerr << os.str(); } // Post the sends: Tell each process to which we are sending how // many packets it should expect from us in the communication @@ -965,7 +965,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << "Posted send to Proc " << procsTo_[i] << " w/ tag " << tag << endl; - *out_ << os.str (); + std::cerr << os.str(); } } else { @@ -984,7 +984,7 @@ namespace Tpetra { std::ostringstream os; os << myRank << ": computeReceives: waitAll on " << requests.size () << " requests" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // // Wait on all the receives. When they arrive, check the status @@ -1031,8 +1031,8 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << *prefix << "Done!" << endl; - *out_ << os.str (); + os << *prefix << "Done" << endl; + std::cerr << os.str(); } } @@ -1044,20 +1044,25 @@ namespace Tpetra { using Teuchos::REDUCE_MAX; using Teuchos::reduceAll; using std::endl; - const char rawPrefix[] = "Tpetra::Distributor::createFromSends: "; + const char rawPrefix[] = "Tpetra::Distributor::createFromSends"; Teuchos::OSTab tab (out_); const size_t numExports = exportProcIDs.size(); const int myProcID = comm_->getRank(); const int numProcs = comm_->getSize(); + const bool debug = Details::Behavior::debug("Distributor"); + const size_t maxNumToPrint = verbose_ ? 
+ Details::Behavior::verbosePrintCountThreshold() : size_t(0); std::unique_ptr prefix; if (verbose_) { std::ostringstream os; os << "Proc " << myProcID << ": " << rawPrefix << ": "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << "exportPIDs: " << exportProcIDs << endl; - *out_ << os.str (); + prefix = std::unique_ptr(new std::string(os.str())); + Details::verbosePrintArray(os, exportProcIDs, "exportPIDs", + maxNumToPrint); + os << endl; + std::cerr << os.str(); } // exportProcIDs tells us the communication pattern for this @@ -1114,15 +1119,11 @@ namespace Tpetra { size_t numActive = 0; int needSendBuff = 0; // Boolean -#ifdef HAVE_TPETRA_DEBUG - int badID = -1; // only used in a debug build -#endif // HAVE_TPETRA_DEBUG + int badID = -1; // only used in debug mode for (size_t i = 0; i < numExports; ++i) { const int exportID = exportProcIDs[i]; if (exportID >= numProcs) { -#ifdef HAVE_TPETRA_DEBUG badID = myProcID; -#endif // HAVE_TPETRA_DEBUG break; } else if (exportID >= 0) { @@ -1147,32 +1148,31 @@ namespace Tpetra { } } -#ifdef HAVE_TPETRA_DEBUG - // Test whether any process in the communicator got an invalid - // process ID. If badID != -1 on this process, then it equals - // this process' rank. The max of all badID over all processes is - // the max rank which has an invalid process ID. - { + if (debug) { + // Test whether any process in the communicator got an invalid + // process ID. If badID != -1 on this process, then it equals + // this process' rank. The max of all badID over all processes + // is the max rank which has an invalid process ID. 
int gbl_badID; reduceAll (*comm_, REDUCE_MAX, badID, outArg (gbl_badID)); - TEUCHOS_TEST_FOR_EXCEPTION(gbl_badID >= 0, std::runtime_error, - Teuchos::typeName(*this) << "::createFromSends: Proc " << gbl_badID - << ", perhaps among other processes, got a bad send process ID."); + TEUCHOS_TEST_FOR_EXCEPTION + (gbl_badID >= 0, std::runtime_error, rawPrefix << "Proc " + << gbl_badID << ", perhaps among other processes, got a bad " + "send process ID."); } -#else - // FIXME (mfh 12 Apr 2013, 15 Jul 2015) Rather than simply - // ignoring this information, we should think about how to pass it - // along so that all the processes find out about it. In a - // release build with efficiency warnings turned off, the next - // collective communication happens in computeReceives(). We - // could figure out how to encode the error flag in that - // operation, for example by adding an extra entry to the - // collective's output array that encodes the error condition (0 - // on all processes if no error, else 1 on any process with the - // error, so that the sum will produce a nonzero value if any - // process had an error). I'll defer this change for now and - // recommend instead that people with troubles try a debug build. -#endif // HAVE_TPETRA_DEBUG + // FIXME (mfh 12 Apr 2013, 15 Jul 2015, 13 Feb 2020) Rather than + // simply ignoring this information when not in debug mode, we + // should think about how to pass it along so that all the + // processes find out about it. In a release build with + // efficiency warnings turned off, the next collective + // communication happens in computeReceives(). We could figure + // out how to encode the error flag in that operation, for example + // by adding an extra entry to the collective's output array that + // encodes the error condition (0 on all processes if no error, + // else 1 on any process with the error, so that the sum will + // produce a nonzero value if any process had an error). 
I'll + // defer this change for now and recommend instead that people + // with troubles try a debug build. #if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS) { @@ -1186,6 +1186,12 @@ namespace Tpetra { } #endif + if (verbose_) { + std::ostringstream os; + os << *prefix << "Detect whether I have a self message" << endl; + std::cerr << os.str(); + } + // Determine from the caller's data whether or not the current // process should send (a) message(s) to itself. if (starts[myProcID] != 0) { @@ -1195,11 +1201,15 @@ namespace Tpetra { selfMessage_ = false; } -#ifdef HAVE_TEUCHOS_DEBUG bool index_neq_numActive = false; bool send_neq_numSends = false; -#endif if (! needSendBuff) { + if (verbose_) { + std::ostringstream os; + os << *prefix << "I don't need a send buffer or indicesTo_ " + "(fast path)" << endl; + std::cerr << os.str(); + } // grouped by proc, no send buffer or indicesTo_ needed numSends_ = 0; // Count total number of sends, i.e., total number of procs to @@ -1235,11 +1245,9 @@ namespace Tpetra { index += starts[procID]; procIndex += starts[procID]; } -#ifdef HAVE_TEUCHOS_DEBUG if (index != numActive) { index_neq_numActive = true; } -#endif } // sort the startsTo and proc IDs together, in ascending order, according // to proc IDs @@ -1257,6 +1265,12 @@ namespace Tpetra { } } else { + if (verbose_) { + std::ostringstream os; + os << *prefix << "I need a send buffer & indicesTo_ " + "(slow path)" << endl; + std::cerr << os.str(); + } // not grouped by proc, need send buffer and indicesTo_ // starts[i] is the number of sends to proc i @@ -1341,34 +1355,42 @@ namespace Tpetra { ++snd; } } -#ifdef HAVE_TEUCHOS_DEBUG if (snd != numSends_) { send_neq_numSends = true; } -#endif } -#ifdef HAVE_TEUCHOS_DEBUG - SHARED_TEST_FOR_EXCEPTION(index_neq_numActive, std::logic_error, - "Tpetra::Distributor::createFromSends: logic error. 
Please notify the Tpetra team.",*comm_); - SHARED_TEST_FOR_EXCEPTION(send_neq_numSends, std::logic_error, - "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_); -#endif + if (debug) { + SHARED_TEST_FOR_EXCEPTION + (index_neq_numActive, std::logic_error, + rawPrefix << "logic error. Please notify the Tpetra team.", *comm_); + SHARED_TEST_FOR_EXCEPTION + (send_neq_numSends, std::logic_error, + rawPrefix << "logic error. Please notify the Tpetra team.", *comm_); + } - if (selfMessage_) --numSends_; + if (selfMessage_) { + if (verbose_) { + std::ostringstream os; + os << *prefix << "Sending self message; numSends " + << numSends_ << " -> " << (numSends_ - 1) << endl; + std::cerr << os.str(); + } + --numSends_; + } // Invert map to see what msgs are received and what length computeReceives(); - if (verbose_) { - std::ostringstream os; - os << *prefix << "Done!" << endl; - *out_ << os.str (); - } - // createFromRecvs() calls createFromSends(), but will set // howInitialized_ again after calling createFromSends(). 
howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS; + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done; totalReceiveLength_=" + << totalReceiveLength_ << endl; + std::cerr << os.str(); + } return totalReceiveLength_; } From d15f0162c5f85599bd676da750563041f26acfff Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 15:06:28 -0700 Subject: [PATCH 43/49] Tpetra: More verbose debug output * Add verbose debug output (via Behavior::verbose("CrsPadding")) to CrsPadding * Refactor Distributor's verbose debug output --- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 32 +- .../core/src/Tpetra_Details_CrsPadding.hpp | 99 ++++- .../tpetra/core/src/Tpetra_Distributor.cpp | 117 ++--- .../tpetra/core/src/Tpetra_Distributor.hpp | 412 +++++++++--------- 4 files changed, 398 insertions(+), 262 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index e4144436db78..2aa3d07eb549 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5248,9 +5248,15 @@ namespace Tpetra { std::cerr << os.str(); } + const int myRank = [&] () { + auto comm = rowMap_.is_null() ? Teuchos::null : + rowMap_->getComm(); + return comm.is_null() ? -1 : comm->getRank(); + } (); std::unique_ptr padding( new padding_type(padding_type::create_from_sames_and_permutes, - numSameIDs, permuteFromLIDs.extent(0))); + myRank, numSameIDs, + permuteFromLIDs.extent(0))); // We're accessing data on host, so make sure all device // computations on the graphs' data are done. @@ -5441,9 +5447,14 @@ namespace Tpetra { } const LO numImports = static_cast(importLIDs.extent(0)); + const int myRank = [&] () { + auto comm = rowMap_.is_null() ? Teuchos::null : + rowMap_->getComm(); + return comm.is_null() ? 
-1 : comm->getRank(); + } (); std::unique_ptr padding( new padding_type(padding_type::create_from_imports, - numImports)); + myRank, numImports)); Kokkos::fence(); // Make sure device sees changes made by host if (imports.need_sync_host()) { imports.sync_host(); @@ -5540,9 +5551,14 @@ namespace Tpetra { const LO numImports = static_cast(importLIDs.extent(0)); TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports ); + const int myRank = [&] () { + auto comm = rowMap_.is_null() ? Teuchos::null : + rowMap_->getComm(); + return comm.is_null() ? -1 : comm->getRank(); + } (); std::unique_ptr padding( new padding_type(padding_type::create_from_imports, - numImports)); + myRank, numImports)); Kokkos::fence(); // Make sure host sees changes made by device if (imports.need_sync_host()) { imports.sync_host(); @@ -5596,7 +5612,8 @@ namespace Tpetra { if (extraVerbose) { std::ostringstream os; os << *prefix << "whichImport=" << whichImport - << ": origSrcNumEnt=" << origSrcNumEnt << endl; + << ", numImports=" << numImports + << ", origSrcNumEnt=" << origSrcNumEnt << endl; std::cerr << os.str(); } TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) ); @@ -5616,6 +5633,13 @@ namespace Tpetra { tgtGblColIndsScratch, *this, tgtGblRowInd); const size_t origNumTgtEnt(tgtGblColInds.size()); + if (extraVerbose) { + std::ostringstream os; + os << *prefix << "whichImport=" << whichImport + << ", numImports=" << numImports + << ": Call padding->update_import" << endl; + std::cerr << os.str(); + } padding->update_import(tgtNumDups, srcNumDups, mergedNumDups, whichImport, tgtLclRowInd, tgtGblColInds.getRawPtr(), diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index 4992b67304ca..cbd9f6b06e96 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -4,7 +4,9 @@ #include "Tpetra_Details_Behavior.hpp" #include "Tpetra_Util.hpp" #include -#include 
+#include +#include +#include #include namespace Tpetra { @@ -30,14 +32,18 @@ namespace Tpetra { static constexpr create_from_sames_and_permutes_tag create_from_sames_and_permutes {}; CrsPadding(create_from_sames_and_permutes_tag /* tag */, + const int myRank, const size_t /* numSameIDs */, const size_t /* numPermutes */) + : myRank_(myRank) {} struct create_from_imports_tag {}; static constexpr create_from_imports_tag create_from_imports {}; CrsPadding(create_from_imports_tag /* tag */, + const int myRank, const size_t /* numImports */) + : myRank_(myRank) {} void @@ -166,6 +172,18 @@ namespace Tpetra { const size_t origNumSrcEnt, const bool srcIsUnique) { + using std::endl; + std::unique_ptr prefix; + if (verbose_) { + prefix = createPrefix("update_impl"); + std::ostringstream os; + os << *prefix << "Start: " + << "targetLocalIndex=" << targetLocalIndex + << ", origNumTgtEnt=" << origNumTgtEnt + << ", origNumSrcEnt=" << origNumSrcEnt << endl; + std::cerr << os.str(); + } + // FIXME (08 Feb 2020) We only need to sort and unique // tgtGblColInds if we haven't already seen it before. 
size_t newNumTgtEnt = origNumTgtEnt; @@ -177,6 +195,12 @@ namespace Tpetra { } tgtNumDups += (origNumTgtEnt - newNumTgtEnt); + if (verbose_) { + std::ostringstream os; + os << *prefix << "tgtNumDups=" << tgtNumDups << endl; + std::cerr << os.str(); + } + size_t newNumSrcEnt = origNumSrcEnt; auto srcEnd = srcGblColInds + origNumSrcEnt; std::sort(srcGblColInds, srcEnd); @@ -186,12 +210,25 @@ namespace Tpetra { } srcNumDups += (origNumSrcEnt - newNumSrcEnt); + if (verbose_) { + std::ostringstream os; + os << *prefix << "srcNumDups=" << srcNumDups << endl; + std::cerr << os.str(); + } + size_t unionNumEnt = 0; merge_with_current_state(phase, unionNumEnt, whichImport, targetLocalIndex, tgtGblColInds, newNumTgtEnt, srcGblColInds, newNumSrcEnt); unionNumDups += (newNumTgtEnt + newNumSrcEnt - unionNumEnt); + + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done: " + << "unionNumDups=" << unionNumDups << endl; + std::cerr << os.str(); + } } std::vector& @@ -214,6 +251,17 @@ namespace Tpetra { const size_t numSrcEnt) { using Details::countNumInCommon; + using std::endl; + std::unique_ptr prefix; + if (verbose_) { + prefix = createPrefix("merge_with_current_state"); + std::ostringstream os; + os << *prefix << "Start: " + << "tgtLclRowInd=" << tgtLclRowInd + << ", numTgtEnt=" << numTgtEnt + << ", numSrcEnt=" << numSrcEnt << endl; + std::cerr << os.str(); + } // We only need to accumulate those source indices that are // not already target indices. This is because we always have // the target indices on input to this function, so there's no @@ -239,6 +287,12 @@ namespace Tpetra { unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; if (unionNumEnt > numTgtEnt) { + if (verbose_) { + std::ostringstream os; + os << *prefix << "unionNumEnt=" << unionNumEnt + << " > numTgtEnt=" << numTgtEnt << endl; + std::cerr << os.str(); + } TEUCHOS_ASSERT( numSrcEnt != 0 ); // At least one input source index isn't in the target. 
@@ -247,6 +301,11 @@ namespace Tpetra { const size_t oldDiffNumEnt = diffColInds.size(); if (oldDiffNumEnt == 0) { + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=0" << endl; + std::cerr << os.str(); + } TEUCHOS_ASSERT( numSrcEnt >= numInCommon ); diffColInds.resize(numSrcEnt); auto diffEnd = std::set_difference(srcColInds, srcEnd, @@ -257,6 +316,12 @@ namespace Tpetra { diffColInds.resize(newLen); } else { + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << "; call countNumInCommon" << endl; + std::cerr << os.str(); + } // scratch = union(srcColInds, diffColInds) const size_t unionSize = numSrcEnt + oldDiffNumEnt - countNumInCommon(srcColInds, srcEnd, @@ -265,6 +330,13 @@ namespace Tpetra { if (scratchColInds_.size() < unionSize) { scratchColInds_.resize(unionSize); } + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << ", unionSize=" << unionSize << ", call set_union" + << endl; + std::cerr << os.str(); + } auto unionBeg = scratchColInds_.begin(); auto unionEnd = std::set_union( srcColInds, srcEnd, @@ -276,6 +348,14 @@ namespace Tpetra { // diffColInds = difference(scratch, tgtColInds) const size_t unionTgtInCommon = countNumInCommon( unionBeg, unionEnd, tgtColInds, tgtEnd); + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << ", unionSize=" << unionSize + << ", unionTgtInCommon=" << unionTgtInCommon + << "; call set_difference" << endl; + std::cerr << os.str(); + } TEUCHOS_ASSERT( unionSize >= unionTgtInCommon ); diffColInds.resize(unionSize); @@ -287,12 +367,29 @@ namespace Tpetra { diffColInds.resize(diffLen); } } + + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } + } + + std::unique_ptr + createPrefix(const char funcName[]) + { + std::ostringstream os; + os << "Proc " << myRank_ << ": CrsPadding::" << funcName + << ": "; + return 
std::unique_ptr(new std::string(os.str())); } // imports may overlap with sames and/or permutes, so it makes // sense to store them all in one map. std::map > entries_; std::vector scratchColInds_; + int myRank_ = -1; + bool verbose_ = Behavior::verbose("CrsPadding"); }; } // namespace Details } // namespace Tpetra diff --git a/packages/tpetra/core/src/Tpetra_Distributor.cpp b/packages/tpetra/core/src/Tpetra_Distributor.cpp index 82e53881f637..9ea059d35d3b 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.cpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.cpp @@ -145,14 +145,12 @@ namespace Tpetra { Distributor:: Distributor (const Teuchos::RCP >& comm, - const Teuchos::RCP& out, + const Teuchos::RCP& /* out */, const Teuchos::RCP& plist) : comm_ (comm) - , out_ (::Tpetra::Details::makeValidVerboseStream (out)) , howInitialized_ (Details::DISTRIBUTOR_NOT_INITIALIZED) , sendType_ (Details::DISTRIBUTOR_SEND) , barrierBetween_ (barrierBetween_default) - , verbose_ (tpetraDistributorDebugDefault) , selfMessage_ (false) , numSends_ (0) , maxSendLength_ (0) @@ -162,9 +160,7 @@ namespace Tpetra { , lastRoundBytesRecv_ (0) , useDistinctTags_ (useDistinctTags_default) { - TEUCHOS_ASSERT( ! out_.is_null () ); - - this->setParameterList (plist); // sets verbose_ via Behavior + this->setParameterList(plist); #ifdef TPETRA_DISTRIBUTOR_TIMERS makeTimers (); #endif // TPETRA_DISTRIBUTOR_TIMERS @@ -190,7 +186,6 @@ namespace Tpetra { Distributor:: Distributor (const Distributor& distributor) : comm_ (distributor.comm_) - , out_ (distributor.out_) , howInitialized_ (Details::DISTRIBUTOR_INITIALIZED_BY_COPY) , sendType_ (distributor.sendType_) , barrierBetween_ (distributor.barrierBetween_) @@ -217,8 +212,6 @@ namespace Tpetra { using Teuchos::RCP; using Teuchos::rcp; - TEUCHOS_ASSERT( ! out_.is_null () ); - RCP rhsList = distributor.getParameterList (); RCP newList = rhsList.is_null () ? 
Teuchos::null : Teuchos::parameterList (*rhsList); @@ -235,7 +228,6 @@ namespace Tpetra { using Teuchos::RCP; std::swap (comm_, rhs.comm_); - std::swap (out_, rhs.out_); std::swap (howInitialized_, rhs.howInitialized_); std::swap (sendType_, rhs.sendType_); std::swap (barrierBetween_, rhs.barrierBetween_); @@ -276,6 +268,25 @@ namespace Tpetra { // Distributor use the same timers. } + bool + Distributor::getVerbose() + { + return Details::Behavior::verbose("Distributor") || + Details::Behavior::verbose("Tpetra::Distributor"); + } + + std::unique_ptr + Distributor:: + createPrefix(const char methodName[]) const + { + const int myRank = comm_.is_null() ? -1 : comm_->getRank(); + std::ostringstream pfxStrm; + pfxStrm << "Proc " << myRank << ": Tpetra::Distributor::" + << methodName << ": "; + return std::unique_ptr( + new std::string(pfxStrm.str())); + } + void Distributor:: setParameterList (const Teuchos::RCP& plist) @@ -284,19 +295,12 @@ namespace Tpetra { using Teuchos::FancyOStream; using Teuchos::getIntegralValue; using Teuchos::includesVerbLevel; - using Teuchos::OSTab; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::RCP; using std::endl; - const bool verboseDefault = Behavior::verbose ("Distributor") || - Behavior::verbose ("Tpetra::Distributor"); - - if (plist.is_null ()) { - verbose_ = verboseDefault; - } - else { + if (! plist.is_null()) { RCP validParams = getValidParameters (); plist->validateParametersAndSetDefaults (*validParams); @@ -305,7 +309,6 @@ namespace Tpetra { const Details::EDistributorSendType sendType = getIntegralValue (*plist, "Send type"); const bool useDistinctTags = plist->get ("Use distinct tags"); - const bool debug = plist->get ("Debug"); { // mfh 03 May 2016: We keep this option only for backwards // compatibility, but it must always be true. 
See discussion of @@ -340,7 +343,6 @@ namespace Tpetra { sendType_ = sendType; barrierBetween_ = barrierBetween; useDistinctTags_ = useDistinctTags; - verbose_ = debug || verboseDefault; // ParameterListAcceptor semantics require pointer identity of the // sublist passed to setParameterList(), so we save the pointer. @@ -444,7 +446,7 @@ namespace Tpetra { void Distributor::createReverseDistributor() const { - reverseDistributor_ = Teuchos::rcp (new Distributor (comm_, out_)); + reverseDistributor_ = Teuchos::rcp(new Distributor(comm_)); reverseDistributor_->howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE; reverseDistributor_->sendType_ = sendType_; reverseDistributor_->barrierBetween_ = barrierBetween_; @@ -521,47 +523,45 @@ namespace Tpetra { using Teuchos::FancyOStream; using Teuchos::includesVerbLevel; using Teuchos::is_null; - using Teuchos::OSTab; using Teuchos::RCP; using Teuchos::waitAll; using std::endl; - Teuchos::OSTab tab (out_); - #ifdef TPETRA_DISTRIBUTOR_TIMERS Teuchos::TimeMonitor timeMon (*timer_doWaits_); #endif // TPETRA_DISTRIBUTOR_TIMERS const int myRank = comm_->getRank (); + const bool debug = Details::Behavior::debug("Distributor"); + std::unique_ptr prefix; if (verbose_) { + prefix = createPrefix("doWaits"); std::ostringstream os; - os << myRank << ": doWaits: # reqs = " - << requests_.size () << endl; - *out_ << os.str (); + os << *prefix << "Start: requests_.size(): " + << requests_.size() << endl; + std::cerr << os.str(); } if (requests_.size() > 0) { - waitAll (*comm_, requests_()); - -#ifdef HAVE_TEUCHOS_DEBUG - // Make sure that waitAll() nulled out all the requests. - for (Array > >::const_iterator it = requests_.begin(); - it != requests_.end(); ++it) - { - TEUCHOS_TEST_FOR_EXCEPTION( ! 
is_null (*it), std::runtime_error, - Teuchos::typeName(*this) << "::doWaits(): Communication requests " - "should all be null aftr calling Teuchos::waitAll() on them, but " - "at least one request is not null."); + waitAll(*comm_, requests_()); + + if (debug) { + // Make sure that waitAll() nulled out all the requests. + for (auto it = requests_.begin(); it != requests_.end(); ++it) { + TEUCHOS_TEST_FOR_EXCEPTION + (! is_null(*it), std::runtime_error, + "Tpetra::Distributor::doWaits: Communication requests " + "should all be null aftr calling Teuchos::waitAll on " + "them, but at least one request is not null."); + } } -#endif // HAVE_TEUCHOS_DEBUG // Restore the invariant that requests_.size() is the number of // outstanding nonblocking communication requests. requests_.resize (0); } -#ifdef HAVE_TEUCHOS_DEBUG - { + if (debug) { const int localSizeNonzero = (requests_.size () != 0) ? 1 : 0; int globalSizeNonzero = 0; Teuchos::reduceAll (*comm_, Teuchos::REDUCE_MAX, @@ -573,12 +573,11 @@ namespace Tpetra { "a nonzero number of outstanding posts. There should be none at this " "point. 
Please report this bug to the Tpetra developers."); } -#endif // HAVE_TEUCHOS_DEBUG if (verbose_) { std::ostringstream os; - os << myRank << ": doWaits done" << endl; - *out_ << os.str (); + os << *prefix << "Done" << endl; + std::cerr << os.str(); } } @@ -659,7 +658,7 @@ namespace Tpetra { void Distributor:: - describe (Teuchos::FancyOStream &out, + describe (Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { using std::endl; @@ -762,7 +761,6 @@ namespace Tpetra { using Teuchos::waitAll; using std::endl; - Teuchos::OSTab tab (out_); const int myRank = comm_->getRank(); const int numProcs = comm_->getSize(); @@ -772,11 +770,11 @@ namespace Tpetra { std::unique_ptr prefix; if (verbose_) { + prefix = createPrefix("computeReceives"); std::ostringstream os; - os << "Proc " << myRank << ": Tpetra::Distributor::computeReceives: "; - prefix = std::unique_ptr(new std::string(os.str())); - os << "selfMessage_: " << (selfMessage_ ? "true" : "false") - << ", tag: " << tag << endl; + os << *prefix + << "selfMessage_: " << (selfMessage_ ? 
"true" : "false") + << ", pathTag: " << pathTag << ", tag: " << tag << endl; std::cerr << os.str(); } @@ -1046,7 +1044,6 @@ namespace Tpetra { using std::endl; const char rawPrefix[] = "Tpetra::Distributor::createFromSends"; - Teuchos::OSTab tab (out_); const size_t numExports = exportProcIDs.size(); const int myProcID = comm_->getRank(); const int numProcs = comm_->getSize(); @@ -1056,9 +1053,9 @@ namespace Tpetra { Details::Behavior::verbosePrintCountThreshold() : size_t(0); std::unique_ptr prefix; if (verbose_) { + prefix = createPrefix("createFromSends"); std::ostringstream os; - os << "Proc " << myProcID << ": " << rawPrefix << ": "; - prefix = std::unique_ptr(new std::string(os.str())); + os << *prefix; Details::verbosePrintArray(os, exportProcIDs, "exportPIDs", maxNumToPrint); os << endl; @@ -1399,6 +1396,14 @@ namespace Tpetra { createFromSendsAndRecvs (const Teuchos::ArrayView& exportProcIDs, const Teuchos::ArrayView& remoteProcIDs) { + std::unique_ptr prefix; + if (verbose_) { + prefix = createPrefix("createFromSendsAndRecvs"); + std::ostringstream os; + os << *prefix << "Start" << std::endl; + std::cerr << os.str(); + } + // note the exportProcIDs and remoteProcIDs _must_ be a list that has // an entry for each GID. 
If the export/remoteProcIDs is taken from // the getProcs{From|To} lists that are extracted from a previous distributor, @@ -1590,6 +1595,12 @@ namespace Tpetra { } #endif // 0 numReceives_-=selfMessage_; + + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done" << std::endl; + std::cerr << os.str(); + } } } // namespace Tpetra diff --git a/packages/tpetra/core/src/Tpetra_Distributor.hpp b/packages/tpetra/core/src/Tpetra_Distributor.hpp index dddfd992850c..cfdec6717c61 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.hpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.hpp @@ -34,8 +34,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER @@ -43,11 +41,11 @@ #define TPETRA_DISTRIBUTOR_HPP #include "Tpetra_Util.hpp" -#include -#include -#include -#include -#include +#include "Teuchos_as.hpp" +#include "Teuchos_Describable.hpp" +#include "Teuchos_ParameterListAcceptorDefaultBase.hpp" +#include "Teuchos_VerboseObject.hpp" +#include "Tpetra_Details_Behavior.hpp" // If TPETRA_DISTRIBUTOR_TIMERS is defined, Distributor will time // doPosts (both versions) and doWaits, and register those timers with @@ -808,9 +806,6 @@ namespace Tpetra { //! The communicator over which to perform distributions. Teuchos::RCP > comm_; - //! Output stream for debug output. - Teuchos::RCP out_; - //! How the Distributor was initialized (if it was). Details::EDistributorHowInitialized howInitialized_; @@ -823,8 +818,18 @@ namespace Tpetra { //! Whether to do a barrier between receives and sends in do[Reverse]Posts(). bool barrierBetween_; + //! Get default value of verbose_ (see below). + static bool getVerbose(); + + /// \brief Get prefix for verbose debug output. 
+ /// + /// \brief methodName [in] Name of the method in which you want to + /// print verbose debug output. + std::unique_ptr + createPrefix(const char methodName[]) const; + //! Whether to print copious debug output to stderr on all processes. - bool verbose_; + bool verbose_ = getVerbose(); //@} /// \brief Whether I am supposed to send a message to myself. @@ -1142,35 +1147,32 @@ namespace Tpetra { using Teuchos::includesVerbLevel; using Teuchos::ireceive; using Teuchos::isend; - using Teuchos::OSTab; using Teuchos::readySend; using Teuchos::send; using Teuchos::ssend; using Teuchos::TypeNameTraits; using Teuchos::typeName; using std::endl; - typedef Array::size_type size_type; + using size_type = Array::size_type; #ifdef TPETRA_DISTRIBUTOR_TIMERS Teuchos::TimeMonitor timeMon (*timer_doPosts3_); #endif // TPETRA_DISTRIBUTOR_TIMERS + const bool debug = Details::Behavior::debug("Distributor"); const int myRank = comm_->getRank (); // Run-time configurable parameters that come from the input // ParameterList set by setParameterList(). const Details::EDistributorSendType sendType = sendType_; const bool doBarrier = barrierBetween_; - Teuchos::OSTab tab0 (out_); std::unique_ptr prefix; if (verbose_) { + prefix = createPrefix("doPosts(3-arg, ArrayRCP)"); std::ostringstream os; - os << "Proc " << myRank << ": Distributor::doPosts(3-arg, ArrayRCP): "; - prefix = std::unique_ptr (new std::string (os.str ())); - os << endl; - *out_ << os.str (); + os << *prefix << "Start" << endl; + std::cerr << os.str(); } - Teuchos::OSTab tab1 (out_); TEUCHOS_TEST_FOR_EXCEPTION( sendType == Details::DISTRIBUTOR_RSEND && ! 
doBarrier, std::logic_error, @@ -1209,13 +1211,13 @@ namespace Tpetra { const int pathTag = 0; const int tag = this->getTag (pathTag); -#ifdef HAVE_TPETRA_DEBUG - TEUCHOS_TEST_FOR_EXCEPTION - (requests_.size () != 0, - std::logic_error, - "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process " - << myRank << ": requests_.size() = " << requests_.size () << " != 0."); -#endif // HAVE_TPETRA_DEBUG + if (debug) { + TEUCHOS_TEST_FOR_EXCEPTION + (requests_.size () != 0, + std::logic_error, + "Tpetra::Distributor::doPosts(3 args, Teuchos::ArrayRCP): Process " + << myRank << ": requests_.size() = " << requests_.size () << " != 0."); + } // Distributor uses requests_.size() as the number of outstanding // nonblocking message requests, so we resize to zero to maintain @@ -1238,7 +1240,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << (indicesTo_.empty () ? "Fast" : "Slow") << ": Post receives" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // Post the nonblocking receives. It's common MPI wisdom to post @@ -1260,7 +1262,7 @@ namespace Tpetra { os << *prefix << (indicesTo_.empty () ? "Fast" : "Slow") << ": Post irecv: {source: " << procsFrom_[i] << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // If my process is receiving these packet(s) from another // process (not a self-receive): @@ -1298,7 +1300,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << (indicesTo_.empty () ? "Fast" : "Slow") << ": Barrier" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // If we are using ready sends (MPI_Rsend) below, we need to do // a barrier before we post the ready sends. This is because a @@ -1333,7 +1335,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << (indicesTo_.empty () ? 
"Fast" : "Slow") << ": Post sends" << endl; - *out_ << os.str (); + std::cerr << os.str(); } if (indicesTo_.empty ()) { @@ -1350,7 +1352,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << ": Post send: {target: " << procsTo_[p] << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + std::cerr << os.str(); } ArrayView tmpSend = @@ -1394,7 +1396,7 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; os << *prefix << "Fast: Self-send" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // This is how we "send a message to ourself": we copy from // the export buffer to the import buffer. That saves @@ -1430,7 +1432,7 @@ namespace Tpetra { std::ostringstream os; os << *prefix << "Slow: Post send: " "{target: " << procsTo_[p] << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + std::cerr << os.str(); } typename ArrayView::iterator srcBegin, srcEnd; @@ -1484,7 +1486,7 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; os << *prefix << "Slow: Self-send" << endl; - *out_ << os.str (); + std::cerr << os.str(); } for (size_t k = 0; k < lengthsTo_[selfNum]; ++k) { std::copy (exports.begin()+indicesTo_[selfIndex]*numPackets, @@ -1499,7 +1501,7 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; os << *prefix << "Done!" << endl; - *out_ << os.str (); + std::cerr << os.str(); } } @@ -1520,13 +1522,16 @@ namespace Tpetra { using Teuchos::send; using Teuchos::ssend; using Teuchos::TypeNameTraits; -#ifdef HAVE_TEUCHOS_DEBUG - using Teuchos::OSTab; -#endif // HAVE_TEUCHOS_DEBUG using std::endl; typedef Array::size_type size_type; - Teuchos::OSTab tab (out_); + std::unique_ptr prefix; + if (verbose_) { + prefix = createPrefix("doPosts(4-arg, Teuchos)"); + std::ostringstream os; + os << *prefix << "Start" << endl; + std::cerr << os.str(); + } #ifdef TPETRA_DISTRIBUTOR_TIMERS Teuchos::TimeMonitor timeMon (*timer_doPosts4_); @@ -1550,11 +1555,9 @@ namespace Tpetra { // } // // Add one tab level. 
We declare this outside the doPrint scopes // // so that the tab persists until the end of this method. -// Teuchos::OSTab tab = this->getOSTab (); // if (doPrint) { // *out << "Parameters:" << endl; // { -// OSTab tab2 (out); // *out << "sendType: " << DistributorSendTypeEnumToString (sendType) // << endl << "barrierBetween: " << doBarrier << endl; // } @@ -1605,9 +1608,9 @@ namespace Tpetra { #endif // HAVE_TEUCHOS_DEBUG if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID << ": doPosts(4 args, Teuchos::ArrayRCP, " - << (indicesTo_.empty () ? "fast" : "slow") << ")" << endl; - *out_ << os.str (); + os << *prefix << (indicesTo_.empty () ? "fast" : "slow") + << " path" << endl; + std::cerr << os.str(); } // Distributor uses requests_.size() as the number of outstanding @@ -1715,9 +1718,8 @@ namespace Tpetra { if (indicesTo_.empty()) { if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID - << ": doPosts(4 args, Teuchos::ArrayRCP, fast): posting sends" << endl; - *out_ << os.str (); + os << *prefix << "fast path: posting sends" << endl; + std::cerr << os.str(); } // Data are already blocked (laid out) by process, so we don't @@ -1773,17 +1775,15 @@ namespace Tpetra { } if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID - << ": doPosts(4 args, Teuchos::ArrayRCP, fast) done" << endl; - *out_ << os.str (); + os << *prefix << "fast path: done" << endl; + std::cerr << os.str(); } } else { // data are not blocked by proc, use send buffer if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID - << ": doPosts(4 args, Teuchos::ArrayRCP, slow): posting sends" << endl; - *out_ << os.str (); + os << *prefix << "slow path: posting sends" << endl; + std::cerr << os.str(); } // FIXME (mfh 05 Mar 2013) This may be broken for Isend. 
@@ -1864,9 +1864,8 @@ namespace Tpetra { } if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID - << ": doPosts(4 args, Teuchos::ArrayRCP, slow) done" << endl; - *out_ << os.str (); + os << *prefix << "slow path: done" << endl; + std::cerr << os.str(); } } } @@ -1995,17 +1994,15 @@ namespace Tpetra { using Teuchos::rcp; using std::endl; - RCP tab0, tab1; + std::unique_ptr prefix; if (verbose_) { - tab0 = rcp (new Teuchos::OSTab (out_)); - const int myRank = comm_->getRank (); + prefix = createPrefix("doPostsAndWaits(3-arg, Kokkos)"); std::ostringstream os; - os << "Proc " << myRank - << ": Distributor::doPostsAndWaits(3 args, Kokkos): " - << "{sendType: " << DistributorSendTypeEnumToString (sendType_) - << ", barrierBetween: " << barrierBetween_ << "}" << endl; - *out_ << os.str (); - tab1 = rcp (new Teuchos::OSTab (out_)); + os << *prefix << "sendType: " + << DistributorSendTypeEnumToString(sendType_) + << ", barrierBetween: " + << (barrierBetween_ ? "true" : "false") << endl; + std::cerr << os.str(); } TEUCHOS_TEST_FOR_EXCEPTION( @@ -2015,39 +2012,55 @@ namespace Tpetra { "this method with posts outstanding."); if (verbose_) { - const int myRank = comm_->getRank (); std::ostringstream os; - os << "Proc " << myRank - << ": Distributor::doPostsAndWaits: Call doPosts" << endl; - *out_ << os.str (); + os << *prefix << "Call doPosts" << endl; + std::cerr << os.str(); } doPosts (exports, numPackets, imports); if (verbose_) { - const int myRank = comm_->getRank (); std::ostringstream os; - os << "Proc " << myRank - << ": Distributor::doPostsAndWaits: Call doWaits" << endl; - *out_ << os.str (); + os << *prefix << "Call doWaits" << endl; + std::cerr << os.str(); } doWaits (); + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } template typename std::enable_if<(Kokkos::Impl::is_view::value && Kokkos::Impl::is_view::value)>::type Distributor:: - doPostsAndWaits (const ExpView& exports, - const 
Teuchos::ArrayView& numExportPacketsPerLID, - const ImpView& imports, - const Teuchos::ArrayView& numImportPacketsPerLID) + doPostsAndWaits(const ExpView& exports, + const Teuchos::ArrayView& numExportPacketsPerLID, + const ImpView& imports, + const Teuchos::ArrayView& numImportPacketsPerLID) { - TEUCHOS_TEST_FOR_EXCEPTION( - requests_.size () != 0, std::runtime_error, - "Tpetra::Distributor::doPostsAndWaits(4 args): There are " - << requests_.size () << " outstanding nonblocking messages pending. " - "It is incorrect to call this method with posts outstanding."); + using std::endl; + const char rawPrefix[] = "doPostsAndWaits(4-arg, Kokkos)"; - doPosts (exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); - doWaits (); + std::unique_ptr prefix; + if (verbose_) { + prefix = createPrefix(rawPrefix); + std::ostringstream os; + os << *prefix << "Start" << endl; + std::cerr << os.str(); + } + TEUCHOS_TEST_FOR_EXCEPTION + (requests_.size() != 0, std::runtime_error, + "Tpetra::Distributor::" << rawPrefix << ": There is/are " + << requests_.size() << " outstanding nonblocking message(s) " + "pending. It is incorrect to call this method with posts " + "outstanding."); + doPosts(exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + doWaits(); + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } @@ -2064,7 +2077,6 @@ namespace Tpetra { using Teuchos::includesVerbLevel; using Teuchos::ireceive; using Teuchos::isend; - using Teuchos::OSTab; using Teuchos::readySend; using Teuchos::send; using Teuchos::ssend; @@ -2080,10 +2092,11 @@ namespace Tpetra { typedef ImpView imports_view_type; #ifdef KOKKOS_ENABLE_CUDA - static_assert (! std::is_same::value && - ! std::is_same::value, - "Please do not use Tpetra::Distributor with UVM " - "allocations. See GitHub issue #1088."); + static_assert + (! std::is_same::value && + ! 
std::is_same::value, + "Please do not use Tpetra::Distributor with UVM allocations. " + "See Trilinos GitHub issue #1088."); #endif // KOKKOS_ENABLE_CUDA #ifdef TPETRA_DISTRIBUTOR_TIMERS @@ -2096,14 +2109,13 @@ namespace Tpetra { const Details::EDistributorSendType sendType = sendType_; const bool doBarrier = barrierBetween_; - Teuchos::OSTab tab0 (out_); + std::unique_ptr prefix; if (verbose_) { + prefix = createPrefix("doPosts(3-arg, Kokkos)"); std::ostringstream os; - os << "Proc " << myRank - << ": Distributor::doPosts(3 args, Kokkos)" << endl; - *out_ << os.str (); + os << *prefix << "Start" << endl; + std::cerr << os.str(); } - Teuchos::OSTab tab1 (out_); TEUCHOS_TEST_FOR_EXCEPTION( sendType == Details::DISTRIBUTOR_RSEND && ! doBarrier, @@ -2124,11 +2136,11 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts: totalNumImportPackets = " << + os << *prefix << "totalNumImportPackets = " << totalNumImportPackets << " = " << totalReceiveLength_ << " * " << numPackets << "; imports.extent(0) = " << imports.extent (0) << endl; - *out_ << os.str (); + std::cerr << os.str(); } #ifdef HAVE_TPETRA_DEBUG @@ -2197,10 +2209,9 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts(3 args, Kokkos, " - << (indicesTo_.empty () ? "fast" : "slow") << "): Post receives" - << endl; - *out_ << os.str (); + os << *prefix << (indicesTo_.empty() ? "fast" : "slow") + << " path: post receives" << endl; + std::cerr << os.str(); } // Post the nonblocking receives. It's common MPI wisdom to post @@ -2219,11 +2230,11 @@ namespace Tpetra { if (procsFrom_[i] != myRank) { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts(3 args, Kokkos, " - << (indicesTo_.empty () ? "fast" : "slow") << "): " - << "Post irecv: {source: " << procsFrom_[i] + os << *prefix + << (indicesTo_.empty() ? 
"fast" : "slow") << " path: " + << "post irecv: {source: " << procsFrom_[i] << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + std::cerr << os.str(); } // If my process is receiving these packet(s) from another // process (not a self-receive): @@ -2258,9 +2269,9 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts(3 args, Kokkos, " - << (indicesTo_.empty () ? "fast" : "slow") << "): Barrier" << endl; - *out_ << os.str (); + os << *prefix << (indicesTo_.empty() ? "fast" : "slow") + << " path: barrier" << endl; + std::cerr << os.str(); } // If we are using ready sends (MPI_Rsend) below, we need to do // a barrier before we post the ready sends. This is because a @@ -2293,17 +2304,16 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts(3 args, Kokkos, " - << (indicesTo_.empty () ? "fast" : "slow") << "): Post sends" << endl; - *out_ << os.str (); + os << *prefix << (indicesTo_.empty() ? 
"fast" : "slow") + << " path: post sends" << endl; + std::cerr << os.str(); } if (indicesTo_.empty()) { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank - << ": doPosts(3 args, Kokkos, fast): posting sends" << endl; - *out_ << os.str (); + os << *prefix << "fast path: posting sends" << endl; + std::cerr << os.str(); } // Data are already blocked (laid out) by process, so we don't @@ -2317,11 +2327,10 @@ namespace Tpetra { if (procsTo_[p] != myRank) { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts(3 args, Kokkos, fast): Post send: " - "{target: " << procsTo_[p] << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + os << *prefix << "fast path: post send: {target: " + << procsTo_[p] << ", tag: " << tag << "}" << endl; + std::cerr << os.str(); } - exports_view_type tmpSend = subview_offset( exports, startsTo_[p]*numPackets, lengthsTo_[p]*numPackets); @@ -2363,9 +2372,8 @@ namespace Tpetra { if (selfMessage_) { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank - << ": doPosts(3 args, Kokkos, fast): Self-send" << endl; - *out_ << os.str (); + os << *prefix << "fast path: self-send" << endl; + std::cerr << os.str(); } // This is how we "send a message to ourself": we copy from // the export buffer to the import buffer. 
That saves @@ -2380,18 +2388,16 @@ namespace Tpetra { } if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts(3 args, Kokkos, fast) done" << endl; - *out_ << os.str (); + os << *prefix << "fast path: done" << endl; + std::cerr << os.str(); } } else { // data are not blocked by proc, use send buffer if (verbose_) { std::ostringstream os; - os << "Proc " << myRank - << ": doPosts(3 args, Kokkos, slow): posting sends" << endl; - *out_ << os.str (); + os << *prefix << "slow path: posting sends" << endl; + std::cerr << os.str(); } - typedef typename ExpView::non_const_value_type Packet; typedef typename ExpView::array_layout Layout; typedef typename ExpView::device_type Device; @@ -2416,10 +2422,9 @@ namespace Tpetra { if (procsTo_[p] != myRank) { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank - << ": doPosts(3 args, Kokkos, slow): Post send: {target: " + os << *prefix << "slow path: post send: {target: " << procsTo_[p] << ", tag: " << tag << "}" << endl; - *out_ << os.str (); + std::cerr << os.str(); } size_t sendArrayOffset = 0; @@ -2471,9 +2476,8 @@ namespace Tpetra { if (selfMessage_) { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank - << ": doPosts(3 args, Kokkos, slow): Self-send" << endl; - *out_ << os.str (); + os << *prefix << "slow path: self-send" << endl; + std::cerr << os.str(); } for (size_t k = 0; k < lengthsTo_[selfNum]; ++k) { deep_copy_offset(imports, exports, selfReceiveOffset, @@ -2484,16 +2488,15 @@ namespace Tpetra { } if (verbose_) { std::ostringstream os; - os << "Proc " << myRank - << ": doPosts(3 args, Kokkos, slow) done" << endl; - *out_ << os.str (); + os << *prefix << "slow path: done" << endl; + std::cerr << os.str(); } } if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": doPosts done" << endl; - *out_ << os.str (); + os << *prefix << "Done" << endl; + std::cerr << os.str(); } } @@ -2513,9 +2516,6 @@ namespace Tpetra { using Teuchos::send; using Teuchos::ssend; 
using Teuchos::TypeNameTraits; -#ifdef HAVE_TEUCHOS_DEBUG - using Teuchos::OSTab; -#endif // HAVE_TEUCHOS_DEBUG using std::endl; using Kokkos::Compat::create_const_view; using Kokkos::Compat::create_view; @@ -2532,7 +2532,13 @@ namespace Tpetra { "allocations. See GitHub issue #1088."); #endif // KOKKOS_ENABLE_CUDA - Teuchos::OSTab tab (out_); + std::unique_ptr prefix; + if (verbose_) { + prefix = createPrefix("doPosts(4-arg, Kokkos)"); + std::ostringstream os; + os << *prefix << "Start" << endl; + std::cerr << os.str(); + } #ifdef TPETRA_DISTRIBUTOR_TIMERS Teuchos::TimeMonitor timeMon (*timer_doPosts4_); @@ -2556,11 +2562,9 @@ namespace Tpetra { // } // // Add one tab level. We declare this outside the doPrint scopes // // so that the tab persists until the end of this method. -// Teuchos::OSTab tab = this->getOSTab (); // if (doPrint) { // *out << "Parameters:" << endl; // { -// OSTab tab2 (out); // *out << "sendType: " << DistributorSendTypeEnumToString (sendType) // << endl << "barrierBetween: " << doBarrier << endl; // } @@ -2607,11 +2611,10 @@ namespace Tpetra { #endif // HAVE_TEUCHOS_DEBUG if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID << ": doPosts(4 args, Kokkos, " - << (indicesTo_.empty () ? "fast" : "slow") << ")" << endl; - *out_ << os.str (); + os << *prefix << (indicesTo_.empty() ? "fast" : "slow") + << " path, tag=" << tag << endl; + std::cerr << os.str(); } - // Distributor uses requests_.size() as the number of outstanding // nonblocking message requests, so we resize to zero to maintain // this invariant. 
@@ -2716,9 +2719,8 @@ namespace Tpetra { if (indicesTo_.empty()) { if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID - << ": doPosts(4 args, Kokkos, fast): posting sends" << endl; - *out_ << os.str (); + os << *prefix << "fast path: posting sends" << endl; + std::cerr << os.str(); } // Data are already blocked (laid out) by process, so we don't @@ -2773,17 +2775,16 @@ namespace Tpetra { } if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID << ": doPosts(4 args, Kokkos, fast) done" << endl; - *out_ << os.str (); + os << *prefix << "fast path: done" << endl; + std::cerr << os.str(); } } else { // data are not blocked by proc, use send buffer if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID << ": doPosts(4 args, Kokkos, slow): posting sends" << endl; - *out_ << os.str (); + os << *prefix << "slow path: posting sends" << endl; + std::cerr << os.str(); } - // FIXME (mfh 05 Mar 2013) This may be broken for Isend. typedef typename ExpView::non_const_value_type Packet; typedef typename ExpView::array_layout Layout; @@ -2794,7 +2795,7 @@ namespace Tpetra { TEUCHOS_TEST_FOR_EXCEPTION( sendType == Details::DISTRIBUTOR_ISEND, std::logic_error, - "Tpetra::Distributor::doPosts(4 args, Kokkos): " + "Tpetra::Distributor::doPosts(4-arg, Kokkos): " "The \"send buffer\" code path may not necessarily work with nonblocking sends."); Array indicesOffsets (numExportPacketsPerLID.size(), 0); @@ -2864,9 +2865,8 @@ namespace Tpetra { } if (verbose_) { std::ostringstream os; - os << "Proc " << myProcID - << ": doPosts(4 args, Kokkos, slow) done" << endl; - *out_ << os.str (); + os << *prefix << "slow path: done" << endl; + std::cerr << os.str(); } } } @@ -2957,12 +2957,13 @@ namespace Tpetra { using std::endl; typedef typename ArrayView::size_type size_type; - Teuchos::OSTab tab (out_); const int myRank = comm_->getRank (); + std::unique_ptr prefix; if (verbose_) { + prefix = createPrefix("computeSends"); std::ostringstream os; - os << "Proc 
" << myRank << ": computeSends" << endl; - *out_ << os.str (); + os << *prefix << "Start" << endl; + std::cerr << os.str(); } TEUCHOS_TEST_FOR_EXCEPTION( @@ -2982,13 +2983,12 @@ namespace Tpetra { // Use a temporary Distributor to send the (importGIDs[i], myRank) // pairs to importProcIDs[i]. // - Distributor tempPlan (comm_, out_); + Distributor tempPlan(comm_); if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": computeSends: tempPlan.createFromSends" << endl; - *out_ << os.str (); + os << *prefix << "Call tempPlan.createFromSends" << endl; + std::cerr << os.str(); } - // mfh 20 Mar 2014: An extra-cautious cast from unsigned to // signed, in order to forestall any possible causes for Bug 6069. const size_t numExportsAsSizeT = tempPlan.createFromSends (importProcIDs); @@ -3038,8 +3038,8 @@ namespace Tpetra { Array exportObjs (tempPlan.getTotalReceiveLength () * 2); if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": computeSends: tempPlan.doPostsAndWaits" << endl; - *out_ << os.str (); + os << *prefix << "Call tempPlan.doPostsAndWaits" << endl; + std::cerr << os.str(); } tempPlan.doPostsAndWaits (importObjs (), 2, exportObjs ()); @@ -3051,8 +3051,8 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << "Proc " << myRank << ": computeSends done" << endl; - *out_ << os.str (); + os << *prefix << "Done" << endl; + std::cerr << os.str(); } } @@ -3064,38 +3064,41 @@ namespace Tpetra { Teuchos::Array &exportProcIDs) { using std::endl; - - Teuchos::OSTab tab (out_); const int myRank = comm_->getRank(); + const bool debug = Details::Behavior::debug("Distributor"); + std::unique_ptr prefix; if (verbose_) { - *out_ << "Proc " << myRank << ": createFromRecvs" << endl; + prefix = createPrefix("createFromRecvs"); + std::ostringstream os; + os << *prefix << "Start" << endl; + std::cerr << os.str(); } -#ifdef HAVE_TPETRA_DEBUG - using Teuchos::outArg; - using Teuchos::reduceAll; - - // In debug mode, first test locally, then do 
an all-reduce to - // make sure that all processes passed. - const int errProc = - (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1; - int maxErrProc = -1; - reduceAll (*comm_, Teuchos::REDUCE_MAX, errProc, outArg (maxErrProc)); - TEUCHOS_TEST_FOR_EXCEPTION(maxErrProc != -1, std::runtime_error, - Teuchos::typeName (*this) << "::createFromRecvs(): lists of remote IDs " - "and remote process IDs must have the same size on all participating " - "processes. Maximum process ID with error: " << maxErrProc << "."); -#else // NOT HAVE_TPETRA_DEBUG - - // In non-debug mode, just test locally. - TEUCHOS_TEST_FOR_EXCEPTION( - remoteGIDs.size () != remoteProcIDs.size (), std::invalid_argument, - Teuchos::typeName (*this) << "::createFromRecvs<" << - Teuchos::TypeNameTraits::name () << ">(): On Process " << - myRank << ": remoteGIDs.size() = " << remoteGIDs.size () << " != " - "remoteProcIDs.size() = " << remoteProcIDs.size () << "."); -#endif // HAVE_TPETRA_DEBUG + if (debug) { + using Teuchos::outArg; + using Teuchos::REDUCE_MAX; + using Teuchos::reduceAll; + // In debug mode, first test locally, then do an all-reduce to + // make sure that all processes passed. + const int errProc = + (remoteGIDs.size () != remoteProcIDs.size ()) ? myRank : -1; + int maxErrProc = -1; + reduceAll(*comm_, REDUCE_MAX, errProc, outArg(maxErrProc)); + TEUCHOS_TEST_FOR_EXCEPTION + (maxErrProc != -1, std::runtime_error, + "Tpetra::Distributor::createFromRecvs: lists of remote IDs " + "and remote process IDs must have the same size on all participating " + "processes. 
Maximum process ID with error: " << maxErrProc << "."); + } + else { // in non-debug mode, just test locally + TEUCHOS_TEST_FOR_EXCEPTION + (remoteGIDs.size () != remoteProcIDs.size (), std::invalid_argument, + "Tpetra::Distributor::createFromRecvs<" << + Teuchos::TypeNameTraits::name () << ">(): On Process " << + myRank << ": remoteGIDs.size() = " << remoteGIDs.size () << " != " + "remoteProcIDs.size() = " << remoteProcIDs.size () << "."); + } computeSends (remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs); @@ -3107,21 +3110,22 @@ namespace Tpetra { // even if we account for selfMessage_. selfMessage_ is set in // createFromSends. std::ostringstream os; - os << "Proc " << myRank << ": {numProcsSendingToMe: " + os << *prefix << "numProcsSendingToMe: " << numProcsSendingToMe << ", remoteProcIDs.size(): " << remoteProcIDs.size () << ", selfMessage_: " - << (selfMessage_ ? "true" : "false") << "}" << std::endl; - *out_ << os.str (); + << (selfMessage_ ? "true" : "false") << "" << endl; + std::cerr << os.str(); } + howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS; + if (verbose_) { - *out_ << "Proc " << myRank << ": createFromRecvs done" << endl; + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); } - - howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS; } - } // namespace Tpetra #endif // TPETRA_DISTRIBUTOR_HPP From a0c00c282f413710bf18aad6734e010c8420f8d9 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 16:01:38 -0700 Subject: [PATCH 44/49] Tpetra::CrsPadding: Improve verbose debug output @trilinos/tpetra --- .../core/src/Tpetra_Details_CrsPadding.hpp | 158 ++++++++++-------- .../tpetra/core/src/Tpetra_Distributor.cpp | 5 +- 2 files changed, 90 insertions(+), 73 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index cbd9f6b06e96..82e33438fbf8 100644 --- 
a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -285,87 +285,103 @@ namespace Tpetra { srcColInds, srcEnd, tgtColInds, tgtEnd); TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; + if (verbose_) { + std::ostringstream os; + os << *prefix << "numInCommon=" << numInCommon + << ", unionNumEnt=" << unionNumEnt << endl; + std::cerr << os.str(); + } - if (unionNumEnt > numTgtEnt) { + if (numInCommon == numSrcEnt) { if (verbose_) { std::ostringstream os; - os << *prefix << "unionNumEnt=" << unionNumEnt - << " > numTgtEnt=" << numTgtEnt << endl; + os << *prefix << "Done (early; store nothing)" << endl; std::cerr << os.str(); } - TEUCHOS_ASSERT( numSrcEnt != 0 ); + return; + } - // At least one input source index isn't in the target. - std::vector& diffColInds = - get_difference_col_inds(phase, whichIndex, tgtLclRowInd); - const size_t oldDiffNumEnt = diffColInds.size(); + // At least one input source index isn't in the target. 
+ std::vector& diffColInds = + get_difference_col_inds(phase, whichIndex, tgtLclRowInd); + const size_t oldDiffNumEnt = diffColInds.size(); - if (oldDiffNumEnt == 0) { - if (verbose_) { - std::ostringstream os; - os << *prefix << "oldDiffNumEnt=0" << endl; - std::cerr << os.str(); - } - TEUCHOS_ASSERT( numSrcEnt >= numInCommon ); - diffColInds.resize(numSrcEnt); - auto diffEnd = std::set_difference(srcColInds, srcEnd, - tgtColInds, tgtEnd, - diffColInds.begin()); - const size_t newLen(diffEnd - diffColInds.begin()); - TEUCHOS_ASSERT( newLen <= numSrcEnt ); - diffColInds.resize(newLen); + if (oldDiffNumEnt == 0) { + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=0; call " + "set_difference(src,tgt,diff)" << endl; + std::cerr << os.str(); } - else { - if (verbose_) { - std::ostringstream os; - os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt - << "; call countNumInCommon" << endl; - std::cerr << os.str(); - } - // scratch = union(srcColInds, diffColInds) - const size_t unionSize = numSrcEnt + oldDiffNumEnt - - countNumInCommon(srcColInds, srcEnd, - diffColInds.begin(), - diffColInds.end()); - if (scratchColInds_.size() < unionSize) { - scratchColInds_.resize(unionSize); - } - if (verbose_) { - std::ostringstream os; - os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt - << ", unionSize=" << unionSize << ", call set_union" - << endl; - std::cerr << os.str(); - } - auto unionBeg = scratchColInds_.begin(); - auto unionEnd = std::set_union( - srcColInds, srcEnd, - diffColInds.begin(), diffColInds.end(), - unionBeg); - const size_t newUnionLen(unionEnd - unionBeg); - TEUCHOS_ASSERT( newUnionLen == unionSize ); - - // diffColInds = difference(scratch, tgtColInds) - const size_t unionTgtInCommon = countNumInCommon( - unionBeg, unionEnd, tgtColInds, tgtEnd); - if (verbose_) { - std::ostringstream os; - os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt - << ", unionSize=" << unionSize - << ", unionTgtInCommon=" << unionTgtInCommon - << "; call 
set_difference" << endl; - std::cerr << os.str(); - } - TEUCHOS_ASSERT( unionSize >= unionTgtInCommon ); + diffColInds.resize(numSrcEnt); + auto diffEnd = std::set_difference(srcColInds, srcEnd, + tgtColInds, tgtEnd, + diffColInds.begin()); + const size_t newLen(diffEnd - diffColInds.begin()); + TEUCHOS_ASSERT( newLen <= numSrcEnt ); + diffColInds.resize(newLen); + } + else { + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << "; call countNumInCommon(src,diff)" << endl; + std::cerr << os.str(); + } + // scratch = union(srcColInds, diffColInds) + const size_t unionSize = numSrcEnt + oldDiffNumEnt - + countNumInCommon(srcColInds, srcEnd, + diffColInds.begin(), + diffColInds.end()); + if (verbose_) { + std::ostringstream os; + os << *prefix << "unionSize=" << unionSize << endl; + std::cerr << os.str(); + } + if (scratchColInds_.size() < unionSize) { + scratchColInds_.resize(unionSize); + } + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << ", unionSize=" << unionSize << ", call " + "set_union(src,diff,union)" << endl; + std::cerr << os.str(); + } + auto unionBeg = scratchColInds_.begin(); + auto unionEnd = std::set_union(srcColInds, srcEnd, + diffColInds.begin(), diffColInds.end(), + unionBeg); + const size_t newUnionLen(unionEnd - unionBeg); + TEUCHOS_ASSERT( newUnionLen == unionSize ); - diffColInds.resize(unionSize); - auto diffEnd = std::set_difference(unionBeg, unionEnd, - tgtColInds, tgtEnd, - diffColInds.begin()); - const size_t diffLen(diffEnd - diffColInds.begin()); - TEUCHOS_ASSERT( diffLen <= unionSize ); - diffColInds.resize(diffLen); + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << ", unionSize=" << unionSize << ", call " + "countNumInCommon(union,tgt)" << endl; + std::cerr << os.str(); + } + // diffColInds = difference(scratch, tgtColInds) + const size_t unionTgtInCommon = + countNumInCommon(unionBeg, 
unionEnd, tgtColInds, tgtEnd); + if (verbose_) { + std::ostringstream os; + os << *prefix << "oldDiffNumEnt=" << oldDiffNumEnt + << ", unionSize=" << unionSize + << ", unionTgtInCommon=" << unionTgtInCommon + << "; call set_difference" << endl; + std::cerr << os.str(); } + TEUCHOS_ASSERT( unionSize >= unionTgtInCommon ); + + diffColInds.resize(unionSize); + auto diffEnd = std::set_difference(unionBeg, unionEnd, + tgtColInds, tgtEnd, + diffColInds.begin()); + const size_t diffLen(diffEnd - diffColInds.begin()); + TEUCHOS_ASSERT( diffLen <= unionSize ); + diffColInds.resize(diffLen); } if (verbose_) { diff --git a/packages/tpetra/core/src/Tpetra_Distributor.cpp b/packages/tpetra/core/src/Tpetra_Distributor.cpp index 9ea059d35d3b..7e3a8cb4c461 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.cpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.cpp @@ -980,8 +980,9 @@ namespace Tpetra { if (verbose_) { std::ostringstream os; - os << myRank << ": computeReceives: waitAll on " - << requests.size () << " requests" << endl; + const size_t numReq = requests.size(); + os << *prefix << "waitAll on " << numReq << " request" + << (numReq != size_t(1) ? "s" : "") << endl; std::cerr << os.str(); } // From 8a7b59e02d9cab40f6333e798df31f57e490039b Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 16:25:55 -0700 Subject: [PATCH 45/49] Tpetra: Improve CrsPadding verbose output --- .../tpetra/core/src/Tpetra_Details_CrsPadding.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index 82e33438fbf8..c0793e0d9534 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -192,8 +192,9 @@ namespace Tpetra { if (! 
tgtIsUnique) { tgtEnd = std::unique(tgtGblColInds, tgtEnd); newNumTgtEnt = size_t(tgtEnd - tgtGblColInds); + TEUCHOS_ASSERT( newNumTgtEnt <= origNumTgtEnt ); + tgtNumDups += (origNumTgtEnt - newNumTgtEnt); } - tgtNumDups += (origNumTgtEnt - newNumTgtEnt); if (verbose_) { std::ostringstream os; @@ -207,8 +208,9 @@ namespace Tpetra { if (! srcIsUnique) { srcEnd = std::unique(srcGblColInds, srcEnd); newNumSrcEnt = size_t(srcEnd - srcGblColInds); + TEUCHOS_ASSERT( newNumSrcEnt <= origNumSrcEnt ); + srcNumDups += (origNumSrcEnt - newNumSrcEnt); } - srcNumDups += (origNumSrcEnt - newNumSrcEnt); if (verbose_) { std::ostringstream os; @@ -283,14 +285,13 @@ namespace Tpetra { auto srcEnd = srcColInds + numSrcEnt; const size_t numInCommon = countNumInCommon( srcColInds, srcEnd, tgtColInds, tgtEnd); - TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); - unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; if (verbose_) { std::ostringstream os; - os << *prefix << "numInCommon=" << numInCommon - << ", unionNumEnt=" << unionNumEnt << endl; + os << *prefix << "numInCommon=" << numInCommon << endl; std::cerr << os.str(); } + TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); + unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; if (numInCommon == numSrcEnt) { if (verbose_) { From 5c33c8cb6aacc693e266b2caa399414a0e372778 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 17:28:29 -0700 Subject: [PATCH 46/49] Tpetra: Fix #6663 @trilinos/tpetra This change makes the SD test run to completion. 
--- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 32 +++++----- .../core/src/Tpetra_Details_CrsPadding.hpp | 60 +++++++++---------- 2 files changed, 45 insertions(+), 47 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 2aa3d07eb549..7ab753262384 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -5313,7 +5313,7 @@ namespace Tpetra { std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; - size_t mergedNumDups = 0; + // size_t mergedNumDups = 0; for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) { const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd); const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd); @@ -5321,7 +5321,7 @@ namespace Tpetra { srcGblColIndsScratch, source, srcGblRowInd); auto tgtGblColInds = getRowGraphGlobalRow( tgtGblColIndsScratch, *this, tgtGblRowInd); - padding.update_same(tgtNumDups, srcNumDups, mergedNumDups, + padding.update_same(tgtNumDups, srcNumDups, /* mergedNumDups, */ lclRowInd, tgtGblColInds.getRawPtr(), tgtGblColInds.size(), tgt_is_unique, srcGblColInds.getRawPtr(), @@ -5330,8 +5330,8 @@ namespace Tpetra { if (verbose) { std::ostringstream os; os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups - << ", mergedNumDups: " << mergedNumDups << endl; + << ", tgtNumDups: " << tgtNumDups << endl; + // os << ", mergedNumDups: " << mergedNumDups << endl; std::cerr << os.str(); } } @@ -5387,7 +5387,7 @@ namespace Tpetra { std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; - size_t mergedNumDups = 0; + // size_t mergedNumDups = 0; const LO numPermutes = static_cast(permuteToLIDs_h.extent(0)); for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) { const LO srcLclRowInd = permuteFromLIDs_h[whichPermute]; @@ -5398,7 +5398,7 @@ namespace Tpetra { const GO tgtGblRowInd 
= tgtRowMap.getGlobalElement(tgtLclRowInd); auto tgtGblColInds = getRowGraphGlobalRow( tgtGblColIndsScratch, *this, tgtGblRowInd); - padding.update_permute(tgtNumDups, srcNumDups, mergedNumDups, + padding.update_permute(tgtNumDups, srcNumDups, /* mergedNumDups, */ whichPermute, tgtLclRowInd, tgtGblColInds.getRawPtr(), tgtGblColInds.size(), tgt_is_unique, @@ -5409,8 +5409,8 @@ namespace Tpetra { if (verbose) { std::ostringstream os; os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups - << ", mergedNumDups: " << mergedNumDups << endl; + << ", tgtNumDups: " << tgtNumDups << endl; + // os << ", mergedNumDups: " << mergedNumDups << endl; std::cerr << os.str(); } } @@ -5478,7 +5478,7 @@ namespace Tpetra { std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; - size_t mergedNumDups = 0; + /* size_t mergedNumDups = 0; */ size_t offset = 0; for (LO whichImport = 0; whichImport < numImports; ++whichImport) { // CrsGraph packs just global column indices, while CrsMatrix @@ -5496,7 +5496,7 @@ namespace Tpetra { tgtGblColIndsScratch, *this, tgtGblRowInd); const size_t origTgtNumEnt(tgtGblColInds.size()); - padding->update_import(tgtNumDups, srcNumDups, mergedNumDups, + padding->update_import(tgtNumDups, srcNumDups, /* mergedNumDups, */ whichImport, tgtLclRowInd, tgtGblColInds.getRawPtr(), origTgtNumEnt, tgt_is_unique, @@ -5508,8 +5508,8 @@ namespace Tpetra { if (verbose) { std::ostringstream os; os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups - << ", mergedNumDups: " << mergedNumDups << endl; + << ", tgtNumDups: " << tgtNumDups << endl; + // os << ", mergedNumDups: " << mergedNumDups << endl; std::cerr << os.str(); } return padding; @@ -5583,7 +5583,7 @@ namespace Tpetra { std::vector tgtGblColIndsScratch; size_t srcNumDups = 0; size_t tgtNumDups = 0; - size_t mergedNumDups = 0; + // size_t mergedNumDups = 0; size_t offset = 0; for (LO whichImport = 0; whichImport < numImports; 
++whichImport) { // CrsGraph packs just global column indices, while CrsMatrix @@ -5640,7 +5640,7 @@ namespace Tpetra { << ": Call padding->update_import" << endl; std::cerr << os.str(); } - padding->update_import(tgtNumDups, srcNumDups, mergedNumDups, + padding->update_import(tgtNumDups, srcNumDups, /* mergedNumDups, */ whichImport, tgtLclRowInd, tgtGblColInds.getRawPtr(), origNumTgtEnt, tgt_is_unique, @@ -5652,8 +5652,8 @@ namespace Tpetra { if (verbose) { std::ostringstream os; os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups - << ", mergedNumDups: " << mergedNumDups << endl; + << ", tgtNumDups: " << tgtNumDups << endl; + // os << ", mergedNumDups: " << mergedNumDups << endl; std::cerr << os.str(); } return padding; diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index c0793e0d9534..7015a70ec011 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -50,7 +50,7 @@ namespace Tpetra { update_same( size_t& tgtNumDups, // accumulator size_t& srcNumDups, // accumulator - size_t& unionNumDups, // accumulator + /* size_t& unionNumDups, */ // accumulator const LO targetLocalIndex, GO tgtGblColInds[], const size_t origNumTgtEnt, @@ -61,7 +61,7 @@ namespace Tpetra { { const LO whichSame = targetLocalIndex; update_impl(Phase::SAME, - tgtNumDups, srcNumDups, unionNumDups, + tgtNumDups, srcNumDups, /* unionNumDups, */ whichSame, targetLocalIndex, tgtGblColInds, origNumTgtEnt, tgtIsUnique, srcGblColInds, origNumSrcEnt, srcIsUnique); @@ -71,7 +71,7 @@ namespace Tpetra { update_permute( size_t& tgtNumDups, // accumulator size_t& srcNumDups, // accumulator - size_t& unionNumDups, // accumulator + /* size_t& unionNumDups, */ // accumulator const LO whichPermute, // index in permuteFrom/To const LO targetLocalIndex, GO tgtGblColInds[], @@ -82,7 +82,7 @@ namespace Tpetra { const bool 
srcIsUnique) { update_impl(Phase::PERMUTE, - tgtNumDups, srcNumDups, unionNumDups, + tgtNumDups, srcNumDups, /* unionNumDups, */ whichPermute, targetLocalIndex, tgtGblColInds, origNumTgtEnt, tgtIsUnique, srcGblColInds, origNumSrcEnt, srcIsUnique); @@ -92,7 +92,7 @@ namespace Tpetra { update_import( size_t& tgtNumDups, // accumulator size_t& srcNumDups, // accumulator - size_t& unionNumDups, // accumulator + /* size_t& unionNumDups, */ // accumulator const LO whichImport, const LO targetLocalIndex, GO tgtGblColInds[], @@ -103,7 +103,7 @@ namespace Tpetra { const bool srcIsUnique) { update_impl(Phase::IMPORT, - tgtNumDups, srcNumDups, unionNumDups, + tgtNumDups, srcNumDups, /* unionNumDups, */ whichImport, targetLocalIndex, tgtGblColInds, origNumTgtEnt, tgtIsUnique, srcGblColInds, origNumSrcEnt, srcIsUnique); @@ -162,7 +162,7 @@ namespace Tpetra { const Phase phase, size_t& tgtNumDups, size_t& srcNumDups, - size_t& unionNumDups, + /* size_t& unionNumDups, */ const LO whichImport, const LO targetLocalIndex, GO tgtGblColInds[], @@ -218,17 +218,15 @@ namespace Tpetra { std::cerr << os.str(); } - size_t unionNumEnt = 0; - merge_with_current_state(phase, unionNumEnt, + //size_t unionNumEnt = 0; + merge_with_current_state(phase, /* unionNumEnt, */ whichImport, targetLocalIndex, tgtGblColInds, newNumTgtEnt, srcGblColInds, newNumSrcEnt); - unionNumDups += (newNumTgtEnt + newNumSrcEnt - unionNumEnt); - if (verbose_) { std::ostringstream os; - os << *prefix << "Done: " - << "unionNumDups=" << unionNumDups << endl; + os << *prefix << "Done" << endl; + // os << "unionNumDups=" << unionNumDups << endl; std::cerr << os.str(); } } @@ -244,7 +242,7 @@ namespace Tpetra { void merge_with_current_state( const Phase phase, - size_t& unionNumEnt, + /* size_t& unionNumEnt, */ const LO whichIndex, const LO tgtLclRowInd, const GO tgtColInds[], // sorted & merged @@ -283,24 +281,24 @@ namespace Tpetra { auto tgtEnd = tgtColInds + numTgtEnt; auto srcEnd = srcColInds + numSrcEnt; - const 
size_t numInCommon = countNumInCommon( - srcColInds, srcEnd, tgtColInds, tgtEnd); - if (verbose_) { - std::ostringstream os; - os << *prefix << "numInCommon=" << numInCommon << endl; - std::cerr << os.str(); - } - TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); - unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; + // const size_t numInCommon = countNumInCommon( + // srcColInds, srcEnd, tgtColInds, tgtEnd); + // if (verbose_) { + // std::ostringstream os; + // os << *prefix << "numInCommon=" << numInCommon << endl; + // std::cerr << os.str(); + // } + // TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); + // unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; - if (numInCommon == numSrcEnt) { - if (verbose_) { - std::ostringstream os; - os << *prefix << "Done (early; store nothing)" << endl; - std::cerr << os.str(); - } - return; - } + // if (numInCommon == numSrcEnt) { + // if (verbose_) { + // std::ostringstream os; + // os << *prefix << "Done (early; store nothing)" << endl; + // std::cerr << os.str(); + // } + // return; + // } // At least one input source index isn't in the target. std::vector& diffColInds = From 298f5bbd7765592545e26a12a50d9c75b93b9af6 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 21:23:53 -0700 Subject: [PATCH 47/49] Tpetra: Add another unit test for countNumInCommon @trilinos/tpetra --- .../core/test/Utils/countNumInCommon.cpp | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/packages/tpetra/core/test/Utils/countNumInCommon.cpp b/packages/tpetra/core/test/Utils/countNumInCommon.cpp index 169001aa2ca6..530e5def7251 100644 --- a/packages/tpetra/core/test/Utils/countNumInCommon.cpp +++ b/packages/tpetra/core/test/Utils/countNumInCommon.cpp @@ -242,4 +242,67 @@ namespace { // (anonymous) expectedNumInCommon, expectedMergeSize); } + // This came from an application test problem, where I suspect that + // countNumInCommon may have been caught in an infinite loop. 
+ TEUCHOS_UNIT_TEST( Utils, CountNumInCommon_regression ) + { + std::vector tgtColInds {{ + 91917, 91918, 91919, 91926, 91927, 91928, 92142, 92143, + 92144, 92148, 92149, 92150, 92151, 92152, 92153, 92538, + 92539, 92540, 92712, 92713, 92714, 95430, 95431, 95432, + 95466, 95467, 95468, 95469, 95470, 95471, 95556, 95557, + 95558, 95559, 95560, 95561, 95565, 95566, 95567, 95571, + 95572, 95573, 95619, 95620, 95621, 95622, 95623, 95624, + 95625, 95626, 95627, 95628, 95629, 95630, 95631, 95632, + 95633, 95634, 95635, 95636 + }}; + + std::vector srcColInds {{ + 57090, 57091, 57092, 57093, 57094, 57095, 57102, 57103, + 57104, 57117, 57118, 57119, 57120, 57121, 57122, 57123, + 57124, 57125, 57129, 57130, 57131, 57183, 57184, 57185, + 57186, 57187, 57188, 57195, 57196, 57197, 57198, 57199, + 57200, 57204, 57205, 57206, 57207, 57208, 57209, 57222, + 57223, 57224, 57225, 57226, 57227, 57228, 57229, 57230, + 57234, 57235, 57236, 57237, 57238, 57239, 57240, 57241, + 57242, 57243, 57244, 57245, 57246, 57247, 57248, 95421, + 95422, 95423, 95430, 95431, 95432, 95436, 95437, 95438, + 95457, 95458, 95459, 95463, 95464, 95465, 95466, 95467, + 95468, 95469, 95470, 95471, 95472, 95473, 95474, 95556, + 95557, 95558, 95559, 95560, 95561, 95565, 95566, 95567, + 95571, 95572, 95573, 95619, 95620, 95621, 95622, 95623, + 95624, 95625, 95626, 95627, 95628, 95629, 95630, 95631, + 95632, 95633, 95634, 95635, 95636, 95754, 95755, 95756, + 95757, 95758, 95759, 95835, 95836, 95837, 95838, 95839, + 95840, 95841, 95842, 95843 + }}; + + size_t numTgt = tgtColInds.size(); + size_t numSrc = srcColInds.size(); + size_t expectedUnionSize = 0; + { + std::vector tgtColInds2(tgtColInds); + std::sort(tgtColInds2.begin(), tgtColInds2.end()); + auto tgtEnd = std::unique(tgtColInds2.begin(), tgtColInds2.end()); + numTgt = size_t(tgtEnd - tgtColInds2.begin()); + TEST_EQUALITY( numTgt, tgtColInds.size() ); + + std::vector srcColInds2(srcColInds); + std::sort(srcColInds2.begin(), srcColInds2.end()); + auto srcEnd 
= std::unique(srcColInds2.begin(), srcColInds2.end()); + numSrc = size_t(srcEnd - srcColInds2.begin()); + TEST_EQUALITY( numSrc, srcColInds.size() ); + + std::vector unionInds(numTgt + numSrc); + auto unionEnd = std::set_union(tgtColInds2.begin(), tgtEnd, + srcColInds2.begin(), srcEnd, + unionInds.begin()); + expectedUnionSize = size_t(unionEnd - unionInds.begin()); + } + + const size_t expectedNumInCommon(39); // counted by hand + testLists(out, success, tgtColInds, srcColInds, + expectedNumInCommon, expectedUnionSize); + } + } // namespace (anonymous) From 7aee1477553f741cdaf4118826bf59cf29bf7bf4 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Thu, 13 Feb 2020 21:56:56 -0700 Subject: [PATCH 48/49] Tpetra::Distributor: Make debug check throw the right exception @trilinos/tpetra My previous changes exposed more debug checks. One unit test was expecting a debug check to throw runtime_error, but it was throwing invalid_argument instead. This commit fixes that. I also cleaned up some error handling code in Distributor. 
--- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 115 ++++++++++-------- .../core/src/Tpetra_Details_CrsPadding.hpp | 58 +-------- .../tpetra/core/src/Tpetra_Distributor.cpp | 2 +- .../tpetra/core/src/Tpetra_Distributor.hpp | 113 ++++++++--------- .../Distributor/Distributor_UnitTests.cpp | 111 ++++++++--------- 5 files changed, 182 insertions(+), 217 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index 7ab753262384..d4e5b95eef42 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -4706,35 +4706,70 @@ namespace Tpetra { CrsGraph:: sortAndMergeAllIndices (const bool sorted, const bool merged) { - using ::Tpetra::Details::ProfilingRegion; - typedef LocalOrdinal LO; - typedef typename Kokkos::View::HostMirror::execution_space - host_execution_space; - typedef Kokkos::RangePolicy range_type; - const char tfecfFuncName[] = "sortAndMergeAllIndices: "; - ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::sortAndMergeAllIndices"); + using std::endl; + using LO = LocalOrdinal; + using host_execution_space = + typename Kokkos::View::HostMirror:: + execution_space; + using range_type = Kokkos::RangePolicy; + const char tfecfFuncName[] = "sortAndMergeAllIndices"; + Details::ProfilingRegion regionSortAndMerge + ("Tpetra::CrsGraph::sortAndMergeAllIndices"); + std::unique_ptr prefix; + if (verbose_) { + prefix = this->createPrefix("CrsGraph", tfecfFuncName); + std::ostringstream os; + os << *prefix << "Start: " + << "sorted=" << (sorted ? "true" : "false") + << ", merged=" << (merged ? "true" : "false") << endl; + std::cerr << os.str(); + } TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (this->isGloballyIndexed (), std::logic_error, + (this->isGloballyIndexed(), std::logic_error, "This method may only be called after makeIndicesLocal." ); - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - (! 
merged && this->isStorageOptimized (), std::logic_error, - "The graph is already storage optimized, so we shouldn't be merging any " - "indices. Please report this bug to the Tpetra developers."); + (! merged && this->isStorageOptimized(), std::logic_error, + "The graph is already storage optimized, so we shouldn't be " + "merging any indices. " + "Please report this bug to the Tpetra developers."); if (! sorted || ! merged) { - const LO lclNumRows = static_cast (this->getNodeNumRows ()); - size_t totalNumDups = 0; - // FIXME (mfh 08 May 2017) This may assume CUDA UVM. - Kokkos::parallel_reduce (range_type (0, lclNumRows), - [this, sorted, merged] (const LO& lclRow, size_t& numDups) { - const RowInfo rowInfo = this->getRowInfo (lclRow); - numDups += this->sortAndMergeRowIndices (rowInfo, sorted, merged); - }, totalNumDups); + const LO lclNumRows(this->getNodeNumRows()); + auto range = range_type(0, lclNumRows); + + // FIXME (mfh 08 May 2017) Loops below assume CUDA UVM. + if (verbose_) { + size_t totalNumDups = 0; + Kokkos::parallel_reduce(range, + [this, sorted, merged] (const LO lclRow, size_t& numDups) + { + const RowInfo rowInfo = this->getRowInfo(lclRow); + numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged); + }, + totalNumDups); + std::ostringstream os; + os << *prefix << "totalNumDups=" << totalNumDups << endl; + std::cerr << os.str(); + } + else { + // FIXME (mfh 08 May 2017) This may assume CUDA UVM. 
+ Kokkos::parallel_for(range, + [this, sorted, merged] (const LO lclRow) + { + const RowInfo rowInfo = this->getRowInfo(lclRow); + this->sortAndMergeRowIndices(rowInfo, sorted, merged); + }); + } this->indicesAreSorted_ = true; // we just sorted every row this->noRedundancies_ = true; // we just merged every row } + + if (verbose_) { + std::ostringstream os; + os << *prefix << "Done" << endl; + std::cerr << os.str(); + } } template @@ -5311,9 +5346,6 @@ namespace Tpetra { std::vector srcGblColIndsScratch; std::vector tgtGblColIndsScratch; - size_t srcNumDups = 0; - size_t tgtNumDups = 0; - // size_t mergedNumDups = 0; for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) { const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd); const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd); @@ -5321,17 +5353,14 @@ namespace Tpetra { srcGblColIndsScratch, source, srcGblRowInd); auto tgtGblColInds = getRowGraphGlobalRow( tgtGblColIndsScratch, *this, tgtGblRowInd); - padding.update_same(tgtNumDups, srcNumDups, /* mergedNumDups, */ - lclRowInd, tgtGblColInds.getRawPtr(), + padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(), tgtGblColInds.size(), tgt_is_unique, srcGblColInds.getRawPtr(), srcGblColInds.size(), src_is_unique); } if (verbose) { std::ostringstream os; - os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups << endl; - // os << ", mergedNumDups: " << mergedNumDups << endl; + os << *prefix << "Done" << endl; std::cerr << os.str(); } } @@ -5385,9 +5414,6 @@ namespace Tpetra { std::vector srcGblColIndsScratch; std::vector tgtGblColIndsScratch; - size_t srcNumDups = 0; - size_t tgtNumDups = 0; - // size_t mergedNumDups = 0; const LO numPermutes = static_cast(permuteToLIDs_h.extent(0)); for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) { const LO srcLclRowInd = permuteFromLIDs_h[whichPermute]; @@ -5398,8 +5424,7 @@ namespace Tpetra { const GO tgtGblRowInd = 
tgtRowMap.getGlobalElement(tgtLclRowInd); auto tgtGblColInds = getRowGraphGlobalRow( tgtGblColIndsScratch, *this, tgtGblRowInd); - padding.update_permute(tgtNumDups, srcNumDups, /* mergedNumDups, */ - whichPermute, tgtLclRowInd, + padding.update_permute(whichPermute, tgtLclRowInd, tgtGblColInds.getRawPtr(), tgtGblColInds.size(), tgt_is_unique, srcGblColInds.getRawPtr(), @@ -5408,9 +5433,7 @@ namespace Tpetra { if (verbose) { std::ostringstream os; - os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups << endl; - // os << ", mergedNumDups: " << mergedNumDups << endl; + os << *prefix << "Done" << endl; std::cerr << os.str(); } } @@ -5476,9 +5499,6 @@ namespace Tpetra { const bool tgt_is_unique = isMerged(); std::vector tgtGblColIndsScratch; - size_t srcNumDups = 0; - size_t tgtNumDups = 0; - /* size_t mergedNumDups = 0; */ size_t offset = 0; for (LO whichImport = 0; whichImport < numImports; ++whichImport) { // CrsGraph packs just global column indices, while CrsMatrix @@ -5496,8 +5516,7 @@ namespace Tpetra { tgtGblColIndsScratch, *this, tgtGblRowInd); const size_t origTgtNumEnt(tgtGblColInds.size()); - padding->update_import(tgtNumDups, srcNumDups, /* mergedNumDups, */ - whichImport, tgtLclRowInd, + padding->update_import(whichImport, tgtLclRowInd, tgtGblColInds.getRawPtr(), origTgtNumEnt, tgt_is_unique, srcGblColInds, @@ -5507,9 +5526,7 @@ namespace Tpetra { if (verbose) { std::ostringstream os; - os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups << endl; - // os << ", mergedNumDups: " << mergedNumDups << endl; + os << *prefix << "Done" << endl; std::cerr << os.str(); } return padding; @@ -5581,9 +5598,6 @@ namespace Tpetra { std::vector srcGblColIndsScratch; std::vector tgtGblColIndsScratch; - size_t srcNumDups = 0; - size_t tgtNumDups = 0; - // size_t mergedNumDups = 0; size_t offset = 0; for (LO whichImport = 0; whichImport < numImports; ++whichImport) { // CrsGraph packs just global column 
indices, while CrsMatrix @@ -5640,8 +5654,7 @@ namespace Tpetra { << ": Call padding->update_import" << endl; std::cerr << os.str(); } - padding->update_import(tgtNumDups, srcNumDups, /* mergedNumDups, */ - whichImport, tgtLclRowInd, + padding->update_import(whichImport, tgtLclRowInd, tgtGblColInds.getRawPtr(), origNumTgtEnt, tgt_is_unique, srcGblColInds, @@ -5651,9 +5664,7 @@ namespace Tpetra { if (verbose) { std::ostringstream os; - os << *prefix << "Done: srcNumDups: " << srcNumDups - << ", tgtNumDups: " << tgtNumDups << endl; - // os << ", mergedNumDups: " << mergedNumDups << endl; + os << *prefix << "Done" << endl; std::cerr << os.str(); } return padding; diff --git a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp index 7015a70ec011..6a6dedb1894a 100644 --- a/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_CrsPadding.hpp @@ -48,9 +48,6 @@ namespace Tpetra { void update_same( - size_t& tgtNumDups, // accumulator - size_t& srcNumDups, // accumulator - /* size_t& unionNumDups, */ // accumulator const LO targetLocalIndex, GO tgtGblColInds[], const size_t origNumTgtEnt, @@ -60,18 +57,13 @@ namespace Tpetra { const bool srcIsUnique) { const LO whichSame = targetLocalIndex; - update_impl(Phase::SAME, - tgtNumDups, srcNumDups, /* unionNumDups, */ - whichSame, targetLocalIndex, + update_impl(Phase::SAME, whichSame, targetLocalIndex, tgtGblColInds, origNumTgtEnt, tgtIsUnique, srcGblColInds, origNumSrcEnt, srcIsUnique); } void update_permute( - size_t& tgtNumDups, // accumulator - size_t& srcNumDups, // accumulator - /* size_t& unionNumDups, */ // accumulator const LO whichPermute, // index in permuteFrom/To const LO targetLocalIndex, GO tgtGblColInds[], @@ -81,18 +73,13 @@ namespace Tpetra { const size_t origNumSrcEnt, const bool srcIsUnique) { - update_impl(Phase::PERMUTE, - tgtNumDups, srcNumDups, /* unionNumDups, */ - whichPermute, targetLocalIndex, + 
update_impl(Phase::PERMUTE, whichPermute, targetLocalIndex, tgtGblColInds, origNumTgtEnt, tgtIsUnique, srcGblColInds, origNumSrcEnt, srcIsUnique); } void update_import( - size_t& tgtNumDups, // accumulator - size_t& srcNumDups, // accumulator - /* size_t& unionNumDups, */ // accumulator const LO whichImport, const LO targetLocalIndex, GO tgtGblColInds[], @@ -102,9 +89,7 @@ namespace Tpetra { const size_t origNumSrcEnt, const bool srcIsUnique) { - update_impl(Phase::IMPORT, - tgtNumDups, srcNumDups, /* unionNumDups, */ - whichImport, targetLocalIndex, + update_impl(Phase::IMPORT, whichImport, targetLocalIndex, tgtGblColInds, origNumTgtEnt, tgtIsUnique, srcGblColInds, origNumSrcEnt, srcIsUnique); } @@ -160,9 +145,6 @@ namespace Tpetra { void update_impl( const Phase phase, - size_t& tgtNumDups, - size_t& srcNumDups, - /* size_t& unionNumDups, */ const LO whichImport, const LO targetLocalIndex, GO tgtGblColInds[], @@ -193,12 +175,11 @@ namespace Tpetra { tgtEnd = std::unique(tgtGblColInds, tgtEnd); newNumTgtEnt = size_t(tgtEnd - tgtGblColInds); TEUCHOS_ASSERT( newNumTgtEnt <= origNumTgtEnt ); - tgtNumDups += (origNumTgtEnt - newNumTgtEnt); } if (verbose_) { std::ostringstream os; - os << *prefix << "tgtNumDups=" << tgtNumDups << endl; + os << *prefix << "finished src; process tgt" << endl; std::cerr << os.str(); } @@ -209,24 +190,14 @@ namespace Tpetra { srcEnd = std::unique(srcGblColInds, srcEnd); newNumSrcEnt = size_t(srcEnd - srcGblColInds); TEUCHOS_ASSERT( newNumSrcEnt <= origNumSrcEnt ); - srcNumDups += (origNumSrcEnt - newNumSrcEnt); } - if (verbose_) { - std::ostringstream os; - os << *prefix << "srcNumDups=" << srcNumDups << endl; - std::cerr << os.str(); - } - - //size_t unionNumEnt = 0; - merge_with_current_state(phase, /* unionNumEnt, */ - whichImport, targetLocalIndex, + merge_with_current_state(phase, whichImport, targetLocalIndex, tgtGblColInds, newNumTgtEnt, srcGblColInds, newNumSrcEnt); if (verbose_) { std::ostringstream os; os << *prefix << "Done" << 
endl; - // os << "unionNumDups=" << unionNumDups << endl; std::cerr << os.str(); } } @@ -242,7 +213,6 @@ namespace Tpetra { void merge_with_current_state( const Phase phase, - /* size_t& unionNumEnt, */ const LO whichIndex, const LO tgtLclRowInd, const GO tgtColInds[], // sorted & merged @@ -281,24 +251,6 @@ namespace Tpetra { auto tgtEnd = tgtColInds + numTgtEnt; auto srcEnd = srcColInds + numSrcEnt; - // const size_t numInCommon = countNumInCommon( - // srcColInds, srcEnd, tgtColInds, tgtEnd); - // if (verbose_) { - // std::ostringstream os; - // os << *prefix << "numInCommon=" << numInCommon << endl; - // std::cerr << os.str(); - // } - // TEUCHOS_ASSERT( numTgtEnt + numSrcEnt >= numInCommon ); - // unionNumEnt = numTgtEnt + numSrcEnt - numInCommon; - - // if (numInCommon == numSrcEnt) { - // if (verbose_) { - // std::ostringstream os; - // os << *prefix << "Done (early; store nothing)" << endl; - // std::cerr << os.str(); - // } - // return; - // } // At least one input source index isn't in the target. 
std::vector& diffColInds = diff --git a/packages/tpetra/core/src/Tpetra_Distributor.cpp b/packages/tpetra/core/src/Tpetra_Distributor.cpp index 7e3a8cb4c461..385fb12c2e7b 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.cpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.cpp @@ -1056,7 +1056,7 @@ namespace Tpetra { if (verbose_) { prefix = createPrefix("createFromSends"); std::ostringstream os; - os << *prefix; + os << *prefix << "Start: "; Details::verbosePrintArray(os, exportProcIDs, "exportPIDs", maxNumToPrint); os << endl; diff --git a/packages/tpetra/core/src/Tpetra_Distributor.hpp b/packages/tpetra/core/src/Tpetra_Distributor.hpp index cfdec6717c61..85f6c4d99355 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.hpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.hpp @@ -2940,10 +2940,10 @@ namespace Tpetra { template void Distributor:: - computeSends (const Teuchos::ArrayView & importGIDs, - const Teuchos::ArrayView & importProcIDs, - Teuchos::Array & exportGIDs, - Teuchos::Array & exportProcIDs) + computeSends(const Teuchos::ArrayView& importGIDs, + const Teuchos::ArrayView& importProcIDs, + Teuchos::Array& exportGIDs, + Teuchos::Array& exportProcIDs) { // NOTE (mfh 19 Apr 2012): There was a note on this code saying: // "assumes that size_t >= Ordinal". 
The code certainly does @@ -2955,7 +2955,10 @@ namespace Tpetra { using Teuchos::Array; using Teuchos::ArrayView; using std::endl; - typedef typename ArrayView::size_type size_type; + using size_type = typename ArrayView::size_type; + const char errPrefix[] = "Tpetra::Distributor::computeSends: "; + const char suffix[] = + " Please report this bug to the Tpetra developers."; const int myRank = comm_->getRank (); std::unique_ptr prefix; @@ -2966,18 +2969,18 @@ namespace Tpetra { std::cerr << os.str(); } - TEUCHOS_TEST_FOR_EXCEPTION( - importGIDs.size () != importProcIDs.size (), std::invalid_argument, - "Tpetra::Distributor::computeSends: On Process " << myRank << ": " - "importProcIDs.size() = " << importProcIDs.size () - << " != importGIDs.size() = " << importGIDs.size () << "."); + TEUCHOS_TEST_FOR_EXCEPTION + (importGIDs.size () != importProcIDs.size (), + std::invalid_argument, errPrefix << "On Process " << myRank + << ": importProcIDs.size()=" << importProcIDs.size() + << " != importGIDs.size()=" << importGIDs.size() << "."); - const size_type numImports = importProcIDs.size (); - Array importObjs (2*numImports); + const size_type numImports = importProcIDs.size(); + Array importObjs(2*numImports); // Pack pairs (importGIDs[i], my process ID) to send into importObjs. for (size_type i = 0; i < numImports; ++i) { - importObjs[2*i] = static_cast (importGIDs[i]); - importObjs[2*i+1] = static_cast (myRank); + importObjs[2*i] = static_cast(importGIDs[i]); + importObjs[2*i+1] = static_cast(myRank); } // // Use a temporary Distributor to send the (importGIDs[i], myRank) @@ -2991,24 +2994,25 @@ namespace Tpetra { } // mfh 20 Mar 2014: An extra-cautious cast from unsigned to // signed, in order to forestall any possible causes for Bug 6069. 
- const size_t numExportsAsSizeT = tempPlan.createFromSends (importProcIDs); - const size_type numExports = static_cast (numExportsAsSizeT); - TEUCHOS_TEST_FOR_EXCEPTION( - numExports < 0, std::logic_error, "Tpetra::Distributor::computeSends: " - "tempPlan.createFromSends() returned numExports = " << numExportsAsSizeT - << " as a size_t, which overflows to " << numExports << " when cast to " - << Teuchos::TypeNameTraits::name () << ". " - "Please report this bug to the Tpetra developers."); - TEUCHOS_TEST_FOR_EXCEPTION( - static_cast (tempPlan.getTotalReceiveLength ()) != numExports, - std::logic_error, "Tpetra::Distributor::computeSends: tempPlan.getTotal" - "ReceiveLength() = " << tempPlan.getTotalReceiveLength () << " != num" - "Exports = " << numExports << ". Please report this bug to the " - "Tpetra developers."); + const size_t numExportsAsSizeT = + tempPlan.createFromSends(importProcIDs); + const size_type numExports = + static_cast(numExportsAsSizeT); + TEUCHOS_TEST_FOR_EXCEPTION + (numExports < 0, std::logic_error, errPrefix << + "tempPlan.createFromSends() returned numExports=" + << numExportsAsSizeT << " as a size_t, which overflows to " + << numExports << " when cast to " << + Teuchos::TypeNameTraits::name () << "." << suffix); + TEUCHOS_TEST_FOR_EXCEPTION + (size_type(tempPlan.getTotalReceiveLength()) != numExports, + std::logic_error, errPrefix << "tempPlan.getTotalReceiveLength()=" + << tempPlan.getTotalReceiveLength () << " != numExports=" + << numExports << "." << suffix); if (numExports > 0) { - exportGIDs.resize (numExports); - exportProcIDs.resize (numExports); + exportGIDs.resize(numExports); + exportProcIDs.resize(numExports); } // exportObjs: Packed receive buffer. (exportObjs[2*i], @@ -3019,21 +3023,16 @@ namespace Tpetra { // size_t. This issue might come up, for example, on a 32-bit // machine using 64-bit global indices. I will add a check here // for that case. 
- TEUCHOS_TEST_FOR_EXCEPTION( - sizeof (size_t) < sizeof (OrdinalType), std::logic_error, - "Tpetra::Distributor::computeSends: sizeof(size_t) = " << sizeof(size_t) - << " < sizeof(" << Teuchos::TypeNameTraits::name () << ") = " - << sizeof (OrdinalType) << ". This violates an assumption of the " - "method. It's not hard to work around (just use Array as " - "the export buffer, not Array), but we haven't done that yet. " - "Please report this bug to the Tpetra developers."); + static_assert(sizeof(size_t) >= sizeof(OrdinalType), + "Tpetra::Distributor::computeSends: " + "sizeof(size_t) < sizeof(OrdinalType)."); - TEUCHOS_TEST_FOR_EXCEPTION( - tempPlan.getTotalReceiveLength () < static_cast (numExports), - std::logic_error, - "Tpetra::Distributor::computeSends: tempPlan.getTotalReceiveLength() = " - << tempPlan.getTotalReceiveLength() << " < numExports = " << numExports - << ". Please report this bug to the Tpetra developers."); + TEUCHOS_TEST_FOR_EXCEPTION + (tempPlan.getTotalReceiveLength () < size_t(numExports), + std::logic_error, + errPrefix << "tempPlan.getTotalReceiveLength()=" + << tempPlan.getTotalReceiveLength() << " < numExports=" + << numExports << "." 
<< suffix); Array exportObjs (tempPlan.getTotalReceiveLength () * 2); if (verbose_) { @@ -3064,8 +3063,8 @@ namespace Tpetra { Teuchos::Array &exportProcIDs) { using std::endl; + const char errPrefix[] = "Tpetra::Distributor::createFromRecvs: "; const int myRank = comm_->getRank(); - const bool debug = Details::Behavior::debug("Distributor"); std::unique_ptr prefix; if (verbose_) { @@ -3075,6 +3074,7 @@ namespace Tpetra { std::cerr << os.str(); } + const bool debug = Details::Behavior::debug("Distributor"); if (debug) { using Teuchos::outArg; using Teuchos::REDUCE_MAX; @@ -3086,23 +3086,24 @@ namespace Tpetra { int maxErrProc = -1; reduceAll(*comm_, REDUCE_MAX, errProc, outArg(maxErrProc)); TEUCHOS_TEST_FOR_EXCEPTION - (maxErrProc != -1, std::runtime_error, - "Tpetra::Distributor::createFromRecvs: lists of remote IDs " - "and remote process IDs must have the same size on all participating " - "processes. Maximum process ID with error: " << maxErrProc << "."); + (maxErrProc != -1, std::runtime_error, errPrefix << "Lists " + "of remote IDs and remote process IDs must have the same " + "size on all participating processes. Maximum process ID " + "with error: " << maxErrProc << "."); } else { // in non-debug mode, just test locally + // NOTE (mfh 13 Feb 2020) This needs to throw std::runtime_error + // in order to make an existing Distributor unit test pass. 
TEUCHOS_TEST_FOR_EXCEPTION - (remoteGIDs.size () != remoteProcIDs.size (), std::invalid_argument, - "Tpetra::Distributor::createFromRecvs<" << - Teuchos::TypeNameTraits::name () << ">(): On Process " << - myRank << ": remoteGIDs.size() = " << remoteGIDs.size () << " != " - "remoteProcIDs.size() = " << remoteProcIDs.size () << "."); + (remoteGIDs.size() != remoteProcIDs.size(), std::runtime_error, + errPrefix << "On Process " << myRank << ": " + "remoteGIDs.size()=" << remoteGIDs.size() << + " != remoteProcIDs.size()=" << remoteProcIDs.size() << "."); } - computeSends (remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs); + computeSends(remoteGIDs, remoteProcIDs, exportGIDs, exportProcIDs); - const size_t numProcsSendingToMe = createFromSends (exportProcIDs ()); + const size_t numProcsSendingToMe = createFromSends(exportProcIDs ()); if (verbose_) { // NOTE (mfh 20 Mar 2014) If remoteProcIDs could contain diff --git a/packages/tpetra/core/test/Distributor/Distributor_UnitTests.cpp b/packages/tpetra/core/test/Distributor/Distributor_UnitTests.cpp index 6f42727accf3..487f4b54ab86 100644 --- a/packages/tpetra/core/test/Distributor/Distributor_UnitTests.cpp +++ b/packages/tpetra/core/test/Distributor/Distributor_UnitTests.cpp @@ -35,8 +35,6 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// // ************************************************************************ // @HEADER */ @@ -44,6 +42,7 @@ #include "Teuchos_UnitTestHarness.hpp" #include "Tpetra_Core.hpp" #include "Tpetra_Distributor.hpp" +#include "Tpetra_Details_Behavior.hpp" #include "Teuchos_Array.hpp" #include "Teuchos_as.hpp" @@ -176,11 +175,6 @@ namespace { TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } - -// mfh 01 Apr 2013: Distributor only checks input arguments in a -// debug build, so this test is only enabled in a debug build. 
-#ifdef HAVE_TPETRA_DEBUG - //// TEUCHOS_UNIT_TEST( Distributor, badArgsFromSends) { RCP > comm = getDefaultComm(); @@ -190,27 +184,33 @@ namespace { // every node size_t numImports = 0; + const bool debug = Tpetra::Details::Behavior::debug("Distributor"); + // create from sends with bad node IDs - { - Distributor distributor(comm); - TEST_THROW( numImports = distributor.createFromSends( tuple(myImageID+1)), std::runtime_error ); + if (debug) { + { + Distributor distributor(comm); + TEST_THROW( numImports = distributor.createFromSends( tuple(myImageID+1)), std::runtime_error ); + // Printing numImports prevents a compiler warning (set but unused). + out << "numImports result: " << numImports << std::endl; + } + { + Distributor distributor(comm); + TEST_THROW( numImports = distributor.createFromSends( tuple(0,myImageID+1,0)), std::runtime_error ); + // Printing numImports prevents a compiler warning (set but unused). + out << "numImports result: " << numImports << std::endl; + } } - // Printing numImports prevents a compiler warning (set but unused). - out << "numImports result: " << numImports << std::endl; - - { - Distributor distributor(comm); - TEST_THROW( numImports = distributor.createFromSends( tuple(0,myImageID+1,0)), std::runtime_error ); + else { + out << "Debug mode not enabled; set TPETRA_DEBUG=Distributor " + "to test." << std::endl; } - // Printing numImports prevents a compiler warning (set but unused). - out << "numImports result: " << numImports << std::endl; // All procs fail if any proc fails int globalSuccess_int = -1; reduceAll( *comm, REDUCE_SUM, success ? 0 : 1, outArg(globalSuccess_int) ); TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } -#endif // HAVE_TPETRA_DEBUG //// TEUCHOS_UNIT_TEST( Distributor, createFromSendsMixedContig) @@ -864,52 +864,55 @@ namespace { TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } - -// mfh 01 Apr 2013: Distributor only checks input arguments in a -// debug build, so this test is only enabled in a debug build. 
-#ifdef HAVE_TPETRA_DEBUG - //// TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( Distributor, badArgsFromRecvs, Ordinal ) { RCP > comm = getDefaultComm(); const int myImageID = comm->getRank(); - // each node i sends to node i+1 - // for the last node, this results in an invalid node id, which should throw an exception on - // every node - // create from recvs with bad node IDs - { - Distributor distributor(comm); - Array exportIDs; - Array exportNodeIDs; - TEST_THROW( distributor.createFromRecvs( tuple(0), tuple(myImageID+1), exportIDs, exportNodeIDs), std::runtime_error ); - } - { - Distributor distributor(comm); - Array exportIDs; - Array exportNodeIDs; - TEST_THROW( distributor.createFromRecvs( tuple(0,0,0), tuple(0,myImageID+1,0), exportIDs, exportNodeIDs), std::runtime_error ); - } - // create from recvs with conflicting sizes, but otherwise valid entries - { - Distributor distributor(comm); - Array exportIDs; - Array exportNodeIDs; - TEST_THROW( distributor.createFromRecvs( tuple(0), tuple(0,0), exportIDs, exportNodeIDs), std::runtime_error ); + const bool debug = Tpetra::Details::Behavior::debug("Distributor"); + + // Each (MPI) process i sends to process i+1. For the last + // process, calling createFromRecvs with these data result in an + // invalid process id. In debug mode, this should throw an + // exception on every process. 
+ + if (debug) { + { + Distributor distributor(comm); + Array exportIDs; + Array exportNodeIDs; + TEST_THROW( distributor.createFromRecvs( tuple(0), tuple(myImageID+1), exportIDs, exportNodeIDs), std::runtime_error ); + } + { + Distributor distributor(comm); + Array exportIDs; + Array exportNodeIDs; + TEST_THROW( distributor.createFromRecvs( tuple(0,0,0), tuple(0,myImageID+1,0), exportIDs, exportNodeIDs), std::runtime_error ); + } + // create from recvs with conflicting sizes, but otherwise valid entries + { + Distributor distributor(comm); + Array exportIDs; + Array exportNodeIDs; + TEST_THROW( distributor.createFromRecvs( tuple(0), tuple(0,0), exportIDs, exportNodeIDs), std::runtime_error ); + } + { + Distributor distributor(comm); + Array exportIDs; + Array exportNodeIDs; + TEST_THROW( distributor.createFromRecvs( tuple(0,0), tuple(0), exportIDs, exportNodeIDs), std::runtime_error ); + } } - { - Distributor distributor(comm); - Array exportIDs; - Array exportNodeIDs; - TEST_THROW( distributor.createFromRecvs( tuple(0,0), tuple(0), exportIDs, exportNodeIDs), std::runtime_error ); + else { + out << "Debug mode not enabled; set TPETRA_DEBUG=Distributor " + "to test." << std::endl; } + // All procs fail if any proc fails int globalSuccess_int = -1; reduceAll( *comm, REDUCE_SUM, success ? 
0 : 1, outArg(globalSuccess_int) ); TEST_EQUALITY_CONST( globalSuccess_int, 0 ); } -#endif // HAVE_TPETRA_DEBUG - //// TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( Distributor, createFromRecvs, Ordinal ) { RCP > comm = getDefaultComm(); @@ -995,5 +998,3 @@ namespace { # endif // FAST_DEVELOPMENT_UNIT_TEST_BUILD } - - From 5832e16d4ddec26b5e130aee4c17bc39bdabd9a3 Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Fri, 14 Feb 2020 09:10:35 -0700 Subject: [PATCH 49/49] Tpetra::Distributor: Fix build warning (unused variable) --- packages/tpetra/core/src/Tpetra_Distributor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/tpetra/core/src/Tpetra_Distributor.cpp b/packages/tpetra/core/src/Tpetra_Distributor.cpp index 385fb12c2e7b..3eb6e35cafb4 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.cpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.cpp @@ -516,8 +516,9 @@ namespace Tpetra { reverseDistributor_->reverseDistributor_ = Teuchos::null; } - - void Distributor::doWaits() { + void + Distributor::doWaits() + { using Teuchos::Array; using Teuchos::CommRequest; using Teuchos::FancyOStream; @@ -531,7 +532,6 @@ namespace Tpetra { Teuchos::TimeMonitor timeMon (*timer_doWaits_); #endif // TPETRA_DISTRIBUTOR_TIMERS - const int myRank = comm_->getRank (); const bool debug = Details::Behavior::debug("Distributor"); std::unique_ptr prefix;