From fb518e6f83b41513aa4cb2b033bfb7fb1241c8d6 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Wed, 3 Feb 2016 21:38:13 -0700 Subject: [PATCH] IOSS: Potential fix for intel openmp issues with std::sort --- packages/seacas/libraries/ioss/src/Ioss_Map.C | 134 ++++++- .../seacas/libraries/ioss/src/Ioss_ParallelUtils.C | 21 +- .../seacas/libraries/ioss/src/Ioss_ParallelUtils.h | 124 ++++++- .../ioss/src/exo_par/Iopx_DecompositionData.C | 389 ++++++++++----------- 4 files changed, 440 insertions(+), 228 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Map.C b/packages/seacas/libraries/ioss/src/Ioss_Map.C index 1e94209..f9ac976 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Map.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Map.C @@ -44,6 +44,102 @@ #include // for vector, vector<>::iterator, etc namespace { + const int GDS_QSORT_CUTOFF=12; + + template + void GDS_SWAP(INT *V, size_t I, size_t J) + { + std::swap(V[I], V[J]); + } + + template + size_t gds_median3(INT v[], size_t left, size_t right) + { + size_t center; + center = (left + right) / 2; + + if (v[left] > v[center]) + GDS_SWAP(v, left, center); + if (v[left] > v[right]) + GDS_SWAP(v, left, right); + if (v[center] > v[right]) + GDS_SWAP(v, center, right); + + GDS_SWAP(v, center, right-1); + return right-1; + } + + template + void gds_qsort_int(INT v[], size_t left, size_t right) + { + size_t pivot; + size_t i, j; + + if (left + GDS_QSORT_CUTOFF <= right) { + pivot = gds_median3(v, left, right); + i = left; + j = right - 1; + + for ( ; ; ) { + while (v[++i] < v[pivot]); + while (v[--j] > v[pivot]); + if (i < j) { + GDS_SWAP(v, i, j); + } else { + break; + } + } + + GDS_SWAP(v, i, right-1); + gds_qsort_int(v, left, i-1); + gds_qsort_int(v, i+1, right); + } + } + + template + void gds_isort_int(INT v[], size_t N) + { + size_t i,j; + size_t ndx = 0; + INT small; + INT tmp; + + if (N <= 1) return; + small = v[0]; + for (i = 1; i < N; i++) { + if (v[i] < small) { + small = v[i]; + ndx = i; + } + } + /* Put smallest value in slot 0 */ + GDS_SWAP(v, 0, ndx); + + for (i=1; i + void gds_qsort(std::vector &v) + { + if (v.size() <= 1) return; + gds_qsort_int(v.data(), 0, v.size()-1); + gds_isort_int(v.data(), v.size()); + } + + + template + bool is_unique(std::vector vec) + { + auto last = std::unique(vec.begin(), vec.end()); + return last == vec.end(); + } + // Determines whether the input map is sequential (map[i] == i) bool is_sequential(const Ioss::MapContainer &the_map) { @@ -116,6 +212,20 @@ namespace { void verify_no_duplicate_ids(std::vector &reverse_map, int processor, const std::string &type) { // Check for duplicate ids... +#if 1 + for (size_t i=1; i < reverse_map.size(); i++) { + if (reverse_map[i-1].first == reverse_map[i].first) { + std::ostringstream errmsg; + errmsg << "\nERROR: Duplicate " << type << " global id detected on processor " + << processor << ".\n" + << " Global id " << reverse_map[i].first + << " assigned to local " << type << "s " + << reverse_map[i].second << " and " + << reverse_map[i-1].second << ".\n"; + IOSS_ERROR(errmsg); + } + } +#else std::vector::iterator dup = std::adjacent_find(reverse_map.begin(), reverse_map.end(), IdPairEqual()); @@ -131,6 +241,7 @@ namespace { << (*other).second << ".\n"; IOSS_ERROR(errmsg); } +#endif } template @@ -176,10 +287,16 @@ void Ioss::Map::build_reverse_map(int64_t num_to_get, int64_t offset, int proces // 5. Check for duplicate global_ids... // Build a vector containing the current ids... 
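Note on the sort machinery above: GDS_SWAP, gds_median3, gds_qsort_int, gds_isort_int and gds_qsort form a plain serial median-of-three quicksort that leaves runs shorter than GDS_QSORT_CUTOFF unsorted and finishes with one insertion-sort pass; calling it instead of std::sort keeps this id sorting off the std::sort path that, per the subject line, misbehaves under the Intel OpenMP build. A minimal sketch of the intended drop-in usage, cross-checked against std::sort (the test driver and its values are illustrative, not part of the patch):

    // Hypothetical standalone check: gds_qsort() should order ids exactly like std::sort().
    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>
    // ... GDS_SWAP, gds_median3, gds_qsort_int, gds_isort_int, gds_qsort as defined above ...

    int main()
    {
      std::vector<int64_t> ids      = {42, 7, 7, 1003, 2, 999, 2, 64, 5, 17, 3, 88, 1};
      std::vector<int64_t> expected = ids;

      gds_qsort(ids);                              // serial quicksort + insertion-sort finish
      std::sort(expected.begin(), expected.end()); // reference ordering

      assert(ids == expected);
      return 0;
    }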
- ReverseMapContainer new_ids(num_to_get); + ReverseMapContainer new_ids; + new_ids.reserve(num_to_get); + if (map.size() < offset+num_to_get+1) { + std::ostringstream errmsg; + errmsg << "\nERROR: " << entityType << " map is too small on processor " << processor << ".\n"; + IOSS_ERROR(errmsg); + } for (int64_t i=0; i < num_to_get; i++) { int64_t local_id = offset + i + 1; - new_ids[i] = std::make_pair(map[local_id], local_id); + new_ids.push_back(std::make_pair(map[local_id], local_id)); if (map[local_id] <= 0) { std::ostringstream errmsg; @@ -190,7 +307,18 @@ void Ioss::Map::build_reverse_map(int64_t num_to_get, int64_t offset, int proces } // Sort that vector... - std::sort(new_ids.begin(), new_ids.end(), IdPairCompare()); + gds_qsort(new_ids); + +#if 0 + // REMOVE DEBUGGING CODE + verify_no_duplicate_ids(new_ids, processor, entityType+"new_ids"); + if (!is_unique(new_ids)) { + std::ostringstream errmsg; + errmsg << "\nERROR: " << entityType << " map detected non-unique global id/local id pair on processor " << processor << ".\n"; + IOSS_ERROR(errmsg); + } + // END REMOVE DEBUGGING CODE +#endif int64_t new_id_min = new_ids.empty() ? 0 : new_ids.front().first; int64_t old_id_max = reverse.empty() ? 0 : reverse.back().first; diff --git a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C index 5cfd950..1a8f6d9 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C @@ -47,12 +47,6 @@ #include #include -namespace { - MPI_Datatype mpi_type(double /*dummy*/) {return MPI_DOUBLE;} - MPI_Datatype mpi_type(int /*dummy*/) {return MPI_INT;} - MPI_Datatype mpi_type(unsigned int /*dummy*/) {return MPI_UNSIGNED;} - MPI_Datatype mpi_type(int64_t /*dummy*/) {return MPI_LONG_LONG_INT;} -} #endif Ioss::ParallelUtils::ParallelUtils(MPI_Comm the_communicator) @@ -62,8 +56,8 @@ Ioss::ParallelUtils::ParallelUtils(MPI_Comm the_communicator) bool Ioss::ParallelUtils::get_environment(const std::string &name, std::string &value, bool sync_parallel) const { #ifdef HAVE_MPI - char *result_string = NULL; - char *broadcast_string = NULL; + char *result_string = nullptr; + char *broadcast_string = nullptr; int string_length = 0; int rank = parallel_rank(); @@ -98,12 +92,12 @@ bool Ioss::ParallelUtils::get_environment(const std::string &name, std::string & return string_length > 0; #else char *result_string = std::getenv(name.c_str()); - if (result_string != NULL) { + if (result_string != nullptr) { value = std::string(result_string); } else { value = std::string(""); } - return (result_string != NULL); + return (result_string != nullptr); #endif } @@ -122,7 +116,7 @@ bool Ioss::ParallelUtils::get_environment(const std::string &name, bool sync_par // Return true if 'name' defined, no matter what the value. // Return false if 'name' not defined. 
#ifdef HAVE_MPI - char *result_string = NULL; + char *result_string = nullptr; int string_length = 0; int rank = Ioss::ParallelUtils::parallel_rank(); @@ -137,7 +131,7 @@ bool Ioss::ParallelUtils::get_environment(const std::string &name, bool sync_par return string_length > 0; #else char *result_string = std::getenv(name.c_str()); - return (result_string != NULL); + return (result_string != nullptr); #endif } @@ -351,9 +345,10 @@ template void Ioss::ParallelUtils::global_array_minmax(std::vector&, Mi template void Ioss::ParallelUtils::global_array_minmax(std::vector &local_minmax, MinMax which) const { - if (!local_minmax.empty()) + if (!local_minmax.empty()) { global_array_minmax(&local_minmax[0], local_minmax.size(), which); } +} template void Ioss::ParallelUtils::gather(int, std::vector&) const; template void Ioss::ParallelUtils::gather(int64_t, std::vector&) const; diff --git a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h index d6fe8fc..4f993e7 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h @@ -34,17 +34,23 @@ #define IOSS_Ioss_ParallelUtils_h #include // for Int64Vector, IntVector +#include #include // for size_t #include // for string #include // for vector +#include + +#ifdef HAVE_MPI +#include +#endif namespace Ioss { class ParallelUtils { public: - explicit ParallelUtils(MPI_Comm communicator); - ~ParallelUtils() {} + explicit ParallelUtils(MPI_Comm the_communicator); + ~ParallelUtils() = default; // Assignment operator // Copy constructor @@ -117,5 +123,119 @@ namespace Ioss { private: MPI_Comm communicator_; }; + +#ifdef HAVE_MPI + inline MPI_Datatype mpi_type(double /*dummy*/) {return MPI_DOUBLE;} + inline MPI_Datatype mpi_type(int /*dummy*/) {return MPI_INT;} + inline MPI_Datatype mpi_type(int64_t /*dummy*/) {return MPI_LONG_LONG_INT;} + inline MPI_Datatype mpi_type(unsigned int /*dummy*/) {return MPI_UNSIGNED;} + + inline int power_2(int count) + { + // Return the power of two which is equal to or greater than 'count' + // count = 15 -> returns 16 + // count = 16 -> returns 16 + // count = 17 -> returns 32 + + // Use brute force... + int pow2 = 1; + while (pow2 < count) { + pow2 *= 2; + } + return pow2; + } + + template + int MY_Alltoallv64(std::vector &sendbuf, const std::vector &sendcounts, const std::vector &senddisp, + std::vector &recvbuf, const std::vector &recvcounts, const std::vector &recvdisp, MPI_Comm comm) + { + int processor_count = 0; + int my_processor = 0; + MPI_Comm_size(comm, &processor_count); + MPI_Comm_rank(comm, &my_processor); + + // Verify that all 'counts' can fit in an integer. Symmetric + // communication, so recvcounts are sendcounts on another processor. 
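The inline mpi_type() overloads added to this header used to live in anonymous namespaces inside the .C files; moving them here lets the templated communication wrappers in the header pick the matching MPI_Datatype by ordinary overload resolution on a dummy value of type T. A minimal sketch of that dispatch idea, using a hypothetical global_sum helper that is not part of Ioss:

    #include <mpi.h>
    #include <cstdint>

    // Illustrative only: overload resolution on T(0) selects the MPI datatype.
    inline MPI_Datatype mpi_type(double)       {return MPI_DOUBLE;}
    inline MPI_Datatype mpi_type(int)          {return MPI_INT;}
    inline MPI_Datatype mpi_type(int64_t)      {return MPI_LONG_LONG_INT;}
    inline MPI_Datatype mpi_type(unsigned int) {return MPI_UNSIGNED;}

    template <typename T>
    T global_sum(T local, MPI_Comm comm)   // hypothetical helper, not an Ioss API
    {
      T global = local;
      MPI_Allreduce(&local, &global, 1, mpi_type(T(0)), MPI_SUM, comm);
      return global;
    }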
+    for (int i=0; i < processor_count; i++) {
+      int snd_cnt = (int)sendcounts[i];
+      if ((int64_t)snd_cnt != sendcounts[i]) {
+        std::ostringstream errmsg;
+        errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n"
+               << " processor " << my_processor << " to processor " << i << " is " << sendcounts[i]
+               << "\n which exceeds the storage capacity of the integers used by MPI functions.\n";
+        std::cerr << errmsg.str();
+        exit(EXIT_FAILURE);
+      }
+    }
+
+    size_t pow_2=power_2(processor_count);
+
+    for(size_t i=1; i < pow_2; i++) {
+      MPI_Status status;
+
+      int tag = 24713;
+      size_t exchange_proc = i ^ my_processor;
+      if(exchange_proc < (size_t)processor_count){
+        int snd_cnt = (int)sendcounts[exchange_proc]; // Converts from int64_t to int as needed by mpi
+        int rcv_cnt = (int)recvcounts[exchange_proc];
+        if ((size_t)my_processor < exchange_proc) {
+          MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm);
+          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status);
+        }
+        else {
+          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status);
+          MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm);
+        }
+      }
+    }
+
+    // Take care of this processor's data movement...
+    std::copy(&sendbuf[senddisp[my_processor]],
+              &sendbuf[senddisp[my_processor]+sendcounts[my_processor]],
+              &recvbuf[recvdisp[my_processor]]);
+    return 0;
+  }
+
+  template <typename T>
+  int MY_Alltoallv(std::vector<T> &sendbuf, const std::vector<int64_t> &sendcnts, const std::vector<int64_t> &senddisp,
+                   std::vector<T> &recvbuf, const std::vector<int64_t> &recvcnts, const std::vector<int64_t> &recvdisp, MPI_Comm comm)
+  {
+    // Wrapper to handle case where send/recv counts and displacements are 64-bit integers.
+    // Two cases:
+    // 1) They are of type 64-bit integers, but only storing data in the 32-bit integer range.
+    //    -- if (sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, then we are ok
+    // 2) They are of type 64-bit integers, and storing data in the 64-bit integer range.
+    //    -- call special alltoallv which does point-to-point sends
+    int processor_count = 0;
+    MPI_Comm_size(comm, &processor_count);
+    size_t max_comm = sendcnts[processor_count-1] + senddisp[processor_count-1];
+    size_t one = 1;
+    if (max_comm < one<<31) {
+      // count and displacement data in range, need to copy to integer vector.
+ std::vector send_cnt(sendcnts.begin(), sendcnts.end()); + std::vector send_dis(senddisp.begin(), senddisp.end()); + std::vector recv_cnt(recvcnts.begin(), recvcnts.end()); + std::vector recv_dis(recvdisp.begin(), recvdisp.end()); + return MPI_Alltoallv(TOPTR(sendbuf), TOPTR(send_cnt), TOPTR(send_dis), mpi_type(T(0)), + TOPTR(recvbuf), TOPTR(recv_cnt), TOPTR(recv_dis), mpi_type(T(0)), comm); + } + else { + // Same as if each processor sent a message to every other process with: + // MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype),sendcnts[i], sendtype, i, tag, comm); + // And received a message from each processor with a call to: + // MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype),recvcnts[i], recvtype, i, tag, comm); + return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm); + } + } + + template + int MY_Alltoallv(std::vector &sendbuf, const std::vector &sendcnts, const std::vector &senddisp, + std::vector &recvbuf, const std::vector &recvcnts, const std::vector &recvdisp, + MPI_Comm comm) + { + return MPI_Alltoallv(TOPTR(sendbuf), (int*)TOPTR(sendcnts), (int*)TOPTR(senddisp), mpi_type(T(0)), + TOPTR(recvbuf), (int*)TOPTR(recvcnts), (int*)TOPTR(recvdisp), mpi_type(T(0)), comm); + } +#endif } #endif diff --git a/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C b/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C index d3c25fd..2eef03a 100644 --- a/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C @@ -62,151 +62,122 @@ #endif namespace { - MPI_Datatype mpi_type(double /*dummy*/) {return MPI_DOUBLE;} - MPI_Datatype mpi_type(int /*dummy*/) {return MPI_INT;} - MPI_Datatype mpi_type(int64_t /*dummy*/) {return MPI_LONG_LONG_INT;} + const int GDS_QSORT_CUTOFF=12; - template - bool is_sorted(const std::vector &vec) + template + void GDS_SWAP(INT *V, size_t I, size_t J) { - for (size_t i=1; i < vec.size(); i++) { - if (vec[i-1] > vec[i]) - return false; - } - return true; + std::swap(V[I], V[J]); } - - int exodus_byte_size_api(int exoid) + + template + size_t gds_median3(INT v[], size_t left, size_t right) { - // Check byte-size of integers stored on the database... - int mode = ex_int64_status(exoid) & EX_ALL_INT64_API; - if (mode) { - return 8; - } else { - return 4; - } - } + size_t center; + center = (left + right) / 2; - int power_2(int count) - { - // Return the power of two which is equal to or greater than 'count' - // count = 15 -> returns 16 - // count = 16 -> returns 16 - // count = 17 -> returns 32 + if (v[left] > v[center]) + GDS_SWAP(v, left, center); + if (v[left] > v[right]) + GDS_SWAP(v, left, right); + if (v[center] > v[right]) + GDS_SWAP(v, center, right); - // Use brute force... 
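MY_Alltoallv above takes two paths: when the last displacement plus the last count still fits below 2^31, the 64-bit counts are copied into int vectors and handed straight to MPI_Alltoallv; otherwise MY_Alltoallv64 walks an XOR pairing schedule (rank ^ i), moves each block with point-to-point MPI_Send/MPI_Recv, and copies the local block with std::copy. A caller-side sketch, with illustrative data and counts kept as int64_t so the same call site works on either path:

    #include <mpi.h>
    #include <cstdint>
    #include <vector>
    // #include <Ioss_ParallelUtils.h>   // for Ioss::mpi_type and Ioss::MY_Alltoallv

    // Illustrative only: every rank sends (rank+1) doubles to every other rank.
    void example_exchange(MPI_Comm comm)
    {
      int nproc = 0, rank = 0;
      MPI_Comm_size(comm, &nproc);
      MPI_Comm_rank(comm, &rank);

      std::vector<int64_t> send_count(nproc, rank + 1), recv_count(nproc, 0);
      MPI_Alltoall(send_count.data(), 1, Ioss::mpi_type(int64_t(0)),
                   recv_count.data(), 1, Ioss::mpi_type(int64_t(0)), comm);

      std::vector<int64_t> send_disp(nproc, 0), recv_disp(nproc, 0);
      for (int p = 1; p < nproc; p++) {                 // exclusive prefix sums
        send_disp[p] = send_disp[p-1] + send_count[p-1];
        recv_disp[p] = recv_disp[p-1] + recv_count[p-1];
      }

      std::vector<double> send_data(send_disp[nproc-1] + send_count[nproc-1], double(rank));
      std::vector<double> recv_data(recv_disp[nproc-1] + recv_count[nproc-1]);

      // Dispatches to MPI_Alltoallv while everything fits in 32 bits,
      // and to the pairwise 64-bit path otherwise.
      Ioss::MY_Alltoallv(send_data, send_count, send_disp,
                         recv_data, recv_count, recv_disp, comm);
    }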
- int pow2 = 1; - while (pow2 < count) { - pow2 *= 2; - } - return pow2; + GDS_SWAP(v, center, right-1); + return right-1; } - void check_dynamic_cast(const void *ptr) + template + void gds_qsort_int(INT v[], size_t left, size_t right) { - if (ptr == NULL) { - std::cerr << "INTERNAL ERROR: Invalid dynamic cast returned NULL\n"; - exit(EXIT_FAILURE); + size_t pivot; + size_t i, j; + + if (left + GDS_QSORT_CUTOFF <= right) { + pivot = gds_median3(v, left, right); + i = left; + j = right - 1; + + for ( ; ; ) { + while (v[++i] < v[pivot]); + while (v[--j] > v[pivot]); + if (i < j) { + GDS_SWAP(v, i, j); + } else { + break; + } + } + + GDS_SWAP(v, i, right-1); + gds_qsort_int(v, left, i-1); + gds_qsort_int(v, i+1, right); } } - template - int MY_Alltoallv64(std::vector &sendbuf, const std::vector &sendcounts, const std::vector &senddisp, - std::vector &recvbuf, const std::vector &recvcounts, const std::vector &recvdisp, MPI_Comm comm) - { - int processor_count = 0; - int my_processor = 0; - MPI_Comm_size(comm, &processor_count); - MPI_Comm_rank(comm, &my_processor); - - // Verify that all 'counts' can fit in an integer. Symmetric - // communication, so recvcounts are sendcounts on another processor. - for (int i=0; i < processor_count; i++) { - int snd_cnt = (int)sendcounts[i]; - if ((int64_t)snd_cnt != sendcounts[i]) { - std::ostringstream errmsg; - errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n" - << " processor " << my_processor << " to processor " << i << " is " << sendcounts[i] - << "\n which exceeds the storage capacity of the integers used by MPI functions.\n"; - std::cerr << errmsg.str(); - exit(EXIT_FAILURE); + template + void gds_isort_int(INT v[], size_t N) + { + size_t i,j; + size_t ndx = 0; + INT small; + INT tmp; + + if (N <= 1) return; + small = v[0]; + for (i = 1; i < N; i++) { + if (v[i] < small) { + small = v[i]; + ndx = i; } } + /* Put smallest value in slot 0 */ + GDS_SWAP(v, 0, ndx); - size_t pow_2=power_2(processor_count); - - for(size_t i=1; i < pow_2; i++) { - MPI_Status status; - - int tag = 24713; - size_t exchange_proc = i ^ my_processor; - if(exchange_proc < (size_t)processor_count){ - int snd_cnt = (int)sendcounts[exchange_proc]; // Converts from int64_t to int as needed by mpi - int rcv_cnt = (int)recvcounts[exchange_proc]; - if ((size_t)my_processor < exchange_proc) { - MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm); - MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status); - } - else { - MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status); - MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm); - } + for (i=1; i + void gds_qsort(std::vector &v) + { + if (v.size() <= 1) return; + gds_qsort_int(v.data(), 0, v.size()-1); + gds_isort_int(v.data(), v.size()); } + template - int MY_Alltoallv(std::vector &sendbuf, const std::vector &sendcnts, const std::vector &senddisp, - std::vector &recvbuf, const std::vector &recvcnts, const std::vector &recvdisp, MPI_Comm comm) - { - // Wrapper to handle case where send/recv counts and displacements are 64-bit integers. - // Two cases: - // 1) They are of type 64-bit integers, but only storing data in the 32-bit integer range. - // -- if (sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, then we are ok - // 2) They are of type 64-bit integers, and storing data in the 64-bit integer range. 
- // -- call special alltoallv which does point-to-point sends - assert(is_sorted(senddisp)); - assert(is_sorted(recvdisp)); - - int processor_count = 0; - MPI_Comm_size(comm, &processor_count); - size_t max_comm = sendcnts[processor_count-1] + senddisp[processor_count-1]; - size_t one = 1; - if (max_comm < one<<31) { - // count and displacement data in range, need to copy to integer vector. - std::vector send_cnt(sendcnts.begin(), sendcnts.end()); - std::vector send_dis(senddisp.begin(), senddisp.end()); - std::vector recv_cnt(recvcnts.begin(), recvcnts.end()); - std::vector recv_dis(recvdisp.begin(), recvdisp.end()); - return MPI_Alltoallv(TOPTR(sendbuf), (int*)TOPTR(send_cnt), (int*)TOPTR(send_dis), mpi_type(T(0)), - TOPTR(recvbuf), (int*)TOPTR(recv_cnt), (int*)TOPTR(recv_dis), mpi_type(T(0)), comm); + bool is_sorted(const std::vector &vec) + { + for (size_t i=1; i < vec.size(); i++) { + if (vec[i-1] > vec[i]) + return false; } - else { - // Same as if each processor sent a message to every other process with: - // MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype),sendcnts[i], sendtype, i, tag, comm); - // And received a message from each processor with a call to: - // MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype),recvcnts[i], recvtype, i, tag, comm); - return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm); + return true; + } + int exodus_byte_size_api(int exoid) + { + // Check byte-size of integers stored on the database... + int mode = ex_int64_status(exoid) & EX_ALL_INT64_API; + if (mode) { + return 8; + } else { + return 4; } } - template - int MY_Alltoallv(std::vector &sendbuf, const std::vector &sendcnts, const std::vector &senddisp, - std::vector &recvbuf, const std::vector &recvcnts, const std::vector &recvdisp, - MPI_Comm comm) + void check_dynamic_cast(const void *ptr) { - assert(is_sorted(senddisp)); - assert(is_sorted(recvdisp)); - - return MPI_Alltoallv(TOPTR(sendbuf), (int*)TOPTR(sendcnts), (int*)TOPTR(senddisp), mpi_type(T(0)), - TOPTR(recvbuf), (int*)TOPTR(recvcnts), (int*)TOPTR(recvdisp), mpi_type(T(0)), comm); + if (ptr == nullptr) { + std::cerr << "INTERNAL ERROR: Invalid dynamic cast returned nullptr\n"; + exit(EXIT_FAILURE); + } } template @@ -214,8 +185,7 @@ namespace { { std::sort(vec.begin(), vec.end()); vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); - // shrink-to-fit... - std::vector(vec).swap(vec); + vec.shrink_to_fit(); } template @@ -297,9 +267,7 @@ namespace { *ierr = ZOLTAN_OK; if (lids) { - for (size_t i = 0; i < element_count; i++) { - lids[i] = i; - } + std::iota(lids, lids+element_count, 0); } if (wdim) { @@ -381,16 +349,16 @@ namespace { for (size_t i=0; i < el_blocks.size(); i++) { std::string type = Ioss::Utils::lowercase(el_blocks[i].topologyType); Ioss::ElementTopology *topology = Ioss::ElementTopology::factory(type, false); - if (topology != NULL) { + if (topology != nullptr) { Ioss::ElementTopology *boundary = topology->boundary_type(0); - if (boundary != NULL) { + if (boundary != nullptr) { common_nodes = std::min(common_nodes, boundary->number_boundaries()); } else { // Different topologies on some element faces... 
size_t nb = topology->number_boundaries(); for (size_t b=1; b <= nb; b++) { boundary = topology->boundary_type(b); - if (boundary != NULL) { + if (boundary != nullptr) { common_nodes = std::min(common_nodes, boundary->number_boundaries()); } } @@ -570,6 +538,7 @@ namespace Iopx { } std::sort(importElementMap.begin(), importElementMap.end()); + // gds_qsort(importElementMap); std::copy(importElementCount.begin(), importElementCount.end(), importElementIndex.begin()); generate_index(importElementIndex); @@ -642,7 +611,7 @@ namespace Iopx { size_t local_elem = 0; // All values are 0 - localElementMap.reserve(local_elem); + localElementMap.resize(local_elem); exportElementCount.resize(processorCount+1); exportElementIndex.resize(processorCount+1); importElementCount.resize(processorCount+1); @@ -735,8 +704,8 @@ namespace Iopx { exportElementCount[myProcessor] = 0; importElementCount.resize(processorCount+1); - MPI_Alltoall(TOPTR(exportElementCount), 1, mpi_type((INT)0), - TOPTR(importElementCount), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(exportElementCount), 1, Ioss::mpi_type((INT)0), + TOPTR(importElementCount), 1, Ioss::mpi_type((INT)0), comm_); // Now fill the vectors with the elements ... size_t exp_size = std::accumulate(exportElementCount.begin(), exportElementCount.end(), 0); @@ -762,8 +731,8 @@ namespace Iopx { std::copy(importElementCount.begin(), importElementCount.end(), importElementIndex.begin()); generate_index(importElementIndex); - MY_Alltoallv(exportElementMap, exportElementCount, exportElementIndex, - importElementMap, importElementCount, importElementIndex, comm_); + Ioss::MY_Alltoallv(exportElementMap, exportElementCount, exportElementIndex, + importElementMap, importElementCount, importElementIndex, comm_); #if DEBUG_OUTPUT std::cerr << "Processor " << myProcessor << ":\t" @@ -781,7 +750,7 @@ namespace Iopx { idx_t *elem_partition) { idx_t wgt_flag = 0; // No weights - idx_t *elm_wgt = NULL; + idx_t *elm_wgt = nullptr; idx_t ncon = 1; idx_t num_flag = 0; // Use C-based numbering idx_t common_nodes = get_common_node_count(el_blocks, comm_); @@ -816,8 +785,8 @@ namespace Iopx { } else if (method == "GEOM_KWAY" || method == "KWAY_GEOM") { - idx_t *dual_xadj = NULL; - idx_t *dual_adjacency = NULL; + idx_t *dual_xadj = nullptr; + idx_t *dual_adjacency = nullptr; int rc = ParMETIS_V3_Mesh2Dual(element_dist, pointer, adjacency, &num_flag, &common_nodes, &dual_xadj, &dual_adjacency, &comm_); @@ -866,7 +835,7 @@ namespace Iopx { void DecompositionData::zoltan_decompose(const std::string &method) { float version = 0.0; - Zoltan_Initialize(0, NULL, &version); + Zoltan_Initialize(0, nullptr, &version); Zoltan zz(comm_); @@ -892,14 +861,14 @@ namespace Iopx { int num_local = 0; int num_import = 1; int num_export = 1; - ZOLTAN_ID_PTR import_global_ids = NULL; - ZOLTAN_ID_PTR import_local_ids = NULL; - ZOLTAN_ID_PTR export_global_ids = NULL; - ZOLTAN_ID_PTR export_local_ids = NULL; - int *import_procs = NULL; - int *import_to_part = NULL; - int *export_procs = NULL; - int *export_to_part = NULL; + ZOLTAN_ID_PTR import_global_ids = nullptr; + ZOLTAN_ID_PTR import_local_ids = nullptr; + ZOLTAN_ID_PTR export_global_ids = nullptr; + ZOLTAN_ID_PTR export_local_ids = nullptr; + int *import_procs = nullptr; + int *import_to_part = nullptr; + int *export_procs = nullptr; + int *export_to_part = nullptr; num_local = 1; @@ -933,7 +902,7 @@ namespace Iopx { export_map.push_back(std::make_pair(export_procs[i],export_global_ids[i])); } - std::sort(export_map.begin(), export_map.end()); + 
gds_qsort(export_map); exportElementMap.reserve(num_export); exportElementIndex.resize(processorCount+1); exportElementCount.resize(processorCount+1); @@ -954,7 +923,7 @@ namespace Iopx { export_map.push_back(std::make_pair(export_procs[i],export_glob[i])); } - std::sort(export_map.begin(), export_map.end()); + gds_qsort(export_map); exportElementMap.reserve(num_export); exportElementIndex.resize(processorCount+1); exportElementCount.resize(processorCount+1); @@ -1023,8 +992,8 @@ namespace Iopx { } } - MPI_Alltoall(TOPTR(export_conn_size), 1, mpi_type((INT)0), - TOPTR(import_conn_size), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(export_conn_size), 1, Ioss::mpi_type((INT)0), + TOPTR(import_conn_size), 1, Ioss::mpi_type((INT)0), comm_); // Now fill the vectors with the nodes ... size_t exp_size = std::accumulate(export_conn_size.begin(), export_conn_size.end(), 0); @@ -1064,8 +1033,8 @@ namespace Iopx { { std::vector import_conn(imp_size); - MY_Alltoallv(export_conn, export_conn_size, export_disp, - import_conn, import_conn_size, import_disp, comm_); + Ioss::MY_Alltoallv(export_conn, export_conn_size, export_disp, + import_conn, import_conn_size, import_disp, comm_); // Done with export_conn... std::vector().swap(export_conn); @@ -1108,8 +1077,8 @@ namespace Iopx { // Tell other processors how many nodes I will be importing from // them... importNodeCount[myProcessor] = 0; - MPI_Alltoall(TOPTR(importNodeCount), 1, mpi_type((INT)0), - TOPTR(exportNodeCount), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(importNodeCount), 1, Ioss::mpi_type((INT)0), + TOPTR(exportNodeCount), 1, Ioss::mpi_type((INT)0), comm_); size_t import_sum = std::accumulate(importNodeCount.begin(), importNodeCount.end(), 0); size_t export_sum = std::accumulate(exportNodeCount.begin(), exportNodeCount.end(), 0); @@ -1146,8 +1115,8 @@ namespace Iopx { std::copy(importNodeCount.begin(), importNodeCount.end(), importNodeIndex.begin()); generate_index(importNodeIndex); - MY_Alltoallv(import_nodes, importNodeCount, importNodeIndex, - exportNodeMap, exportNodeCount, exportNodeIndex, comm_); + Ioss::MY_Alltoallv(import_nodes, importNodeCount, importNodeIndex, + exportNodeMap, exportNodeCount, exportNodeIndex, comm_); // Map that converts nodes from the global index (1-based) to a local-per-processor index (1-based) nodeGTL.swap(nodes); @@ -1187,7 +1156,7 @@ namespace Iopx { node_proc_list.push_back(std::make_pair(exportNodeMap[i], p)); } } - std::sort(node_proc_list.begin(), node_proc_list.end()); + gds_qsort(node_proc_list); std::vector > shared_nodes; for (size_t i=0; i < node_proc_list.size(); i++) { @@ -1254,15 +1223,15 @@ namespace Iopx { // Tell other processors how many nodes/procs I am sending them... std::vector recv_comm_map_count(processorCount); - MPI_Alltoall(TOPTR(send_comm_map_count), 1, mpi_type((INT)0), - TOPTR(recv_comm_map_count), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(send_comm_map_count), 1, Ioss::mpi_type((INT)0), + TOPTR(recv_comm_map_count), 1, Ioss::mpi_type((INT)0), comm_); std::vector recv_comm_map_disp(recv_comm_map_count); generate_index(recv_comm_map_disp); nodeCommMap.resize(recv_comm_map_disp[processorCount-1] + recv_comm_map_count[processorCount-1]); - MY_Alltoallv(send_comm_map, send_comm_map_count, send_comm_map_disp, - nodeCommMap, recv_comm_map_count, recv_comm_map_disp, comm_); + Ioss::MY_Alltoallv(send_comm_map, send_comm_map_count, send_comm_map_disp, + nodeCommMap, recv_comm_map_count, recv_comm_map_disp, comm_); // Map global 0-based index to local 1-based index. 
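Zoltan hands back the export list as parallel arrays (export_procs[i], export_global_ids[i]); packing them into (processor, id) pairs and sorting them — here with gds_qsort in place of std::sort — makes every destination processor's ids contiguous, so a single pass yields per-processor counts and a prefix sum yields the offsets consumed by the later all-to-all. A minimal sketch of that bucket-by-sort step (function and variable names are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Illustrative only: turn a flat (destination processor, global id) list into
    // per-processor counts and offsets by sorting on the processor key.
    void bucket_exports(std::vector<std::pair<int,int64_t>> &export_map, int nproc,
                        std::vector<int64_t> &count, std::vector<int64_t> &offset)
    {
      std::sort(export_map.begin(), export_map.end());   // or gds_qsort(export_map)

      count.assign(nproc, 0);
      for (const auto &pr : export_map)
        count[pr.first]++;

      offset.assign(nproc, 0);                            // exclusive prefix sum
      for (int p = 1; p < nproc; p++)
        offset[p] = offset[p-1] + count[p-1];
    }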
for (size_t i=0; i < nodeCommMap.size(); i+=2) { @@ -1348,7 +1317,7 @@ namespace Iopx { } fileBlockIndex[b+1] = fileBlockIndex[b] + ebs[b].num_entry; el_blocks[b].topologyType = ebs[b].topology; - if (ebs[b].num_entry == 0 && (std::strcmp(ebs[b].topology, "NULL") == 0)) + if (ebs[b].num_entry == 0 && (std::strcmp(ebs[b].topology, "nullptr") == 0)) el_blocks[b].topologyType = "sphere"; el_blocks[b].nodesPerEntity = ebs[b].num_nodes_per_entry; @@ -1392,7 +1361,7 @@ namespace Iopx { #if DEBUG_OUTPUT std::cerr << "Processor " << myProcessor << " has " << overlap << " elements on element block " << id << "\n"; #endif - ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, blk_start, overlap, TOPTR(connectivity), NULL, NULL); + ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, blk_start, overlap, TOPTR(connectivity), nullptr, nullptr); size_t el = 0; for (size_t elem = 0; elem < overlap; elem++) { pointer.push_back(adjacency.size()); @@ -1463,9 +1432,9 @@ namespace Iopx { node_sets[i].id_ = ids[i]; sets[i].id = ids[i]; sets[i].type = EX_NODE_SET; - sets[i].entry_list = NULL; - sets[i].extra_list = NULL; - sets[i].distribution_factor_list = NULL; + sets[i].entry_list = nullptr; + sets[i].extra_list = nullptr; + sets[i].distribution_factor_list = nullptr; } ex_get_sets(exodusId, sets.size(), TOPTR(sets)); @@ -1498,7 +1467,7 @@ namespace Iopx { if (myProcessor == root) { size_t offset = 0; for (size_t i=0; i < set_count; i++) { - ex_get_set(exodusId, EX_NODE_SET, sets[i].id, &nodelist[offset], NULL); + ex_get_set(exodusId, EX_NODE_SET, sets[i].id, &nodelist[offset], nullptr); offset += sets[i].num_entry; } assert(offset == nodelist_size); @@ -1608,9 +1577,9 @@ namespace Iopx { side_sets[i].id_ = ids[i]; sets[i].id = ids[i]; sets[i].type = EX_SIDE_SET; - sets[i].entry_list = NULL; - sets[i].extra_list = NULL; - sets[i].distribution_factor_list = NULL; + sets[i].entry_list = nullptr; + sets[i].extra_list = nullptr; + sets[i].distribution_factor_list = nullptr; } ex_get_sets(exodusId, sets.size(), TOPTR(sets)); @@ -1643,7 +1612,7 @@ namespace Iopx { if (myProcessor == root) { size_t offset = 0; for (size_t i=0; i < set_count; i++) { - ex_get_set(exodusId, EX_SIDE_SET, sets[i].id, &elemlist[offset], NULL); + ex_get_set(exodusId, EX_SIDE_SET, sets[i].id, &elemlist[offset], nullptr); offset += sets[i].num_entry; } assert(offset == elemlist_size); @@ -1837,8 +1806,8 @@ namespace Iopx { // Tell each processor how many nodes worth of data to send to // every other processor... - MPI_Alltoall(TOPTR(recv_count), 1, mpi_type((INT)0), - TOPTR(send_count), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(recv_count), 1, Ioss::mpi_type((INT)0), + TOPTR(send_count), 1, Ioss::mpi_type((INT)0), comm_); send_count[myProcessor] = 0; @@ -1873,8 +1842,8 @@ namespace Iopx { } } - MY_Alltoallv(node_comm_recv, recv_count, recv_disp, - node_comm_send, send_count, send_disp, comm_); + Ioss::MY_Alltoallv(node_comm_recv, recv_count, recv_disp, + node_comm_send, send_count, send_disp, comm_); // At this point, 'node_comm_send' contains the list of nodes that I need to provide // coordinate data for. @@ -1920,8 +1889,8 @@ namespace Iopx { recv_disp[i] *= spatialDimension; } - MY_Alltoallv(coord_send, send_count, send_disp, - coord_recv, recv_count, recv_disp, comm_); + Ioss::MY_Alltoallv(coord_send, send_count, send_disp, + coord_recv, recv_count, recv_disp, comm_); // Don't need coord_send data anymore ... clean out the vector. 
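The coordinate exchange above scales the per-node counts and displacements by spatialDimension (visible for recv_disp just before the Alltoallv) so that each requested node contributes spatialDimension doubles, and packs the owned coordinate values into coord_send before the exchange. A small sketch of packing interleaved (x,y,z) values for requested nodes, with hypothetical names:

    #include <cstdint>
    #include <vector>

    // Illustrative only: pack interleaved 3D coordinates for the nodes another
    // processor requested; the counts/displacements must be scaled by the same factor.
    void pack_requested_coordinates(const std::vector<double> &x, const std::vector<double> &y,
                                    const std::vector<double> &z,
                                    const std::vector<int64_t> &requested_local_nodes,
                                    std::vector<double> &coord_send)
    {
      coord_send.clear();
      coord_send.reserve(3 * requested_local_nodes.size());
      for (int64_t node : requested_local_nodes) {
        coord_send.push_back(x[node]);
        coord_send.push_back(y[node]);
        coord_send.push_back(z[node]);
      }
    }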
std::vector().swap(coord_send); @@ -2093,8 +2062,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, exportNodeCount, exportNodeIndex, - import_data, importNodeCount, importNodeIndex, comm_); + Ioss::MY_Alltoallv(export_data, exportNodeCount, exportNodeIndex, + import_data, importNodeCount, importNodeIndex, comm_); // Copy the imported data into ioss_data... for (size_t i=0; i < importNodeMap.size(); i++) { @@ -2134,8 +2103,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, export_count, export_disp, - import_data, import_count, import_disp, comm_); + Ioss::MY_Alltoallv(export_data, export_count, export_disp, + import_data, import_count, import_disp, comm_); // Copy the imported data into ioss_data... for (size_t i=0; i < importNodeMap.size(); i++) { @@ -2176,8 +2145,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, exportElementCount, exportElementIndex, - import_data, importElementCount, importElementIndex, comm_); + Ioss::MY_Alltoallv(export_data, exportElementCount, exportElementIndex, + import_data, importElementCount, importElementIndex, comm_); // Copy the imported data into ioss_data... // Some comes before the local data... @@ -2219,8 +2188,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, export_count, export_disp, - import_data, import_count, import_disp, comm_); + Ioss::MY_Alltoallv(export_data, export_count, export_disp, + import_data, import_count, import_disp, comm_); // Copy the imported data into ioss_data... // Some comes before the local data... @@ -2248,21 +2217,21 @@ namespace Iopx { int ierr = 0; if (field.get_name() == "mesh_model_coordinates_x") { ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, - TOPTR(tmp), NULL, NULL); + TOPTR(tmp), nullptr, nullptr); if (ierr >= 0) communicate_node_data(TOPTR(tmp), ioss_data, 1); } else if (field.get_name() == "mesh_model_coordinates_y") { ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, - NULL, TOPTR(tmp), NULL); + nullptr, TOPTR(tmp), nullptr); if (ierr >= 0) communicate_node_data(TOPTR(tmp), ioss_data, 1); } else if (field.get_name() == "mesh_model_coordinates_z") { ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, - NULL, NULL, TOPTR(tmp)); + nullptr, nullptr, TOPTR(tmp)); if (ierr >= 0) communicate_node_data(TOPTR(tmp), ioss_data, 1); } @@ -2289,7 +2258,7 @@ namespace Iopx { for (size_t d = 0; d < spatialDimension; d++) { double* coord[3]; - coord[0] = coord[1] = coord[2] = NULL; + coord[0] = coord[1] = coord[2] = nullptr; coord[d] = TOPTR(tmp); ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, coord[0], coord[1], coord[2]); @@ -2328,7 +2297,7 @@ namespace Iopx { assert(sizeof(INT) == exodus_byte_size_api(exodusId)); std::vector file_conn(count * nnpe); - ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, offset+1, count, TOPTR(file_conn), NULL, NULL); + ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, offset+1, count, TOPTR(file_conn), nullptr, nullptr); communicate_block_data(TOPTR(file_conn), data, blk_seq, nnpe); for (size_t i=0; i < blk.iossCount * nnpe; i++) { @@ -2368,8 +2337,8 @@ namespace Iopx { } // Get my imported data and send my exported data... 
- MY_Alltoallv(exports, blk.exportCount, blk.exportIndex, - imports, blk.importCount, blk.importIndex, comm_); + Ioss::MY_Alltoallv(exports, blk.exportCount, blk.exportIndex, + imports, blk.importCount, blk.importIndex, comm_); // Map local and imported data to ioss_data. for (size_t i=0; i < blk.localMap.size(); i++) { @@ -2399,8 +2368,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(exports, export_count, export_disp, - imports, import_count, import_disp, comm_); + Ioss::MY_Alltoallv(exports, export_count, export_disp, + imports, import_count, import_disp, comm_); // Map local and imported data to ioss_data. for (size_t i=0; i < blk.localMap.size(); i++) { @@ -2898,12 +2867,12 @@ namespace Iopx { // Read the nodeset data from the file.. if (field.get_name() == "ids" || field.get_name() == "ids_raw") { file_data.resize(set.file_count()); - ierr = ex_get_set(exodusId, type, id, TOPTR(file_data), NULL); + ierr = ex_get_set(exodusId, type, id, TOPTR(file_data), nullptr); } else if (field.get_name() == "sides") { // Sideset only... if (type == EX_SIDE_SET) { file_data.resize(set.file_count()); - ierr = ex_get_set(exodusId, type, id, NULL, TOPTR(file_data)); + ierr = ex_get_set(exodusId, type, id, nullptr, TOPTR(file_data)); } else { return -1; } @@ -2911,9 +2880,9 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = type; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; - set_param[0].distribution_factor_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; + set_param[0].distribution_factor_list = nullptr; ierr = ex_get_sets(exodusId, 1, set_param); if (set_param[0].num_distribution_factor == 0) { @@ -2982,9 +2951,9 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; - set_param[0].distribution_factor_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; + set_param[0].distribution_factor_list = nullptr; ex_get_sets(exodusId, 1, set_param); if (set_param[0].num_distribution_factor == 0) { // This should have been caught above. 
@@ -3016,8 +2985,8 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; set_param[0].distribution_factor_list = TOPTR(file_data); ierr = ex_get_sets(exodusId, 1, set_param); } @@ -3034,9 +3003,9 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; - set_param[0].distribution_factor_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; + set_param[0].distribution_factor_list = nullptr; ex_get_sets(exodusId, 1, set_param); df_count = set_param[0].num_distribution_factor; } @@ -3085,8 +3054,8 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; set_param[0].distribution_factor_list = TOPTR(file_data); ex_get_sets(exodusId, 1, set_param); } @@ -3218,8 +3187,8 @@ namespace Iopx { generate_index(rcv_offset); std::vector rcv_list(*rcv_offset.rbegin() + *rcv_count.rbegin()); - MY_Alltoallv(snd_list, snd_count, snd_offset, - rcv_list, rcv_count, rcv_offset, comm_); + Ioss::MY_Alltoallv(snd_list, snd_count, snd_offset, + rcv_list, rcv_count, rcv_offset, comm_); // Iterate rcv_list and convert global ids to the global-implicit position... for (size_t i=0; i < rcv_list.size(); i++) { @@ -3229,8 +3198,8 @@ namespace Iopx { } // Send the data back now... - MY_Alltoallv(rcv_list, rcv_count, rcv_offset, - snd_list, snd_count, snd_offset, comm_); + Ioss::MY_Alltoallv(rcv_list, rcv_count, rcv_offset, + snd_list, snd_count, snd_offset, comm_); // Fill in the remaining portions of the global_implicit_map... std::vector tmp_disp(snd_offset); -- 2.7.0
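One small idiom change elsewhere in this patch: the pre-C++11 shrink trick std::vector<T>(vec).swap(vec) in uniquify() becomes vec.shrink_to_fit(), while the swap-with-empty-temporary form is kept where a buffer (export_conn, coord_send) should be released outright. A short sketch of the difference; note shrink_to_fit() is only a non-binding request:

    #include <vector>

    void release_examples(std::vector<double> &buf)
    {
      buf.resize(10);                  // logical size shrinks, capacity usually does not

      buf.shrink_to_fit();             // C++11 replacement for std::vector<double>(buf).swap(buf);
                                       // asks (non-bindingly) to drop the excess capacity

      std::vector<double>().swap(buf); // swap with an empty temporary: releases the storage
                                       // entirely, as done for the send buffers in the patch
    }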