From fb518e6f83b41513aa4cb2b033bfb7fb1241c8d6 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Wed, 3 Feb 2016 21:38:13 -0700 Subject: [PATCH] IOSS: Potential fix for intel openmp issues with std::sort --- packages/seacas/libraries/ioss/src/Ioss_Map.C | 134 ++++++- .../seacas/libraries/ioss/src/Ioss_ParallelUtils.C | 21 +- .../seacas/libraries/ioss/src/Ioss_ParallelUtils.h | 124 ++++++- .../ioss/src/exo_par/Iopx_DecompositionData.C | 389 ++++++++++----------- 4 files changed, 440 insertions(+), 228 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/Ioss_Map.C b/packages/seacas/libraries/ioss/src/Ioss_Map.C index 1e94209..f9ac976 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_Map.C +++ b/packages/seacas/libraries/ioss/src/Ioss_Map.C @@ -44,6 +44,102 @@ #include // for vector, vector<>::iterator, etc namespace { + const int GDS_QSORT_CUTOFF=12; + + template + void GDS_SWAP(INT *V, size_t I, size_t J) + { + std::swap(V[I], V[J]); + } + + template + size_t gds_median3(INT v[], size_t left, size_t right) + { + size_t center; + center = (left + right) / 2; + + if (v[left] > v[center]) + GDS_SWAP(v, left, center); + if (v[left] > v[right]) + GDS_SWAP(v, left, right); + if (v[center] > v[right]) + GDS_SWAP(v, center, right); + + GDS_SWAP(v, center, right-1); + return right-1; + } + + template + void gds_qsort_int(INT v[], size_t left, size_t right) + { + size_t pivot; + size_t i, j; + + if (left + GDS_QSORT_CUTOFF <= right) { + pivot = gds_median3(v, left, right); + i = left; + j = right - 1; + + for ( ; ; ) { + while (v[++i] < v[pivot]); + while (v[--j] > v[pivot]); + if (i < j) { + GDS_SWAP(v, i, j); + } else { + break; + } + } + + GDS_SWAP(v, i, right-1); + gds_qsort_int(v, left, i-1); + gds_qsort_int(v, i+1, right); + } + } + + template + void gds_isort_int(INT v[], size_t N) + { + size_t i,j; + size_t ndx = 0; + INT small; + INT tmp; + + if (N <= 1) return; + small = v[0]; + for (i = 1; i < N; i++) { + if (v[i] < small) { + small = v[i]; + ndx = i; + } + } + /* Put smallest value in slot 0 */ + GDS_SWAP(v, 0, ndx); + + for (i=1; i + void gds_qsort(std::vector &v) + { + if (v.size() <= 1) return; + gds_qsort_int(v.data(), 0, v.size()-1); + gds_isort_int(v.data(), v.size()); + } + + + template + bool is_unique(std::vector vec) + { + auto last = std::unique(vec.begin(), vec.end()); + return last == vec.end(); + } + // Determines whether the input map is sequential (map[i] == i) bool is_sequential(const Ioss::MapContainer &the_map) { @@ -116,6 +212,20 @@ namespace { void verify_no_duplicate_ids(std::vector &reverse_map, int processor, const std::string &type) { // Check for duplicate ids... +#if 1 + for (size_t i=1; i < reverse_map.size(); i++) { + if (reverse_map[i-1].first == reverse_map[i].first) { + std::ostringstream errmsg; + errmsg << "\nERROR: Duplicate " << type << " global id detected on processor " + << processor << ".\n" + << " Global id " << reverse_map[i].first + << " assigned to local " << type << "s " + << reverse_map[i].second << " and " + << reverse_map[i-1].second << ".\n"; + IOSS_ERROR(errmsg); + } + } +#else std::vector::iterator dup = std::adjacent_find(reverse_map.begin(), reverse_map.end(), IdPairEqual()); @@ -131,6 +241,7 @@ namespace { << (*other).second << ".\n"; IOSS_ERROR(errmsg); } +#endif } template @@ -176,10 +287,16 @@ void Ioss::Map::build_reverse_map(int64_t num_to_get, int64_t offset, int proces // 5. Check for duplicate global_ids... // Build a vector containing the current ids... 
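Note on the sort machinery above: GDS_SWAP, gds_median3, gds_qsort_int, gds_isort_int and gds_qsort form a plain serial median-of-three quicksort that leaves runs shorter than GDS_QSORT_CUTOFF unsorted and finishes with one insertion-sort pass; calling it instead of std::sort keeps this id sorting off the std::sort path that, per the subject line, misbehaves under the Intel OpenMP build. A minimal sketch of the intended drop-in usage, cross-checked against std::sort (the test driver and its values are illustrative, not part of the patch):

    // Hypothetical standalone check: gds_qsort() should order ids exactly like std::sort().
    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>
    // ... GDS_SWAP, gds_median3, gds_qsort_int, gds_isort_int, gds_qsort as defined above ...

    int main()
    {
      std::vector<int64_t> ids      = {42, 7, 7, 1003, 2, 999, 2, 64, 5, 17, 3, 88, 1};
      std::vector<int64_t> expected = ids;

      gds_qsort(ids);                              // serial quicksort + insertion-sort finish
      std::sort(expected.begin(), expected.end()); // reference ordering

      assert(ids == expected);
      return 0;
    }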
- ReverseMapContainer new_ids(num_to_get); + ReverseMapContainer new_ids; + new_ids.reserve(num_to_get); + if (map.size() < offset+num_to_get+1) { + std::ostringstream errmsg; + errmsg << "\nERROR: " << entityType << " map is too small on processor " << processor << ".\n"; + IOSS_ERROR(errmsg); + } for (int64_t i=0; i < num_to_get; i++) { int64_t local_id = offset + i + 1; - new_ids[i] = std::make_pair(map[local_id], local_id); + new_ids.push_back(std::make_pair(map[local_id], local_id)); if (map[local_id] <= 0) { std::ostringstream errmsg; @@ -190,7 +307,18 @@ void Ioss::Map::build_reverse_map(int64_t num_to_get, int64_t offset, int proces } // Sort that vector... - std::sort(new_ids.begin(), new_ids.end(), IdPairCompare()); + gds_qsort(new_ids); + +#if 0 + // REMOVE DEBUGGING CODE + verify_no_duplicate_ids(new_ids, processor, entityType+"new_ids"); + if (!is_unique(new_ids)) { + std::ostringstream errmsg; + errmsg << "\nERROR: " << entityType << " map detected non-unique global id/local id pair on processor " << processor << ".\n"; + IOSS_ERROR(errmsg); + } + // END REMOVE DEBUGGING CODE +#endif int64_t new_id_min = new_ids.empty() ? 0 : new_ids.front().first; int64_t old_id_max = reverse.empty() ? 0 : reverse.back().first; diff --git a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C index 5cfd950..1a8f6d9 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C +++ b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.C @@ -47,12 +47,6 @@ #include #include -namespace { - MPI_Datatype mpi_type(double /*dummy*/) {return MPI_DOUBLE;} - MPI_Datatype mpi_type(int /*dummy*/) {return MPI_INT;} - MPI_Datatype mpi_type(unsigned int /*dummy*/) {return MPI_UNSIGNED;} - MPI_Datatype mpi_type(int64_t /*dummy*/) {return MPI_LONG_LONG_INT;} -} #endif Ioss::ParallelUtils::ParallelUtils(MPI_Comm the_communicator) @@ -62,8 +56,8 @@ Ioss::ParallelUtils::ParallelUtils(MPI_Comm the_communicator) bool Ioss::ParallelUtils::get_environment(const std::string &name, std::string &value, bool sync_parallel) const { #ifdef HAVE_MPI - char *result_string = NULL; - char *broadcast_string = NULL; + char *result_string = nullptr; + char *broadcast_string = nullptr; int string_length = 0; int rank = parallel_rank(); @@ -98,12 +92,12 @@ bool Ioss::ParallelUtils::get_environment(const std::string &name, std::string & return string_length > 0; #else char *result_string = std::getenv(name.c_str()); - if (result_string != NULL) { + if (result_string != nullptr) { value = std::string(result_string); } else { value = std::string(""); } - return (result_string != NULL); + return (result_string != nullptr); #endif } @@ -122,7 +116,7 @@ bool Ioss::ParallelUtils::get_environment(const std::string &name, bool sync_par // Return true if 'name' defined, no matter what the value. // Return false if 'name' not defined. 
#ifdef HAVE_MPI - char *result_string = NULL; + char *result_string = nullptr; int string_length = 0; int rank = Ioss::ParallelUtils::parallel_rank(); @@ -137,7 +131,7 @@ bool Ioss::ParallelUtils::get_environment(const std::string &name, bool sync_par return string_length > 0; #else char *result_string = std::getenv(name.c_str()); - return (result_string != NULL); + return (result_string != nullptr); #endif } @@ -351,9 +345,10 @@ template void Ioss::ParallelUtils::global_array_minmax(std::vector&, Mi template void Ioss::ParallelUtils::global_array_minmax(std::vector &local_minmax, MinMax which) const { - if (!local_minmax.empty()) + if (!local_minmax.empty()) { global_array_minmax(&local_minmax[0], local_minmax.size(), which); } +} template void Ioss::ParallelUtils::gather(int, std::vector&) const; template void Ioss::ParallelUtils::gather(int64_t, std::vector&) const; diff --git a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h index d6fe8fc..4f993e7 100644 --- a/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h +++ b/packages/seacas/libraries/ioss/src/Ioss_ParallelUtils.h @@ -34,17 +34,23 @@ #define IOSS_Ioss_ParallelUtils_h #include // for Int64Vector, IntVector +#include #include // for size_t #include // for string #include // for vector +#include + +#ifdef HAVE_MPI +#include +#endif namespace Ioss { class ParallelUtils { public: - explicit ParallelUtils(MPI_Comm communicator); - ~ParallelUtils() {} + explicit ParallelUtils(MPI_Comm the_communicator); + ~ParallelUtils() = default; // Assignment operator // Copy constructor @@ -117,5 +123,119 @@ namespace Ioss { private: MPI_Comm communicator_; }; + +#ifdef HAVE_MPI + inline MPI_Datatype mpi_type(double /*dummy*/) {return MPI_DOUBLE;} + inline MPI_Datatype mpi_type(int /*dummy*/) {return MPI_INT;} + inline MPI_Datatype mpi_type(int64_t /*dummy*/) {return MPI_LONG_LONG_INT;} + inline MPI_Datatype mpi_type(unsigned int /*dummy*/) {return MPI_UNSIGNED;} + + inline int power_2(int count) + { + // Return the power of two which is equal to or greater than 'count' + // count = 15 -> returns 16 + // count = 16 -> returns 16 + // count = 17 -> returns 32 + + // Use brute force... + int pow2 = 1; + while (pow2 < count) { + pow2 *= 2; + } + return pow2; + } + + template + int MY_Alltoallv64(std::vector &sendbuf, const std::vector &sendcounts, const std::vector &senddisp, + std::vector &recvbuf, const std::vector &recvcounts, const std::vector &recvdisp, MPI_Comm comm) + { + int processor_count = 0; + int my_processor = 0; + MPI_Comm_size(comm, &processor_count); + MPI_Comm_rank(comm, &my_processor); + + // Verify that all 'counts' can fit in an integer. Symmetric + // communication, so recvcounts are sendcounts on another processor. 
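The inline mpi_type() overloads added to this header used to live in anonymous namespaces inside the .C files; moving them here lets the templated communication wrappers in the header pick the matching MPI_Datatype by ordinary overload resolution on a dummy value of type T. A minimal sketch of that dispatch idea, using a hypothetical global_sum helper that is not part of Ioss:

    #include <mpi.h>
    #include <cstdint>

    // Illustrative only: overload resolution on T(0) selects the MPI datatype.
    inline MPI_Datatype mpi_type(double)       {return MPI_DOUBLE;}
    inline MPI_Datatype mpi_type(int)          {return MPI_INT;}
    inline MPI_Datatype mpi_type(int64_t)      {return MPI_LONG_LONG_INT;}
    inline MPI_Datatype mpi_type(unsigned int) {return MPI_UNSIGNED;}

    template <typename T>
    T global_sum(T local, MPI_Comm comm)   // hypothetical helper, not an Ioss API
    {
      T global = local;
      MPI_Allreduce(&local, &global, 1, mpi_type(T(0)), MPI_SUM, comm);
      return global;
    }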
+    for (int i=0; i < processor_count; i++) {
+      int snd_cnt = (int)sendcounts[i];
+      if ((int64_t)snd_cnt != sendcounts[i]) {
+        std::ostringstream errmsg;
+        errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n"
+               << " processor " << my_processor << " to processor " << i << " is " << sendcounts[i]
+               << "\n which exceeds the storage capacity of the integers used by MPI functions.\n";
+        std::cerr << errmsg.str();
+        exit(EXIT_FAILURE);
+      }
+    }
+
+    size_t pow_2=power_2(processor_count);
+
+    for(size_t i=1; i < pow_2; i++) {
+      MPI_Status status;
+
+      int tag = 24713;
+      size_t exchange_proc = i ^ my_processor;
+      if(exchange_proc < (size_t)processor_count){
+        int snd_cnt = (int)sendcounts[exchange_proc]; // Converts from int64_t to int as needed by mpi
+        int rcv_cnt = (int)recvcounts[exchange_proc];
+        if ((size_t)my_processor < exchange_proc) {
+          MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm);
+          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status);
+        }
+        else {
+          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status);
+          MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm);
+        }
+      }
+    }
+
+    // Take care of this processor's data movement...
+    std::copy(&sendbuf[senddisp[my_processor]],
+              &sendbuf[senddisp[my_processor]+sendcounts[my_processor]],
+              &recvbuf[recvdisp[my_processor]]);
+    return 0;
+  }
+
+  template <typename T>
+  int MY_Alltoallv(std::vector<T> &sendbuf, const std::vector<int64_t> &sendcnts, const std::vector<int64_t> &senddisp,
+                   std::vector<T> &recvbuf, const std::vector<int64_t> &recvcnts, const std::vector<int64_t> &recvdisp, MPI_Comm comm)
+  {
+    // Wrapper to handle case where send/recv counts and displacements are 64-bit integers.
+    // Two cases:
+    // 1) They are of type 64-bit integers, but only storing data in the 32-bit integer range.
+    //    -- if (sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, then we are ok
+    // 2) They are of type 64-bit integers, and storing data in the 64-bit integer range.
+    //    -- call special alltoallv which does point-to-point sends
+    int processor_count = 0;
+    MPI_Comm_size(comm, &processor_count);
+    size_t max_comm = sendcnts[processor_count-1] + senddisp[processor_count-1];
+    size_t one = 1;
+    if (max_comm < one<<31) {
+      // count and displacement data in range, need to copy to integer vector.
+ std::vector send_cnt(sendcnts.begin(), sendcnts.end()); + std::vector send_dis(senddisp.begin(), senddisp.end()); + std::vector recv_cnt(recvcnts.begin(), recvcnts.end()); + std::vector recv_dis(recvdisp.begin(), recvdisp.end()); + return MPI_Alltoallv(TOPTR(sendbuf), TOPTR(send_cnt), TOPTR(send_dis), mpi_type(T(0)), + TOPTR(recvbuf), TOPTR(recv_cnt), TOPTR(recv_dis), mpi_type(T(0)), comm); + } + else { + // Same as if each processor sent a message to every other process with: + // MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype),sendcnts[i], sendtype, i, tag, comm); + // And received a message from each processor with a call to: + // MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype),recvcnts[i], recvtype, i, tag, comm); + return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm); + } + } + + template + int MY_Alltoallv(std::vector &sendbuf, const std::vector &sendcnts, const std::vector &senddisp, + std::vector &recvbuf, const std::vector &recvcnts, const std::vector &recvdisp, + MPI_Comm comm) + { + return MPI_Alltoallv(TOPTR(sendbuf), (int*)TOPTR(sendcnts), (int*)TOPTR(senddisp), mpi_type(T(0)), + TOPTR(recvbuf), (int*)TOPTR(recvcnts), (int*)TOPTR(recvdisp), mpi_type(T(0)), comm); + } +#endif } #endif diff --git a/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C b/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C index d3c25fd..2eef03a 100644 --- a/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C +++ b/packages/seacas/libraries/ioss/src/exo_par/Iopx_DecompositionData.C @@ -62,151 +62,122 @@ #endif namespace { - MPI_Datatype mpi_type(double /*dummy*/) {return MPI_DOUBLE;} - MPI_Datatype mpi_type(int /*dummy*/) {return MPI_INT;} - MPI_Datatype mpi_type(int64_t /*dummy*/) {return MPI_LONG_LONG_INT;} + const int GDS_QSORT_CUTOFF=12; - template - bool is_sorted(const std::vector &vec) + template + void GDS_SWAP(INT *V, size_t I, size_t J) { - for (size_t i=1; i < vec.size(); i++) { - if (vec[i-1] > vec[i]) - return false; - } - return true; + std::swap(V[I], V[J]); } - - int exodus_byte_size_api(int exoid) + + template + size_t gds_median3(INT v[], size_t left, size_t right) { - // Check byte-size of integers stored on the database... - int mode = ex_int64_status(exoid) & EX_ALL_INT64_API; - if (mode) { - return 8; - } else { - return 4; - } - } + size_t center; + center = (left + right) / 2; - int power_2(int count) - { - // Return the power of two which is equal to or greater than 'count' - // count = 15 -> returns 16 - // count = 16 -> returns 16 - // count = 17 -> returns 32 + if (v[left] > v[center]) + GDS_SWAP(v, left, center); + if (v[left] > v[right]) + GDS_SWAP(v, left, right); + if (v[center] > v[right]) + GDS_SWAP(v, center, right); - // Use brute force... 
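MY_Alltoallv above takes two paths: when the last displacement plus the last count still fits below 2^31, the 64-bit counts are copied into int vectors and handed straight to MPI_Alltoallv; otherwise MY_Alltoallv64 walks an XOR pairing schedule (rank ^ i), moves each block with point-to-point MPI_Send/MPI_Recv, and copies the local block with std::copy. A caller-side sketch, with illustrative data and counts kept as int64_t so the same call site works on either path:

    #include <mpi.h>
    #include <cstdint>
    #include <vector>
    // #include <Ioss_ParallelUtils.h>   // for Ioss::mpi_type and Ioss::MY_Alltoallv

    // Illustrative only: every rank sends (rank+1) doubles to every other rank.
    void example_exchange(MPI_Comm comm)
    {
      int nproc = 0, rank = 0;
      MPI_Comm_size(comm, &nproc);
      MPI_Comm_rank(comm, &rank);

      std::vector<int64_t> send_count(nproc, rank + 1), recv_count(nproc, 0);
      MPI_Alltoall(send_count.data(), 1, Ioss::mpi_type(int64_t(0)),
                   recv_count.data(), 1, Ioss::mpi_type(int64_t(0)), comm);

      std::vector<int64_t> send_disp(nproc, 0), recv_disp(nproc, 0);
      for (int p = 1; p < nproc; p++) {                 // exclusive prefix sums
        send_disp[p] = send_disp[p-1] + send_count[p-1];
        recv_disp[p] = recv_disp[p-1] + recv_count[p-1];
      }

      std::vector<double> send_data(send_disp[nproc-1] + send_count[nproc-1], double(rank));
      std::vector<double> recv_data(recv_disp[nproc-1] + recv_count[nproc-1]);

      // Dispatches to MPI_Alltoallv while everything fits in 32 bits,
      // and to the pairwise 64-bit path otherwise.
      Ioss::MY_Alltoallv(send_data, send_count, send_disp,
                         recv_data, recv_count, recv_disp, comm);
    }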
- int pow2 = 1; - while (pow2 < count) { - pow2 *= 2; - } - return pow2; + GDS_SWAP(v, center, right-1); + return right-1; } - void check_dynamic_cast(const void *ptr) + template + void gds_qsort_int(INT v[], size_t left, size_t right) { - if (ptr == NULL) { - std::cerr << "INTERNAL ERROR: Invalid dynamic cast returned NULL\n"; - exit(EXIT_FAILURE); + size_t pivot; + size_t i, j; + + if (left + GDS_QSORT_CUTOFF <= right) { + pivot = gds_median3(v, left, right); + i = left; + j = right - 1; + + for ( ; ; ) { + while (v[++i] < v[pivot]); + while (v[--j] > v[pivot]); + if (i < j) { + GDS_SWAP(v, i, j); + } else { + break; + } + } + + GDS_SWAP(v, i, right-1); + gds_qsort_int(v, left, i-1); + gds_qsort_int(v, i+1, right); } } - template - int MY_Alltoallv64(std::vector &sendbuf, const std::vector &sendcounts, const std::vector &senddisp, - std::vector &recvbuf, const std::vector &recvcounts, const std::vector &recvdisp, MPI_Comm comm) - { - int processor_count = 0; - int my_processor = 0; - MPI_Comm_size(comm, &processor_count); - MPI_Comm_rank(comm, &my_processor); - - // Verify that all 'counts' can fit in an integer. Symmetric - // communication, so recvcounts are sendcounts on another processor. - for (int i=0; i < processor_count; i++) { - int snd_cnt = (int)sendcounts[i]; - if ((int64_t)snd_cnt != sendcounts[i]) { - std::ostringstream errmsg; - errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n" - << " processor " << my_processor << " to processor " << i << " is " << sendcounts[i] - << "\n which exceeds the storage capacity of the integers used by MPI functions.\n"; - std::cerr << errmsg.str(); - exit(EXIT_FAILURE); + template + void gds_isort_int(INT v[], size_t N) + { + size_t i,j; + size_t ndx = 0; + INT small; + INT tmp; + + if (N <= 1) return; + small = v[0]; + for (i = 1; i < N; i++) { + if (v[i] < small) { + small = v[i]; + ndx = i; } } + /* Put smallest value in slot 0 */ + GDS_SWAP(v, 0, ndx); - size_t pow_2=power_2(processor_count); - - for(size_t i=1; i < pow_2; i++) { - MPI_Status status; - - int tag = 24713; - size_t exchange_proc = i ^ my_processor; - if(exchange_proc < (size_t)processor_count){ - int snd_cnt = (int)sendcounts[exchange_proc]; // Converts from int64_t to int as needed by mpi - int rcv_cnt = (int)recvcounts[exchange_proc]; - if ((size_t)my_processor < exchange_proc) { - MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm); - MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status); - } - else { - MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag, comm, &status); - MPI_Send(&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)), exchange_proc, tag, comm); - } + for (i=1; i + void gds_qsort(std::vector &v) + { + if (v.size() <= 1) return; + gds_qsort_int(v.data(), 0, v.size()-1); + gds_isort_int(v.data(), v.size()); } + template - int MY_Alltoallv(std::vector &sendbuf, const std::vector &sendcnts, const std::vector &senddisp, - std::vector &recvbuf, const std::vector &recvcnts, const std::vector &recvdisp, MPI_Comm comm) - { - // Wrapper to handle case where send/recv counts and displacements are 64-bit integers. - // Two cases: - // 1) They are of type 64-bit integers, but only storing data in the 32-bit integer range. - // -- if (sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, then we are ok - // 2) They are of type 64-bit integers, and storing data in the 64-bit integer range. 
- // -- call special alltoallv which does point-to-point sends - assert(is_sorted(senddisp)); - assert(is_sorted(recvdisp)); - - int processor_count = 0; - MPI_Comm_size(comm, &processor_count); - size_t max_comm = sendcnts[processor_count-1] + senddisp[processor_count-1]; - size_t one = 1; - if (max_comm < one<<31) { - // count and displacement data in range, need to copy to integer vector. - std::vector send_cnt(sendcnts.begin(), sendcnts.end()); - std::vector send_dis(senddisp.begin(), senddisp.end()); - std::vector recv_cnt(recvcnts.begin(), recvcnts.end()); - std::vector recv_dis(recvdisp.begin(), recvdisp.end()); - return MPI_Alltoallv(TOPTR(sendbuf), (int*)TOPTR(send_cnt), (int*)TOPTR(send_dis), mpi_type(T(0)), - TOPTR(recvbuf), (int*)TOPTR(recv_cnt), (int*)TOPTR(recv_dis), mpi_type(T(0)), comm); + bool is_sorted(const std::vector &vec) + { + for (size_t i=1; i < vec.size(); i++) { + if (vec[i-1] > vec[i]) + return false; } - else { - // Same as if each processor sent a message to every other process with: - // MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype),sendcnts[i], sendtype, i, tag, comm); - // And received a message from each processor with a call to: - // MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype),recvcnts[i], recvtype, i, tag, comm); - return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm); + return true; + } + int exodus_byte_size_api(int exoid) + { + // Check byte-size of integers stored on the database... + int mode = ex_int64_status(exoid) & EX_ALL_INT64_API; + if (mode) { + return 8; + } else { + return 4; } } - template - int MY_Alltoallv(std::vector &sendbuf, const std::vector &sendcnts, const std::vector &senddisp, - std::vector &recvbuf, const std::vector &recvcnts, const std::vector &recvdisp, - MPI_Comm comm) + void check_dynamic_cast(const void *ptr) { - assert(is_sorted(senddisp)); - assert(is_sorted(recvdisp)); - - return MPI_Alltoallv(TOPTR(sendbuf), (int*)TOPTR(sendcnts), (int*)TOPTR(senddisp), mpi_type(T(0)), - TOPTR(recvbuf), (int*)TOPTR(recvcnts), (int*)TOPTR(recvdisp), mpi_type(T(0)), comm); + if (ptr == nullptr) { + std::cerr << "INTERNAL ERROR: Invalid dynamic cast returned nullptr\n"; + exit(EXIT_FAILURE); + } } template @@ -214,8 +185,7 @@ namespace { { std::sort(vec.begin(), vec.end()); vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); - // shrink-to-fit... - std::vector(vec).swap(vec); + vec.shrink_to_fit(); } template @@ -297,9 +267,7 @@ namespace { *ierr = ZOLTAN_OK; if (lids) { - for (size_t i = 0; i < element_count; i++) { - lids[i] = i; - } + std::iota(lids, lids+element_count, 0); } if (wdim) { @@ -381,16 +349,16 @@ namespace { for (size_t i=0; i < el_blocks.size(); i++) { std::string type = Ioss::Utils::lowercase(el_blocks[i].topologyType); Ioss::ElementTopology *topology = Ioss::ElementTopology::factory(type, false); - if (topology != NULL) { + if (topology != nullptr) { Ioss::ElementTopology *boundary = topology->boundary_type(0); - if (boundary != NULL) { + if (boundary != nullptr) { common_nodes = std::min(common_nodes, boundary->number_boundaries()); } else { // Different topologies on some element faces... 
size_t nb = topology->number_boundaries(); for (size_t b=1; b <= nb; b++) { boundary = topology->boundary_type(b); - if (boundary != NULL) { + if (boundary != nullptr) { common_nodes = std::min(common_nodes, boundary->number_boundaries()); } } @@ -570,6 +538,7 @@ namespace Iopx { } std::sort(importElementMap.begin(), importElementMap.end()); + // gds_qsort(importElementMap); std::copy(importElementCount.begin(), importElementCount.end(), importElementIndex.begin()); generate_index(importElementIndex); @@ -642,7 +611,7 @@ namespace Iopx { size_t local_elem = 0; // All values are 0 - localElementMap.reserve(local_elem); + localElementMap.resize(local_elem); exportElementCount.resize(processorCount+1); exportElementIndex.resize(processorCount+1); importElementCount.resize(processorCount+1); @@ -735,8 +704,8 @@ namespace Iopx { exportElementCount[myProcessor] = 0; importElementCount.resize(processorCount+1); - MPI_Alltoall(TOPTR(exportElementCount), 1, mpi_type((INT)0), - TOPTR(importElementCount), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(exportElementCount), 1, Ioss::mpi_type((INT)0), + TOPTR(importElementCount), 1, Ioss::mpi_type((INT)0), comm_); // Now fill the vectors with the elements ... size_t exp_size = std::accumulate(exportElementCount.begin(), exportElementCount.end(), 0); @@ -762,8 +731,8 @@ namespace Iopx { std::copy(importElementCount.begin(), importElementCount.end(), importElementIndex.begin()); generate_index(importElementIndex); - MY_Alltoallv(exportElementMap, exportElementCount, exportElementIndex, - importElementMap, importElementCount, importElementIndex, comm_); + Ioss::MY_Alltoallv(exportElementMap, exportElementCount, exportElementIndex, + importElementMap, importElementCount, importElementIndex, comm_); #if DEBUG_OUTPUT std::cerr << "Processor " << myProcessor << ":\t" @@ -781,7 +750,7 @@ namespace Iopx { idx_t *elem_partition) { idx_t wgt_flag = 0; // No weights - idx_t *elm_wgt = NULL; + idx_t *elm_wgt = nullptr; idx_t ncon = 1; idx_t num_flag = 0; // Use C-based numbering idx_t common_nodes = get_common_node_count(el_blocks, comm_); @@ -816,8 +785,8 @@ namespace Iopx { } else if (method == "GEOM_KWAY" || method == "KWAY_GEOM") { - idx_t *dual_xadj = NULL; - idx_t *dual_adjacency = NULL; + idx_t *dual_xadj = nullptr; + idx_t *dual_adjacency = nullptr; int rc = ParMETIS_V3_Mesh2Dual(element_dist, pointer, adjacency, &num_flag, &common_nodes, &dual_xadj, &dual_adjacency, &comm_); @@ -866,7 +835,7 @@ namespace Iopx { void DecompositionData::zoltan_decompose(const std::string &method) { float version = 0.0; - Zoltan_Initialize(0, NULL, &version); + Zoltan_Initialize(0, nullptr, &version); Zoltan zz(comm_); @@ -892,14 +861,14 @@ namespace Iopx { int num_local = 0; int num_import = 1; int num_export = 1; - ZOLTAN_ID_PTR import_global_ids = NULL; - ZOLTAN_ID_PTR import_local_ids = NULL; - ZOLTAN_ID_PTR export_global_ids = NULL; - ZOLTAN_ID_PTR export_local_ids = NULL; - int *import_procs = NULL; - int *import_to_part = NULL; - int *export_procs = NULL; - int *export_to_part = NULL; + ZOLTAN_ID_PTR import_global_ids = nullptr; + ZOLTAN_ID_PTR import_local_ids = nullptr; + ZOLTAN_ID_PTR export_global_ids = nullptr; + ZOLTAN_ID_PTR export_local_ids = nullptr; + int *import_procs = nullptr; + int *import_to_part = nullptr; + int *export_procs = nullptr; + int *export_to_part = nullptr; num_local = 1; @@ -933,7 +902,7 @@ namespace Iopx { export_map.push_back(std::make_pair(export_procs[i],export_global_ids[i])); } - std::sort(export_map.begin(), export_map.end()); + 
gds_qsort(export_map); exportElementMap.reserve(num_export); exportElementIndex.resize(processorCount+1); exportElementCount.resize(processorCount+1); @@ -954,7 +923,7 @@ namespace Iopx { export_map.push_back(std::make_pair(export_procs[i],export_glob[i])); } - std::sort(export_map.begin(), export_map.end()); + gds_qsort(export_map); exportElementMap.reserve(num_export); exportElementIndex.resize(processorCount+1); exportElementCount.resize(processorCount+1); @@ -1023,8 +992,8 @@ namespace Iopx { } } - MPI_Alltoall(TOPTR(export_conn_size), 1, mpi_type((INT)0), - TOPTR(import_conn_size), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(export_conn_size), 1, Ioss::mpi_type((INT)0), + TOPTR(import_conn_size), 1, Ioss::mpi_type((INT)0), comm_); // Now fill the vectors with the nodes ... size_t exp_size = std::accumulate(export_conn_size.begin(), export_conn_size.end(), 0); @@ -1064,8 +1033,8 @@ namespace Iopx { { std::vector import_conn(imp_size); - MY_Alltoallv(export_conn, export_conn_size, export_disp, - import_conn, import_conn_size, import_disp, comm_); + Ioss::MY_Alltoallv(export_conn, export_conn_size, export_disp, + import_conn, import_conn_size, import_disp, comm_); // Done with export_conn... std::vector().swap(export_conn); @@ -1108,8 +1077,8 @@ namespace Iopx { // Tell other processors how many nodes I will be importing from // them... importNodeCount[myProcessor] = 0; - MPI_Alltoall(TOPTR(importNodeCount), 1, mpi_type((INT)0), - TOPTR(exportNodeCount), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(importNodeCount), 1, Ioss::mpi_type((INT)0), + TOPTR(exportNodeCount), 1, Ioss::mpi_type((INT)0), comm_); size_t import_sum = std::accumulate(importNodeCount.begin(), importNodeCount.end(), 0); size_t export_sum = std::accumulate(exportNodeCount.begin(), exportNodeCount.end(), 0); @@ -1146,8 +1115,8 @@ namespace Iopx { std::copy(importNodeCount.begin(), importNodeCount.end(), importNodeIndex.begin()); generate_index(importNodeIndex); - MY_Alltoallv(import_nodes, importNodeCount, importNodeIndex, - exportNodeMap, exportNodeCount, exportNodeIndex, comm_); + Ioss::MY_Alltoallv(import_nodes, importNodeCount, importNodeIndex, + exportNodeMap, exportNodeCount, exportNodeIndex, comm_); // Map that converts nodes from the global index (1-based) to a local-per-processor index (1-based) nodeGTL.swap(nodes); @@ -1187,7 +1156,7 @@ namespace Iopx { node_proc_list.push_back(std::make_pair(exportNodeMap[i], p)); } } - std::sort(node_proc_list.begin(), node_proc_list.end()); + gds_qsort(node_proc_list); std::vector > shared_nodes; for (size_t i=0; i < node_proc_list.size(); i++) { @@ -1254,15 +1223,15 @@ namespace Iopx { // Tell other processors how many nodes/procs I am sending them... std::vector recv_comm_map_count(processorCount); - MPI_Alltoall(TOPTR(send_comm_map_count), 1, mpi_type((INT)0), - TOPTR(recv_comm_map_count), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(send_comm_map_count), 1, Ioss::mpi_type((INT)0), + TOPTR(recv_comm_map_count), 1, Ioss::mpi_type((INT)0), comm_); std::vector recv_comm_map_disp(recv_comm_map_count); generate_index(recv_comm_map_disp); nodeCommMap.resize(recv_comm_map_disp[processorCount-1] + recv_comm_map_count[processorCount-1]); - MY_Alltoallv(send_comm_map, send_comm_map_count, send_comm_map_disp, - nodeCommMap, recv_comm_map_count, recv_comm_map_disp, comm_); + Ioss::MY_Alltoallv(send_comm_map, send_comm_map_count, send_comm_map_disp, + nodeCommMap, recv_comm_map_count, recv_comm_map_disp, comm_); // Map global 0-based index to local 1-based index. 
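Zoltan hands back the export list as parallel arrays (export_procs[i], export_global_ids[i]); packing them into (processor, id) pairs and sorting them — here with gds_qsort in place of std::sort — makes every destination processor's ids contiguous, so a single pass yields per-processor counts and a prefix sum yields the offsets consumed by the later all-to-all. A minimal sketch of that bucket-by-sort step (function and variable names are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Illustrative only: turn a flat (destination processor, global id) list into
    // per-processor counts and offsets by sorting on the processor key.
    void bucket_exports(std::vector<std::pair<int,int64_t>> &export_map, int nproc,
                        std::vector<int64_t> &count, std::vector<int64_t> &offset)
    {
      std::sort(export_map.begin(), export_map.end());   // or gds_qsort(export_map)

      count.assign(nproc, 0);
      for (const auto &pr : export_map)
        count[pr.first]++;

      offset.assign(nproc, 0);                            // exclusive prefix sum
      for (int p = 1; p < nproc; p++)
        offset[p] = offset[p-1] + count[p-1];
    }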
for (size_t i=0; i < nodeCommMap.size(); i+=2) { @@ -1348,7 +1317,7 @@ namespace Iopx { } fileBlockIndex[b+1] = fileBlockIndex[b] + ebs[b].num_entry; el_blocks[b].topologyType = ebs[b].topology; - if (ebs[b].num_entry == 0 && (std::strcmp(ebs[b].topology, "NULL") == 0)) + if (ebs[b].num_entry == 0 && (std::strcmp(ebs[b].topology, "nullptr") == 0)) el_blocks[b].topologyType = "sphere"; el_blocks[b].nodesPerEntity = ebs[b].num_nodes_per_entry; @@ -1392,7 +1361,7 @@ namespace Iopx { #if DEBUG_OUTPUT std::cerr << "Processor " << myProcessor << " has " << overlap << " elements on element block " << id << "\n"; #endif - ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, blk_start, overlap, TOPTR(connectivity), NULL, NULL); + ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, blk_start, overlap, TOPTR(connectivity), nullptr, nullptr); size_t el = 0; for (size_t elem = 0; elem < overlap; elem++) { pointer.push_back(adjacency.size()); @@ -1463,9 +1432,9 @@ namespace Iopx { node_sets[i].id_ = ids[i]; sets[i].id = ids[i]; sets[i].type = EX_NODE_SET; - sets[i].entry_list = NULL; - sets[i].extra_list = NULL; - sets[i].distribution_factor_list = NULL; + sets[i].entry_list = nullptr; + sets[i].extra_list = nullptr; + sets[i].distribution_factor_list = nullptr; } ex_get_sets(exodusId, sets.size(), TOPTR(sets)); @@ -1498,7 +1467,7 @@ namespace Iopx { if (myProcessor == root) { size_t offset = 0; for (size_t i=0; i < set_count; i++) { - ex_get_set(exodusId, EX_NODE_SET, sets[i].id, &nodelist[offset], NULL); + ex_get_set(exodusId, EX_NODE_SET, sets[i].id, &nodelist[offset], nullptr); offset += sets[i].num_entry; } assert(offset == nodelist_size); @@ -1608,9 +1577,9 @@ namespace Iopx { side_sets[i].id_ = ids[i]; sets[i].id = ids[i]; sets[i].type = EX_SIDE_SET; - sets[i].entry_list = NULL; - sets[i].extra_list = NULL; - sets[i].distribution_factor_list = NULL; + sets[i].entry_list = nullptr; + sets[i].extra_list = nullptr; + sets[i].distribution_factor_list = nullptr; } ex_get_sets(exodusId, sets.size(), TOPTR(sets)); @@ -1643,7 +1612,7 @@ namespace Iopx { if (myProcessor == root) { size_t offset = 0; for (size_t i=0; i < set_count; i++) { - ex_get_set(exodusId, EX_SIDE_SET, sets[i].id, &elemlist[offset], NULL); + ex_get_set(exodusId, EX_SIDE_SET, sets[i].id, &elemlist[offset], nullptr); offset += sets[i].num_entry; } assert(offset == elemlist_size); @@ -1837,8 +1806,8 @@ namespace Iopx { // Tell each processor how many nodes worth of data to send to // every other processor... - MPI_Alltoall(TOPTR(recv_count), 1, mpi_type((INT)0), - TOPTR(send_count), 1, mpi_type((INT)0), comm_); + MPI_Alltoall(TOPTR(recv_count), 1, Ioss::mpi_type((INT)0), + TOPTR(send_count), 1, Ioss::mpi_type((INT)0), comm_); send_count[myProcessor] = 0; @@ -1873,8 +1842,8 @@ namespace Iopx { } } - MY_Alltoallv(node_comm_recv, recv_count, recv_disp, - node_comm_send, send_count, send_disp, comm_); + Ioss::MY_Alltoallv(node_comm_recv, recv_count, recv_disp, + node_comm_send, send_count, send_disp, comm_); // At this point, 'node_comm_send' contains the list of nodes that I need to provide // coordinate data for. @@ -1920,8 +1889,8 @@ namespace Iopx { recv_disp[i] *= spatialDimension; } - MY_Alltoallv(coord_send, send_count, send_disp, - coord_recv, recv_count, recv_disp, comm_); + Ioss::MY_Alltoallv(coord_send, send_count, send_disp, + coord_recv, recv_count, recv_disp, comm_); // Don't need coord_send data anymore ... clean out the vector. 
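The coordinate exchange above scales the per-node counts and displacements by spatialDimension (visible for recv_disp just before the Alltoallv) so that each requested node contributes spatialDimension doubles, and packs the owned coordinate values into coord_send before the exchange. A small sketch of packing interleaved (x,y,z) values for requested nodes, with hypothetical names:

    #include <cstdint>
    #include <vector>

    // Illustrative only: pack interleaved 3D coordinates for the nodes another
    // processor requested; the counts/displacements must be scaled by the same factor.
    void pack_requested_coordinates(const std::vector<double> &x, const std::vector<double> &y,
                                    const std::vector<double> &z,
                                    const std::vector<int64_t> &requested_local_nodes,
                                    std::vector<double> &coord_send)
    {
      coord_send.clear();
      coord_send.reserve(3 * requested_local_nodes.size());
      for (int64_t node : requested_local_nodes) {
        coord_send.push_back(x[node]);
        coord_send.push_back(y[node]);
        coord_send.push_back(z[node]);
      }
    }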
std::vector().swap(coord_send); @@ -2093,8 +2062,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, exportNodeCount, exportNodeIndex, - import_data, importNodeCount, importNodeIndex, comm_); + Ioss::MY_Alltoallv(export_data, exportNodeCount, exportNodeIndex, + import_data, importNodeCount, importNodeIndex, comm_); // Copy the imported data into ioss_data... for (size_t i=0; i < importNodeMap.size(); i++) { @@ -2134,8 +2103,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, export_count, export_disp, - import_data, import_count, import_disp, comm_); + Ioss::MY_Alltoallv(export_data, export_count, export_disp, + import_data, import_count, import_disp, comm_); // Copy the imported data into ioss_data... for (size_t i=0; i < importNodeMap.size(); i++) { @@ -2176,8 +2145,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, exportElementCount, exportElementIndex, - import_data, importElementCount, importElementIndex, comm_); + Ioss::MY_Alltoallv(export_data, exportElementCount, exportElementIndex, + import_data, importElementCount, importElementIndex, comm_); // Copy the imported data into ioss_data... // Some comes before the local data... @@ -2219,8 +2188,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(export_data, export_count, export_disp, - import_data, import_count, import_disp, comm_); + Ioss::MY_Alltoallv(export_data, export_count, export_disp, + import_data, import_count, import_disp, comm_); // Copy the imported data into ioss_data... // Some comes before the local data... @@ -2248,21 +2217,21 @@ namespace Iopx { int ierr = 0; if (field.get_name() == "mesh_model_coordinates_x") { ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, - TOPTR(tmp), NULL, NULL); + TOPTR(tmp), nullptr, nullptr); if (ierr >= 0) communicate_node_data(TOPTR(tmp), ioss_data, 1); } else if (field.get_name() == "mesh_model_coordinates_y") { ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, - NULL, TOPTR(tmp), NULL); + nullptr, TOPTR(tmp), nullptr); if (ierr >= 0) communicate_node_data(TOPTR(tmp), ioss_data, 1); } else if (field.get_name() == "mesh_model_coordinates_z") { ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, - NULL, NULL, TOPTR(tmp)); + nullptr, nullptr, TOPTR(tmp)); if (ierr >= 0) communicate_node_data(TOPTR(tmp), ioss_data, 1); } @@ -2289,7 +2258,7 @@ namespace Iopx { for (size_t d = 0; d < spatialDimension; d++) { double* coord[3]; - coord[0] = coord[1] = coord[2] = NULL; + coord[0] = coord[1] = coord[2] = nullptr; coord[d] = TOPTR(tmp); ierr = ex_get_partial_coord(exodusId, nodeOffset+1, nodeCount, coord[0], coord[1], coord[2]); @@ -2328,7 +2297,7 @@ namespace Iopx { assert(sizeof(INT) == exodus_byte_size_api(exodusId)); std::vector file_conn(count * nnpe); - ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, offset+1, count, TOPTR(file_conn), NULL, NULL); + ex_get_partial_conn(exodusId, EX_ELEM_BLOCK, id, offset+1, count, TOPTR(file_conn), nullptr, nullptr); communicate_block_data(TOPTR(file_conn), data, blk_seq, nnpe); for (size_t i=0; i < blk.iossCount * nnpe; i++) { @@ -2368,8 +2337,8 @@ namespace Iopx { } // Get my imported data and send my exported data... 
- MY_Alltoallv(exports, blk.exportCount, blk.exportIndex, - imports, blk.importCount, blk.importIndex, comm_); + Ioss::MY_Alltoallv(exports, blk.exportCount, blk.exportIndex, + imports, blk.importCount, blk.importIndex, comm_); // Map local and imported data to ioss_data. for (size_t i=0; i < blk.localMap.size(); i++) { @@ -2399,8 +2368,8 @@ namespace Iopx { } // Get my imported data and send my exported data... - MY_Alltoallv(exports, export_count, export_disp, - imports, import_count, import_disp, comm_); + Ioss::MY_Alltoallv(exports, export_count, export_disp, + imports, import_count, import_disp, comm_); // Map local and imported data to ioss_data. for (size_t i=0; i < blk.localMap.size(); i++) { @@ -2898,12 +2867,12 @@ namespace Iopx { // Read the nodeset data from the file.. if (field.get_name() == "ids" || field.get_name() == "ids_raw") { file_data.resize(set.file_count()); - ierr = ex_get_set(exodusId, type, id, TOPTR(file_data), NULL); + ierr = ex_get_set(exodusId, type, id, TOPTR(file_data), nullptr); } else if (field.get_name() == "sides") { // Sideset only... if (type == EX_SIDE_SET) { file_data.resize(set.file_count()); - ierr = ex_get_set(exodusId, type, id, NULL, TOPTR(file_data)); + ierr = ex_get_set(exodusId, type, id, nullptr, TOPTR(file_data)); } else { return -1; } @@ -2911,9 +2880,9 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = type; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; - set_param[0].distribution_factor_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; + set_param[0].distribution_factor_list = nullptr; ierr = ex_get_sets(exodusId, 1, set_param); if (set_param[0].num_distribution_factor == 0) { @@ -2982,9 +2951,9 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; - set_param[0].distribution_factor_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; + set_param[0].distribution_factor_list = nullptr; ex_get_sets(exodusId, 1, set_param); if (set_param[0].num_distribution_factor == 0) { // This should have been caught above. 
@@ -3016,8 +2985,8 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; set_param[0].distribution_factor_list = TOPTR(file_data); ierr = ex_get_sets(exodusId, 1, set_param); } @@ -3034,9 +3003,9 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; - set_param[0].distribution_factor_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; + set_param[0].distribution_factor_list = nullptr; ex_get_sets(exodusId, 1, set_param); df_count = set_param[0].num_distribution_factor; } @@ -3085,8 +3054,8 @@ namespace Iopx { ex_set set_param[1]; set_param[0].id = id; set_param[0].type = EX_SIDE_SET; - set_param[0].entry_list = NULL; - set_param[0].extra_list = NULL; + set_param[0].entry_list = nullptr; + set_param[0].extra_list = nullptr; set_param[0].distribution_factor_list = TOPTR(file_data); ex_get_sets(exodusId, 1, set_param); } @@ -3218,8 +3187,8 @@ namespace Iopx { generate_index(rcv_offset); std::vector rcv_list(*rcv_offset.rbegin() + *rcv_count.rbegin()); - MY_Alltoallv(snd_list, snd_count, snd_offset, - rcv_list, rcv_count, rcv_offset, comm_); + Ioss::MY_Alltoallv(snd_list, snd_count, snd_offset, + rcv_list, rcv_count, rcv_offset, comm_); // Iterate rcv_list and convert global ids to the global-implicit position... for (size_t i=0; i < rcv_list.size(); i++) { @@ -3229,8 +3198,8 @@ namespace Iopx { } // Send the data back now... - MY_Alltoallv(rcv_list, rcv_count, rcv_offset, - snd_list, snd_count, snd_offset, comm_); + Ioss::MY_Alltoallv(rcv_list, rcv_count, rcv_offset, + snd_list, snd_count, snd_offset, comm_); // Fill in the remaining portions of the global_implicit_map... std::vector tmp_disp(snd_offset); -- 2.7.0
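One small idiom change elsewhere in this patch: the pre-C++11 shrink trick std::vector<T>(vec).swap(vec) in uniquify() becomes vec.shrink_to_fit(), while the swap-with-empty-temporary form is kept where a buffer (export_conn, coord_send) should be released outright. A short sketch of the difference; note shrink_to_fit() is only a non-binding request:

    #include <vector>

    void release_examples(std::vector<double> &buf)
    {
      buf.resize(10);                  // logical size shrinks, capacity usually does not

      buf.shrink_to_fit();             // C++11 replacement for std::vector<double>(buf).swap(buf);
                                       // asks (non-bindingly) to drop the excess capacity

      std::vector<double>().swap(buf); // swap with an empty temporary: releases the storage
                                       // entirely, as done for the send buffers in the patch
    }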