diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml
index 1110d359f72c0..63a16c8c114ba 100644
--- a/.github/workflows/cpp.yml
+++ b/.github/workflows/cpp.yml
@@ -54,7 +54,7 @@ env:
 
 jobs:
   docker:
-    name: ${{ matrix.title }} ${{ matrix.simd-level }}
+    name: ${{ matrix.title }}
     runs-on: ${{ matrix.runs-on }}
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     timeout-minutes: 75
@@ -68,7 +68,7 @@ jobs:
             llvm: "14"
             runs-on: ubuntu-latest
             simd-level: AVX2
-            title: AMD64 Conda C++
+            title: AMD64 Conda C++ AVX2
             ubuntu: "22.04"
           - arch: amd64
             clang-tools: "14"
@@ -226,7 +226,7 @@ jobs:
           ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
 
   windows:
-    name: AMD64 ${{ matrix.name }} C++17 ${{ matrix.simd-level }}
+    name: ${{ matrix.title }}
     runs-on: ${{ matrix.os }}
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     timeout-minutes: 60
@@ -237,8 +237,8 @@ jobs:
           - windows-2019
         include:
           - os: windows-2019
-            name: Windows 2019
             simd-level: AVX2
+            title: AMD64 Windows 2019 C++17 AVX2
     env:
       ARROW_BOOST_USE_SHARED: OFF
       ARROW_BUILD_BENCHMARKS: ON
diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc b/cpp/src/arrow/acero/bloom_filter_test.cc
index eadd098cf85cb..95375e277e2b8 100644
--- a/cpp/src/arrow/acero/bloom_filter_test.cc
+++ b/cpp/src/arrow/acero/bloom_filter_test.cc
@@ -22,19 +22,18 @@
 #include <condition_variable>
 #include <thread>
 #include <unordered_set>
+
 #include "arrow/acero/bloom_filter.h"
 #include "arrow/acero/task_util.h"
 #include "arrow/acero/test_util_internal.h"
 #include "arrow/acero/util.h"
 #include "arrow/compute/key_hash.h"
 #include "arrow/util/bitmap_ops.h"
-#include "arrow/util/cpu_info.h"
 
 namespace arrow {
 
 using compute::Hashing32;
 using compute::Hashing64;
-using internal::CpuInfo;
 
 namespace acero {
 
@@ -172,13 +171,7 @@ void TestBloomSmallHashHelper(int64_t num_input_hashes, const T* input_hashes,
 // Output FPR and build and probe cost.
 //
 void TestBloomSmall(BloomFilterBuildStrategy strategy, int64_t num_build,
-                    int num_build_copies, bool use_avx2, bool enable_prefetch) {
-  int64_t hardware_flags = use_avx2 ? CpuInfo::AVX2 : 0;
-  if (hardware_flags && !CpuInfo::GetInstance()->IsSupported(hardware_flags)) {
-    // What else?
-    return;
-  }
-
+                    int num_build_copies, int64_t hardware_flags, bool enable_prefetch) {
   // Generate input keys
   //
   int64_t num_probe = 4 * num_build;
@@ -329,14 +322,8 @@ void TestBloomLargeHashHelper(int64_t hardware_flags, int64_t block,
 // Test with larger size Bloom filters (use large prime with arithmetic
 // sequence modulo 2^64).
 //
-void TestBloomLarge(BloomFilterBuildStrategy strategy, int64_t num_build, bool use_avx2,
-                    bool enable_prefetch) {
-  int64_t hardware_flags = use_avx2 ? CpuInfo::AVX2 : 0;
-  if (hardware_flags && !CpuInfo::GetInstance()->IsSupported(hardware_flags)) {
-    // What else?
-    return;
-  }
-
+void TestBloomLarge(BloomFilterBuildStrategy strategy, int64_t num_build,
+                    int64_t hardware_flags, bool enable_prefetch) {
   // Largest 63-bit prime
   constexpr uint64_t prime = 0x7FFFFFFFFFFFFFE7ULL;
 
@@ -467,42 +454,40 @@ TEST(BloomFilter, Basic) {
   num_build.push_back(1LL << log_large);
 #endif
 
-  constexpr int num_param_sets = 3;
-  struct {
-    bool use_avx2;
+  struct TestParam {
+    int64_t hardware_flags;
     bool enable_prefetch;
     bool insert_multiple_copies;
-  } params[num_param_sets];
-  for (int i = 0; i < num_param_sets; ++i) {
-    params[i].use_avx2 = (i == 1);
-    params[i].enable_prefetch = (i == 2);
-    params[i].insert_multiple_copies = (i == 3);
+  };
+  std::vector<TestParam> test_params;
+  for (const auto hardware_flags : HardwareFlagsForTesting()) {
+    test_params.push_back({hardware_flags, false, false});
   }
+  test_params.push_back({0, true, false});
+  test_params.push_back({0, false, true});
 
-  std::vector<BloomFilterBuildStrategy> strategy;
-  strategy.push_back(BloomFilterBuildStrategy::SINGLE_THREADED);
+  std::vector<BloomFilterBuildStrategy> strategies;
+  strategies.push_back(BloomFilterBuildStrategy::SINGLE_THREADED);
 #ifndef ARROW_VALGRIND
-  strategy.push_back(BloomFilterBuildStrategy::PARALLEL);
+  strategies.push_back(BloomFilterBuildStrategy::PARALLEL);
 #endif
 
   static constexpr int64_t min_rows_for_large = 2 * 1024 * 1024;
 
-  for (size_t istrategy = 0; istrategy < strategy.size(); ++istrategy) {
-    for (int iparam_set = 0; iparam_set < num_param_sets; ++iparam_set) {
-      ARROW_SCOPED_TRACE("%s ", params[iparam_set].use_avx2                 ? "AVX2"
-                                : params[iparam_set].enable_prefetch        ? "PREFETCH"
-                                : params[iparam_set].insert_multiple_copies ? "FOLDING"
-                                                                            : "REGULAR");
-      for (size_t inum_build = 0; inum_build < num_build.size(); ++inum_build) {
-        ARROW_SCOPED_TRACE("num_build ", static_cast<int>(num_build[inum_build]));
-        if (num_build[inum_build] >= min_rows_for_large) {
-          TestBloomLarge(strategy[istrategy], num_build[inum_build],
-                         params[iparam_set].use_avx2, params[iparam_set].enable_prefetch);
+  for (const auto& strategy : strategies) {
+    for (const auto& test_param : test_params) {
+      ARROW_SCOPED_TRACE("hardware_flags = ", test_param.hardware_flags,
+                         test_param.enable_prefetch ? " PREFETCH" : "",
+                         test_param.insert_multiple_copies ? " FOLDING" : "REGULAR");
+      for (const auto n : num_build) {
+        ARROW_SCOPED_TRACE("num_build ", n);
+        if (n >= min_rows_for_large) {
+          TestBloomLarge(strategy, n, test_param.hardware_flags,
+                         test_param.enable_prefetch);
 
         } else {
-          TestBloomSmall(strategy[istrategy], num_build[inum_build],
-                         params[iparam_set].insert_multiple_copies ? 8 : 1,
-                         params[iparam_set].use_avx2, params[iparam_set].enable_prefetch);
+          TestBloomSmall(strategy, n, test_param.insert_multiple_copies ? 8 : 1,
+                         test_param.hardware_flags, test_param.enable_prefetch);
         }
       }
     }
@@ -515,19 +500,18 @@ TEST(BloomFilter, Scaling) {
   num_build.push_back(1000000);
   num_build.push_back(4000000);
 
-  std::vector<BloomFilterBuildStrategy> strategy;
-  strategy.push_back(BloomFilterBuildStrategy::PARALLEL);
-
-  for (bool use_avx2 : {false, true}) {
-    for (size_t istrategy = 0; istrategy < strategy.size(); ++istrategy) {
-      for (size_t inum_build = 0; inum_build < num_build.size(); ++inum_build) {
-        ARROW_SCOPED_TRACE("num_build = ", static_cast<int>(num_build[inum_build]));
-        ARROW_SCOPED_TRACE("strategy = ",
-                           strategy[istrategy] == BloomFilterBuildStrategy::PARALLEL
-                               ? "PARALLEL"
-                               : "SINGLE_THREADED");
-        ARROW_SCOPED_TRACE("avx2 = ", use_avx2 ? "AVX2" : "SCALAR");
-        TestBloomLarge(strategy[istrategy], num_build[inum_build], use_avx2,
+  std::vector<BloomFilterBuildStrategy> strategies;
+  strategies.push_back(BloomFilterBuildStrategy::PARALLEL);
+
+  for (const auto hardware_flags : HardwareFlagsForTesting()) {
+    for (const auto& strategy : strategies) {
+      for (const auto n : num_build) {
+        ARROW_SCOPED_TRACE("num_build = ", n);
+        ARROW_SCOPED_TRACE("strategy = ", strategy == BloomFilterBuildStrategy::PARALLEL
+                                              ? "PARALLEL"
+                                              : "SINGLE_THREADED");
+        ARROW_SCOPED_TRACE("hardware_flags = ", hardware_flags);
+        TestBloomLarge(strategy, n, hardware_flags,
                        /*enable_prefetch=*/false);
       }
     }
diff --git a/cpp/src/arrow/acero/test_util_internal.cc b/cpp/src/arrow/acero/test_util_internal.cc
index 2042650be6acb..f50ca92238dc4 100644
--- a/cpp/src/arrow/acero/test_util_internal.cc
+++ b/cpp/src/arrow/acero/test_util_internal.cc
@@ -45,8 +45,10 @@
 #include "arrow/testing/builder.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
+#include "arrow/testing/util.h"
 #include "arrow/type.h"
 #include "arrow/util/async_generator.h"
+#include "arrow/util/cpu_info.h"
 #include "arrow/util/iterator.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/unreachable.h"
@@ -54,6 +56,7 @@
 
 namespace arrow {
 
+using arrow::internal::CpuInfo;
 using arrow::internal::Executor;
 
 using compute::SortKey;
@@ -62,6 +65,7 @@ using compute::Take;
 namespace acero {
 
 namespace {
+
 void ValidateOutputImpl(const ArrayData& output) {
   ASSERT_OK(::arrow::internal::ValidateArrayFull(output));
   TestInitialized(output);
@@ -116,6 +120,11 @@ void ValidateOutput(const Datum& output) {
   }
 }
 
+std::vector<int64_t> HardwareFlagsForTesting() {
+  // Acero currently only has AVX2 optimizations
+  return arrow::GetSupportedHardwareFlags({CpuInfo::AVX2});
+}
+
 namespace {
 
 struct DummyNode : ExecNode {
diff --git a/cpp/src/arrow/acero/test_util_internal.h b/cpp/src/arrow/acero/test_util_internal.h
index 03f417028650b..569fb1254db4a 100644
--- a/cpp/src/arrow/acero/test_util_internal.h
+++ b/cpp/src/arrow/acero/test_util_internal.h
@@ -20,6 +20,7 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/util/vector.h"
 
+#include <cstdint>
 #include <functional>
 #include <random>
 #include <string>
@@ -33,12 +34,14 @@
 #include "arrow/util/async_generator.h"
 #include "arrow/util/pcg_random.h"
 
-namespace arrow {
-
-namespace acero {
+namespace arrow::acero {
 
 void ValidateOutput(const Datum& output);
 
+// Enumerate all hardware flags that can be tested on this platform
+// and would lead to different code paths being tested in Acero.
+std::vector<int64_t> HardwareFlagsForTesting();
+
 using StartProducingFunc = std::function<Status(ExecNode*)>;
 using StopProducingFunc = std::function<void(ExecNode*)>;
 
@@ -204,5 +207,4 @@ struct TableGenerationProperties {
 Result<std::shared_ptr<Table>> MakeRandomTimeSeriesTable(
     const TableGenerationProperties& properties);
 
-}  // namespace acero
-}  // namespace arrow
+}  // namespace arrow::acero
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index a17d6275a763a..0bd6fe86134ab 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -18,11 +18,20 @@
 # ----------------------------------------------------------------------
 # Tests that don't require the full kernel library
 
+# Define arrow_compute_testing object library for common test files
+if(ARROW_TESTING)
+  add_library(arrow_compute_kernels_testing OBJECT test_util.cc)
+  # Even though this is still just an object library we still need to "link" our
+  # dependencies so that include paths are configured correctly
+  target_link_libraries(arrow_compute_kernels_testing ${ARROW_GTEST_GTEST})
+endif()
+
 add_arrow_test(scalar_cast_test
                ${ARROW_COMPUTE_TEST_ARGS}
                SOURCES
                scalar_cast_test.cc
-               test_util.cc)
+               EXTRA_LINK_LIBS
+               arrow_compute_kernels_testing)
 
 # ----------------------------------------------------------------------
 # Scalar kernels
@@ -32,25 +41,36 @@ add_arrow_compute_test(scalar_type_test
                        scalar_boolean_test.cc
                        scalar_nested_test.cc
                        scalar_string_test.cc
-                       test_util.cc)
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
-add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc test_util.cc)
+add_arrow_compute_test(scalar_if_else_test
+                       SOURCES
+                       scalar_if_else_test.cc
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
-add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc test_util.cc)
+add_arrow_compute_test(scalar_temporal_test
+                       SOURCES
+                       scalar_temporal_test.cc
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
 add_arrow_compute_test(scalar_math_test
                        SOURCES
                        scalar_arithmetic_test.cc
                        scalar_compare_test.cc
                        scalar_round_arithmetic_test.cc
-                       test_util.cc)
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
 add_arrow_compute_test(scalar_utility_test
                        SOURCES
                        scalar_random_test.cc
                        scalar_set_lookup_test.cc
                        scalar_validity_test.cc
-                       test_util.cc)
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
 add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
@@ -75,12 +95,20 @@ add_arrow_compute_test(vector_test
                        vector_replace_test.cc
                        vector_run_end_encode_test.cc
                        select_k_test.cc
-                       test_util.cc)
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
-add_arrow_compute_test(vector_sort_test SOURCES vector_sort_test.cc test_util.cc)
+add_arrow_compute_test(vector_sort_test
+                       SOURCES
+                       vector_sort_test.cc
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
-add_arrow_compute_test(vector_selection_test SOURCES vector_selection_test.cc
-                       test_util.cc)
+add_arrow_compute_test(vector_selection_test
+                       SOURCES
+                       vector_selection_test.cc
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
 add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute")
@@ -94,7 +122,11 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
 
 # Aggregates
 
-add_arrow_compute_test(aggregate_test SOURCES aggregate_test.cc test_util.cc)
+add_arrow_compute_test(aggregate_test
+                       SOURCES
+                       aggregate_test.cc
+                       EXTRA_LINK_LIBS
+                       arrow_compute_kernels_testing)
 
 # ----------------------------------------------------------------------
 # Utilities
diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc
index f7c1834030c55..3e6d41525cf44 100644
--- a/cpp/src/arrow/compute/key_hash_test.cc
+++ b/cpp/src/arrow/compute/key_hash_test.cc
@@ -21,9 +21,11 @@
 #include <map>
 #include <random>
 #include <unordered_set>
+
 #include "arrow/array/builder_binary.h"
 #include "arrow/compute/key_hash.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/util.h"
 #include "arrow/util/cpu_info.h"
 #include "arrow/util/pcg_random.h"
 
@@ -34,6 +36,11 @@ using internal::CpuInfo;
 
 namespace compute {
 
+std::vector<int64_t> HardwareFlagsForTesting() {
+  // Our key-hash and key-map routines currently only have AVX2 optimizations
+  return GetSupportedHardwareFlags({CpuInfo::AVX2});
+}
+
 class TestVectorHash {
  private:
   template <typename Type, typename ArrayType = typename TypeTraits<Type>::ArrayType>
@@ -132,74 +139,62 @@ class TestVectorHash {
     const offset_t* key_offsets =
         reinterpret_cast<const offset_t*>(keys_array->raw_value_offsets());
 
-    std::vector<uint32_t> hashes_scalar32;
-    std::vector<uint64_t> hashes_scalar64;
-    hashes_scalar32.resize(num_rows);
-    hashes_scalar64.resize(num_rows);
-    std::vector<uint32_t> hashes_simd32;
-    std::vector<uint64_t> hashes_simd64;
-    hashes_simd32.resize(num_rows);
-    hashes_simd64.resize(num_rows);
-
-    const int64_t hardware_flags_scalar = 0LL;
-    const int64_t hardware_flags_simd = CpuInfo::AVX2;
-    const bool simd_supported = CpuInfo::GetInstance()->IsSupported(hardware_flags_simd);
+    // For each tested hardware flags, we will compute the hashes and check
+    // them for consistency.
+    const auto hardware_flags_for_testing = HardwareFlagsForTesting();
+    ASSERT_GT(hardware_flags_for_testing.size(), 0);
+    std::vector<std::vector<uint32_t>> hashes32(hardware_flags_for_testing.size());
+    std::vector<std::vector<uint64_t>> hashes64(hardware_flags_for_testing.size());
+    for (auto& h : hashes32) {
+      h.resize(num_rows);
+    }
+    for (auto& h : hashes64) {
+      h.resize(num_rows);
+    }
 
     constexpr int mini_batch_size = 1024;
     std::vector<uint32_t> temp_buffer;
     temp_buffer.resize(mini_batch_size * 4);
 
-    for (bool use_simd : {false, true}) {
-      if (use_simd && !simd_supported) {
-        // XXX what else?
-        continue;
-      }
+    for (int i = 0; i < static_cast<int>(hardware_flags_for_testing.size()); ++i) {
+      const auto hardware_flags = hardware_flags_for_testing[i];
       if (use_32bit_hash) {
         if (!use_varlen_input) {
-          Hashing32::HashFixed(use_simd ? hardware_flags_simd : hardware_flags_scalar,
+          Hashing32::HashFixed(hardware_flags,
                                /*combine_hashes=*/false, num_rows, fixed_length, keys,
-                               use_simd ? hashes_simd32.data() : hashes_scalar32.data(),
-                               temp_buffer.data());
+                               hashes32[i].data(), temp_buffer.data());
         } else {
           for (int first_row = 0; first_row < num_rows;) {
             int batch_size_next = std::min(num_rows - first_row, mini_batch_size);
 
-            Hashing32::HashVarLen(
-                use_simd ? hardware_flags_simd : hardware_flags_scalar,
-                /*combine_hashes=*/false, batch_size_next, key_offsets + first_row, keys,
-                (use_simd ? hashes_simd32.data() : hashes_scalar32.data()) + first_row,
-                temp_buffer.data());
+            Hashing32::HashVarLen(hardware_flags,
+                                  /*combine_hashes=*/false, batch_size_next,
+                                  key_offsets + first_row, keys,
+                                  hashes32[i].data() + first_row, temp_buffer.data());
 
             first_row += batch_size_next;
           }
         }
+        for (int j = 0; j < num_rows; ++j) {
+          hashes64[i][j] = hashes32[i][j];
+        }
       } else {
         if (!use_varlen_input) {
           Hashing64::HashFixed(
-              /*combine_hashes=*/false, num_rows, fixed_length, keys,
-              use_simd ? hashes_simd64.data() : hashes_scalar64.data());
+              /*combine_hashes=*/false, num_rows, fixed_length, keys, hashes64[i].data());
         } else {
           Hashing64::HashVarLen(
-              /*combine_hashes=*/false, num_rows, key_offsets, keys,
-              use_simd ? hashes_simd64.data() : hashes_scalar64.data());
+              /*combine_hashes=*/false, num_rows, key_offsets, keys, hashes64[i].data());
         }
       }
     }
 
-    if (use_32bit_hash) {
-      for (int i = 0; i < num_rows; ++i) {
-        hashes_scalar64[i] = hashes_scalar32[i];
-        if (simd_supported) {
-          hashes_simd64[i] = hashes_simd32[i];
-        }
-      }
-    }
-
-    // Verify that both scalar and AVX2 implementations give the same hashes
+    // Verify that all implementations (scalar, SIMD) give the same hashes
     //
-    if (simd_supported) {
-      for (int i = 0; i < num_rows; ++i) {
-        ASSERT_EQ(hashes_scalar64[i], hashes_simd64[i])
+    const auto& hashes_scalar64 = hashes64[0];
+    for (int i = 0; i < static_cast<int>(hardware_flags_for_testing.size()); ++i) {
+      for (int j = 0; j < num_rows; ++j) {
+        ASSERT_EQ(hashes64[i][j], hashes_scalar64[j])
             << "scalar and simd approaches yielded different hashes";
       }
     }
@@ -208,18 +203,15 @@ class TestVectorHash {
     // each time. Measure the number of unique hashes and compare to the number
     // of unique keys.
     //
-    std::map<int, uint64_t> unique_key_to_hash;
-    std::set<uint64_t> unique_hashes;
+    std::unordered_map<int, uint64_t> unique_key_to_hash;
+    std::unordered_set<uint64_t> unique_hashes;
     for (int i = 0; i < num_rows; ++i) {
-      std::map<int, uint64_t>::iterator iter = unique_key_to_hash.find(row_ids[i]);
-      if (iter == unique_key_to_hash.end()) {
-        unique_key_to_hash.insert(std::make_pair(row_ids[i], hashes_scalar64[i]));
-      } else {
-        ASSERT_EQ(iter->second, hashes_scalar64[i]);
-      }
-      if (unique_hashes.find(hashes_scalar64[i]) == unique_hashes.end()) {
-        unique_hashes.insert(hashes_scalar64[i]);
+      auto [it, inserted] =
+          unique_key_to_hash.try_emplace(row_ids[i], hashes_scalar64[i]);
+      if (!inserted) {
+        ASSERT_EQ(it->second, hashes_scalar64[i]);
       }
+      unique_hashes.insert(hashes_scalar64[i]);
     }
     float percent_hash_collisions =
         100.0f * static_cast<float>(num_unique - unique_hashes.size()) /
diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc
index b59854480765b..e8a782575e278 100644
--- a/cpp/src/arrow/testing/util.cc
+++ b/cpp/src/arrow/testing/util.cc
@@ -43,6 +43,7 @@
 #include "arrow/table.h"
 #include "arrow/testing/random.h"
 #include "arrow/type.h"
+#include "arrow/util/cpu_info.h"
 #include "arrow/util/io_util.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/pcg_random.h"
@@ -211,4 +212,18 @@ const std::vector<std::shared_ptr<DataType>>& all_dictionary_index_types() {
   return types;
 }
 
+std::vector<int64_t> GetSupportedHardwareFlags(
+    const std::vector<int64_t>& candidate_flags) {
+  std::vector<int64_t> hardware_flags;
+  // Always test fallback codepaths
+  hardware_flags.push_back(0);
+  for (const int64_t candidate_flag : candidate_flags) {
+    if (candidate_flag != 0 &&
+        internal::CpuInfo::GetInstance()->IsSupported(candidate_flag)) {
+      hardware_flags.push_back(candidate_flag);
+    }
+  }
+  return hardware_flags;
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h
index 4f4b03438fd58..b4b2785a36292 100644
--- a/cpp/src/arrow/testing/util.h
+++ b/cpp/src/arrow/testing/util.h
@@ -131,4 +131,10 @@ ARROW_TESTING_EXPORT std::string GetListenAddress();
 ARROW_TESTING_EXPORT
 const std::vector<std::shared_ptr<DataType>>& all_dictionary_index_types();
 
+// Get a list of supported hardware flags from the given candidates.
+// The result will always contain 0, meaning no optional CPU feature enabled at all.
+ARROW_TESTING_EXPORT
+std::vector<int64_t> GetSupportedHardwareFlags(
+    const std::vector<int64_t>& candidate_flags);
+
 }  // namespace arrow