Skip to content

Commit

Permalink
GH-35116: [CI][C++] Enable compile-time AVX2 on some CI platforms (#36662)
Browse files Browse the repository at this point in the history

AVX2 became mainline on Intel and AMD server CPUs around 2015, so it's unlikely to be unavailable on current cloud platforms:
https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2

Enable it on at least one Windows and one Linux CI platform.

x86 macOS is a legacy platform, so it is less interesting to exercise AVX2 there (and I'm not sure the old CPUs in x86 Macs actually support AVX2).

Also, fix the buggy AVX2 activation logic in Acero and avoid force-testing AVX2 on incompatible systems.

* Closes: #35116

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou authored Jul 19, 2023
1 parent 80f77d1 commit 366e808
Show file tree
Hide file tree
Showing 34 changed files with 289 additions and 236 deletions.
14 changes: 11 additions & 3 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ jobs:
image: conda-cpp
llvm: "14"
runs-on: ubuntu-latest
title: AMD64 Conda C++
simd-level: AVX2
title: AMD64 Conda C++ AVX2
ubuntu: "22.04"
- arch: amd64
clang-tools: "14"
Expand All @@ -85,6 +86,7 @@ jobs:
ubuntu: "20.04"
env:
ARCH: ${{ matrix.arch }}
ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
CLANG_TOOLS: ${{ matrix.clang-tools }}
LLVM: ${{ matrix.llvm }}
UBUNTU: ${{ matrix.ubuntu }}
Expand Down Expand Up @@ -175,6 +177,10 @@ jobs:
ARROW_WITH_ZSTD: ON
GTest_SOURCE: BUNDLED
steps:
- name: CPU Info
run: |
sysctl -a | grep cpu
sysctl -a | grep "hw.optional"
- name: Checkout Arrow
uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -220,7 +226,7 @@ jobs:
ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
windows:
name: AMD64 ${{ matrix.name }} C++17
name: ${{ matrix.title }}
runs-on: ${{ matrix.os }}
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
Expand All @@ -231,7 +237,8 @@ jobs:
- windows-2019
include:
- os: windows-2019
name: Windows 2019
simd-level: AVX2
title: AMD64 Windows 2019 C++17 AVX2
env:
ARROW_BOOST_USE_SHARED: OFF
ARROW_BUILD_BENCHMARKS: ON
Expand All @@ -246,6 +253,7 @@ jobs:
ARROW_MIMALLOC: ON
ARROW_ORC: ON
ARROW_PARQUET: ON
ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
ARROW_WITH_BROTLI: OFF
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ cmake \
-DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
-DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
-DARROW_S3=${ARROW_S3:-OFF} \
-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \
-DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \
-DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-ON} \
-DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
Expand Down
49 changes: 26 additions & 23 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -62,29 +62,32 @@ if(ARROW_CPU_FLAG STREQUAL "x86")
"${ARROW_AVX512_FLAG} -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw")
check_cxx_compiler_flag(${ARROW_SSE4_2_FLAG} CXX_SUPPORTS_SSE4_2)
endif()
check_cxx_compiler_flag(${ARROW_AVX2_FLAG} CXX_SUPPORTS_AVX2)
if(MINGW)
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782
message(STATUS "Disable AVX512 support on MINGW for now")
else()
# Check for AVX512 support in the compiler.
set(OLD_CMAKE_REQURED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${ARROW_AVX512_FLAG}")
check_cxx_source_compiles("
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <immintrin.h>
#endif
int main() {
__m512i mask = _mm512_set1_epi32(0x1);
char out[32];
_mm512_storeu_si512(out, mask);
return 0;
}"
CXX_SUPPORTS_AVX512)
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS})
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
# Check for AVX extensions on 64-bit systems only, as 32-bit support seems iffy
check_cxx_compiler_flag(${ARROW_AVX2_FLAG} CXX_SUPPORTS_AVX2)
if(MINGW)
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782
message(STATUS "Disable AVX512 support on MINGW for now")
else()
# Check for AVX512 support in the compiler.
set(OLD_CMAKE_REQURED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${ARROW_AVX512_FLAG}")
check_cxx_source_compiles("
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <immintrin.h>
#endif
int main() {
__m512i mask = _mm512_set1_epi32(0x1);
char out[32];
_mm512_storeu_si512(out, mask);
return 0;
}"
CXX_SUPPORTS_AVX512)
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS})
endif()
endif()
# Runtime SIMD level it can get from compiler and ARROW_RUNTIME_SIMD_LEVEL
if(CXX_SUPPORTS_SSE4_2 AND ARROW_RUNTIME_SIMD_LEVEL MATCHES
Expand Down
22 changes: 11 additions & 11 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,15 @@ function(ADD_ARROW_BENCHMARK REL_TEST_NAME)
${ARG_UNPARSED_ARGUMENTS})
endfunction()

macro(append_avx2_src SRC)
macro(append_runtime_avx2_src SRC)
if(ARROW_HAVE_RUNTIME_AVX2)
list(APPEND ARROW_SRCS ${SRC})
set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX2_FLAG})
endif()
endmacro()

macro(append_avx512_src SRC)
macro(append_runtime_avx512_src SRC)
if(ARROW_HAVE_RUNTIME_AVX512)
list(APPEND ARROW_SRCS ${SRC})
set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
Expand Down Expand Up @@ -254,8 +254,8 @@ if(ARROW_JEMALLOC)
PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON)
endif()

append_avx2_src(util/bpacking_avx2.cc)
append_avx512_src(util/bpacking_avx512.cc)
append_runtime_avx2_src(util/bpacking_avx2.cc)
append_runtime_avx512_src(util/bpacking_avx512.cc)

if(ARROW_HAVE_NEON)
list(APPEND ARROW_SRCS util/bpacking_neon.cc)
Expand Down Expand Up @@ -425,11 +425,11 @@ list(APPEND
compute/row/row_internal.cc
compute/util.cc)

append_avx2_src(compute/key_hash_avx2.cc)
append_avx2_src(compute/key_map_avx2.cc)
append_avx2_src(compute/row/compare_internal_avx2.cc)
append_avx2_src(compute/row/encode_internal_avx2.cc)
append_avx2_src(compute/util_avx2.cc)
append_runtime_avx2_src(compute/key_hash_avx2.cc)
append_runtime_avx2_src(compute/key_map_avx2.cc)
append_runtime_avx2_src(compute/row/compare_internal_avx2.cc)
append_runtime_avx2_src(compute/row/encode_internal_avx2.cc)
append_runtime_avx2_src(compute/util_avx2.cc)

if(ARROW_COMPUTE)
# Include the remaining kernels
Expand Down Expand Up @@ -464,8 +464,8 @@ if(ARROW_COMPUTE)
compute/kernels/vector_select_k.cc
compute/kernels/vector_sort.cc)

append_avx2_src(compute/kernels/aggregate_basic_avx2.cc)
append_avx512_src(compute/kernels/aggregate_basic_avx512.cc)
append_runtime_avx2_src(compute/kernels/aggregate_basic_avx2.cc)
append_runtime_avx512_src(compute/kernels/aggregate_basic_avx512.cc)
endif()

if(ARROW_FILESYSTEM)
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/acero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ add_custom_target(arrow_acero)

arrow_install_all_headers("arrow/acero")

macro(append_acero_avx2_src SRC)
macro(append_acero_runtime_avx2_src SRC)
if(ARROW_HAVE_RUNTIME_AVX2)
list(APPEND ARROW_ACERO_SRCS ${SRC})
set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
Expand Down Expand Up @@ -56,8 +56,8 @@ set(ARROW_ACERO_SRCS
union_node.cc
util.cc)

append_acero_avx2_src(bloom_filter_avx2.cc)
append_acero_avx2_src(swiss_join_avx2.cc)
append_acero_runtime_avx2_src(bloom_filter_avx2.cc)
append_acero_runtime_avx2_src(swiss_join_avx2.cc)

set(ARROW_ACERO_SHARED_LINK_LIBS)
set(ARROW_ACERO_STATIC_LINK_LIBS)
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/acero/bloom_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ void BlockedBloomFilter::InsertImp(int64_t num_rows, const T* hashes) {
void BlockedBloomFilter::Insert(int64_t hardware_flags, int64_t num_rows,
const uint32_t* hashes) {
int64_t num_processed = 0;
#if defined(ARROW_HAVE_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
num_processed = Insert_avx2(num_rows, hashes);
}
Expand All @@ -134,7 +134,7 @@ void BlockedBloomFilter::Insert(int64_t hardware_flags, int64_t num_rows,
void BlockedBloomFilter::Insert(int64_t hardware_flags, int64_t num_rows,
const uint64_t* hashes) {
int64_t num_processed = 0;
#if defined(ARROW_HAVE_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
num_processed = Insert_avx2(num_rows, hashes);
}
Expand Down Expand Up @@ -181,7 +181,7 @@ void BlockedBloomFilter::Find(int64_t hardware_flags, int64_t num_rows,
bool enable_prefetch) const {
int64_t num_processed = 0;

#if defined(ARROW_HAVE_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (!(enable_prefetch && UsePrefetch()) &&
(hardware_flags & arrow::internal::CpuInfo::AVX2)) {
num_processed = Find_avx2(num_rows, hashes, result_bit_vector);
Expand All @@ -202,7 +202,7 @@ void BlockedBloomFilter::Find(int64_t hardware_flags, int64_t num_rows,
bool enable_prefetch) const {
int64_t num_processed = 0;

#if defined(ARROW_HAVE_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (!(enable_prefetch && UsePrefetch()) &&
(hardware_flags & arrow::internal::CpuInfo::AVX2)) {
num_processed = Find_avx2(num_rows, hashes, result_bit_vector);
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/acero/bloom_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@

#pragma once

#if defined(ARROW_HAVE_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2)
#include <immintrin.h>
#endif

#include <atomic>
#include <cstdint>
#include <memory>

#include "arrow/acero/partition_util.h"
#include "arrow/acero/util.h"
#include "arrow/memory_pool.h"
Expand Down Expand Up @@ -203,7 +204,7 @@ class ARROW_ACERO_EXPORT BlockedBloomFilter {

void SingleFold(int num_folds);

#if defined(ARROW_HAVE_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2)
inline __m256i mask_avx2(__m256i hash) const;
inline __m256i block_id_avx2(__m256i hash) const;
int64_t Insert_avx2(int64_t num_rows, const uint32_t* hashes);
Expand Down
5 changes: 1 addition & 4 deletions cpp/src/arrow/acero/bloom_filter_avx2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@
// under the License.

#include <immintrin.h>

#include "arrow/acero/bloom_filter.h"
#include "arrow/util/bit_util.h"

namespace arrow {
namespace acero {

#if defined(ARROW_HAVE_AVX2)

inline __m256i BlockedBloomFilter::mask_avx2(__m256i hash) const {
// AVX2 translation of mask() method
//
Expand Down Expand Up @@ -132,7 +131,5 @@ int64_t BlockedBloomFilter::Insert_avx2(int64_t num_rows, const uint64_t* hashes
return InsertImp_avx2(num_rows, hashes);
}

#endif

} // namespace acero
} // namespace arrow
Loading

0 comments on commit 366e808

Please sign in to comment.