diff --git a/CMake/ResolveDependency.cmake b/CMake/ResolveDependency.cmake index eba371fbcd60..7998d9d05024 100644 --- a/CMake/ResolveDependency.cmake +++ b/CMake/ResolveDependency.cmake @@ -40,6 +40,8 @@ set(CMAKE_TLS_VERIFY true) macro(build_dependency dependency_name) string(TOLOWER ${dependency_name} dependency_name_lower) + set(CMAKE_COMPILER_SELECTION_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) include(${dependency_name_lower}) endmacro() diff --git a/CMake/resolve_dependency_modules/absl.cmake b/CMake/resolve_dependency_modules/absl.cmake index 11c267b688e8..d11be1b0ea03 100644 --- a/CMake/resolve_dependency_modules/absl.cmake +++ b/CMake/resolve_dependency_modules/absl.cmake @@ -28,7 +28,8 @@ FetchContent_Declare( absl URL ${VELOX_ABSL_SOURCE_URL} URL_HASH ${VELOX_ABSL_BUILD_SHA256_CHECKSUM} - OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM) + OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(ABSL_BUILD_TESTING OFF) set(ABSL_PROPAGATE_CXX_STD ON) diff --git a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt index 3f01df2fdc05..1890902dd7ee 100644 --- a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt +++ b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt @@ -37,7 +37,8 @@ if(VELOX_ENABLE_ARROW) -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DARROW_BUILD_STATIC=ON - -DThrift_SOURCE=${THRIFT_SOURCE}) + -DThrift_SOURCE=${THRIFT_SOURCE} + ${CMAKE_COMPILER_SELECTION_ARGS}) set(ARROW_LIBDIR ${ARROW_PREFIX}/install/${CMAKE_INSTALL_LIBDIR}) add_library(thrift STATIC IMPORTED GLOBAL) diff --git a/CMake/resolve_dependency_modules/boost/CMakeLists.txt b/CMake/resolve_dependency_modules/boost/CMakeLists.txt index c6d32b88c2b4..84e974426900 100644 --- a/CMake/resolve_dependency_modules/boost/CMakeLists.txt +++ b/CMake/resolve_dependency_modules/boost/CMakeLists.txt @@ -42,7 +42,8 @@ endif() FetchContent_Declare( Boost URL ${VELOX_BOOST_SOURCE_URL} - URL_HASH ${VELOX_BOOST_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_BOOST_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) # Configure the file before adding the header only libs configure_file(${CMAKE_CURRENT_LIST_DIR}/FindBoost.cmake.in diff --git a/CMake/resolve_dependency_modules/cpr.cmake b/CMake/resolve_dependency_modules/cpr.cmake index 45e0d16375a8..4fcb08c5b4e3 100644 --- a/CMake/resolve_dependency_modules/cpr.cmake +++ b/CMake/resolve_dependency_modules/cpr.cmake @@ -26,7 +26,6 @@ set(curl_SOURCE BUNDLED) resolve_dependency(curl) resolve_dependency_url(CPR) - message(STATUS "Building cpr from source") FetchContent_Declare( cpr @@ -34,7 +33,8 @@ FetchContent_Declare( URL_HASH ${VELOX_CPR_BUILD_SHA256_CHECKSUM} PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/cpr/cpr-libcurl-compatible.patch && git - apply ${CMAKE_CURRENT_LIST_DIR}/cpr/cpr-remove-sancheck.patch) + apply ${CMAKE_CURRENT_LIST_DIR}/cpr/cpr-remove-sancheck.patch CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(BUILD_SHARED_LIBS OFF) set(CPR_USE_SYSTEM_CURL OFF) # ZLIB has already been found by find_package(ZLIB, REQUIRED), set CURL_ZLIB=OFF diff --git a/CMake/resolve_dependency_modules/duckdb.cmake b/CMake/resolve_dependency_modules/duckdb.cmake index f606ab48ef5b..e030428242ba 100644 --- a/CMake/resolve_dependency_modules/duckdb.cmake +++ b/CMake/resolve_dependency_modules/duckdb.cmake @@ -34,7 +34,8 @@ FetchContent_Declare( PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/duckdb/remove-ccache.patch && git apply ${CMAKE_CURRENT_LIST_DIR}/duckdb/fix-duckdbversion.patch && git apply - ${CMAKE_CURRENT_LIST_DIR}/duckdb/re2.patch) + ${CMAKE_CURRENT_LIST_DIR}/duckdb/re2.patch CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(BUILD_UNITTESTS OFF) set(ENABLE_SANITIZER OFF) diff --git a/CMake/resolve_dependency_modules/fmt.cmake b/CMake/resolve_dependency_modules/fmt.cmake index 88d8d674d3a3..abc7f46922a6 100644 --- a/CMake/resolve_dependency_modules/fmt.cmake +++ b/CMake/resolve_dependency_modules/fmt.cmake @@ -25,7 +25,8 @@ message(STATUS "Building fmt from source") FetchContent_Declare( fmt URL ${VELOX_FMT_SOURCE_URL} - URL_HASH ${VELOX_FMT_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_FMT_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) # Force fmt to create fmt-config.cmake which can be found by other dependecies # (e.g. folly) set(FMT_INSTALL ON) diff --git a/CMake/resolve_dependency_modules/folly/CMakeLists.txt b/CMake/resolve_dependency_modules/folly/CMakeLists.txt index df7a489384ee..28234295007a 100644 --- a/CMake/resolve_dependency_modules/folly/CMakeLists.txt +++ b/CMake/resolve_dependency_modules/folly/CMakeLists.txt @@ -34,7 +34,7 @@ FetchContent_Declare( URL ${VELOX_FOLLY_SOURCE_URL} URL_HASH ${VELOX_FOLLY_BUILD_SHA256_CHECKSUM} PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/folly-no-export.patch - ${glog_patch}) + ${glog_patch} CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) if(ON_APPLE_M1) # folly will wrongly assume x86_64 if this is not set diff --git a/CMake/resolve_dependency_modules/gflags.cmake b/CMake/resolve_dependency_modules/gflags.cmake index 41d302c92313..36a8b166d4eb 100644 --- a/CMake/resolve_dependency_modules/gflags.cmake +++ b/CMake/resolve_dependency_modules/gflags.cmake @@ -27,7 +27,8 @@ FetchContent_Declare( gflags URL ${VELOX_GFLAGS_SOURCE_URL} URL_HASH ${VELOX_GFLAGS_BUILD_SHA256_CHECKSUM} - PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/gflags/gflags-config.patch) + PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/gflags/gflags-config.patch + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) set(GFLAGS_BUILD_STATIC_LIBS ON) set(GFLAGS_BUILD_gflags_LIB ON) diff --git a/CMake/resolve_dependency_modules/glog.cmake b/CMake/resolve_dependency_modules/glog.cmake index 54836009ed7c..212def0617b6 100644 --- a/CMake/resolve_dependency_modules/glog.cmake +++ b/CMake/resolve_dependency_modules/glog.cmake @@ -27,8 +27,10 @@ FetchContent_Declare( glog URL ${VELOX_GLOG_SOURCE_URL} URL_HASH ${VELOX_GLOG_BUILD_SHA256_CHECKSUM} - PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/glog/glog-no-export.patch - && git apply ${CMAKE_CURRENT_LIST_DIR}/glog/glog-config.patch) + PATCH_COMMAND + git apply ${CMAKE_CURRENT_LIST_DIR}/glog/glog-no-export.patch && git apply + ${CMAKE_CURRENT_LIST_DIR}/glog/glog-config.patch CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(BUILD_SHARED_LIBS OFF) set(WITH_UNWIND OFF) diff --git a/CMake/resolve_dependency_modules/google_cloud_cpp_storage.cmake b/CMake/resolve_dependency_modules/google_cloud_cpp_storage.cmake index 8106234925e7..82cdcb5ab6be 100644 --- a/CMake/resolve_dependency_modules/google_cloud_cpp_storage.cmake +++ b/CMake/resolve_dependency_modules/google_cloud_cpp_storage.cmake @@ -32,7 +32,8 @@ FetchContent_Declare( google_cloud_cpp URL ${VELOX_GOOGLE_CLOUD_CPP_SOURCE_URL} URL_HASH ${VELOX_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM} - OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM) + OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(GOOGLE_CLOUD_CPP_ENABLE_EXAMPLES OFF) set(GOOGLE_CLOUD_CPP_ENABLE diff --git a/CMake/resolve_dependency_modules/grpc.cmake b/CMake/resolve_dependency_modules/grpc.cmake index f105356c231e..49dba696da3b 100644 --- a/CMake/resolve_dependency_modules/grpc.cmake +++ b/CMake/resolve_dependency_modules/grpc.cmake @@ -31,7 +31,8 @@ FetchContent_Declare( gRPC URL ${VELOX_GRPC_SOURCE_URL} URL_HASH ${VELOX_GRPC_BUILD_SHA256_CHECKSUM} - OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL) + OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(gRPC_ABSL_PROVIDER "package" diff --git a/CMake/resolve_dependency_modules/gtest.cmake b/CMake/resolve_dependency_modules/gtest.cmake index 8f35b9e954b9..aa97ce86f73f 100644 --- a/CMake/resolve_dependency_modules/gtest.cmake +++ b/CMake/resolve_dependency_modules/gtest.cmake @@ -26,7 +26,8 @@ message(STATUS "Building gtest from source") FetchContent_Declare( gtest URL ${VELOX_GTEST_SOURCE_URL} - URL_HASH ${VELOX_GTEST_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_GTEST_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) FetchContent_MakeAvailable(gtest) diff --git a/CMake/resolve_dependency_modules/icu.cmake b/CMake/resolve_dependency_modules/icu.cmake index 2f46ae885b1d..94d3bdd82498 100644 --- a/CMake/resolve_dependency_modules/icu.cmake +++ b/CMake/resolve_dependency_modules/icu.cmake @@ -54,7 +54,8 @@ ExternalProject_Add( CONFIGURE_COMMAND /source/configure --prefix=${ICU_DIR} --libdir=${ICU_LIBRARIES} ${ICU_CFG} BUILD_COMMAND ${MAKE_PROGRAM} -j ${NUM_JOBS} - INSTALL_COMMAND ${HOST_ENV_CMAKE} ${MAKE_PROGRAM} install) + INSTALL_COMMAND ${HOST_ENV_CMAKE} ${MAKE_PROGRAM} install + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) add_library(ICU::ICU UNKNOWN IMPORTED) add_dependencies(ICU::ICU ICU-build) diff --git a/CMake/resolve_dependency_modules/protobuf.cmake b/CMake/resolve_dependency_modules/protobuf.cmake index e79d9871b9da..9ebfe409979c 100644 --- a/CMake/resolve_dependency_modules/protobuf.cmake +++ b/CMake/resolve_dependency_modules/protobuf.cmake @@ -39,7 +39,8 @@ FetchContent_Declare( protobuf URL ${VELOX_PROTOBUF_SOURCE_URL} URL_HASH ${VELOX_PROTOBUF_BUILD_SHA256_CHECKSUM} - OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM) + OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM CMAKE_ARGS + ${CMAKE_COMPILER_SELECTION_ARGS}) set(protobuf_BUILD_TESTS OFF) set(protobuf_ABSL_PROVIDER diff --git a/CMake/resolve_dependency_modules/pybind11.cmake b/CMake/resolve_dependency_modules/pybind11.cmake index 7874babaf714..86791c806d03 100644 --- a/CMake/resolve_dependency_modules/pybind11.cmake +++ b/CMake/resolve_dependency_modules/pybind11.cmake @@ -27,6 +27,7 @@ message(STATUS "Building Pybind11 from source") FetchContent_Declare( pybind11 URL ${VELOX_PYBIND11_SOURCE_URL} - URL_HASH ${VELOX_PYBIND11_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_PYBIND11_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) FetchContent_MakeAvailable(pybind11) diff --git a/CMake/resolve_dependency_modules/re2.cmake b/CMake/resolve_dependency_modules/re2.cmake index 6bc00c66c019..a87e690c608f 100644 --- a/CMake/resolve_dependency_modules/re2.cmake +++ b/CMake/resolve_dependency_modules/re2.cmake @@ -28,7 +28,8 @@ message(STATUS "Building re2 from source") FetchContent_Declare( re2 URL ${VELOX_RE2_SOURCE_URL} - URL_HASH SHA256=${VELOX_RE2_BUILD_SHA256_CHECKSUM}) + URL_HASH SHA256=${VELOX_RE2_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) set(RE2_USE_ICU ON) set(RE2_BUILD_TESTING OFF) diff --git a/CMake/resolve_dependency_modules/simdjson.cmake b/CMake/resolve_dependency_modules/simdjson.cmake index 69e7f204494b..c7518b102984 100644 --- a/CMake/resolve_dependency_modules/simdjson.cmake +++ b/CMake/resolve_dependency_modules/simdjson.cmake @@ -27,6 +27,7 @@ message(STATUS "Building simdjson from source") FetchContent_Declare( simdjson URL ${VELOX_SIMDJSON_SOURCE_URL} - URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) FetchContent_MakeAvailable(simdjson) diff --git a/CMake/resolve_dependency_modules/stemmer.cmake b/CMake/resolve_dependency_modules/stemmer.cmake index dbaca146341b..a163189df55b 100644 --- a/CMake/resolve_dependency_modules/stemmer.cmake +++ b/CMake/resolve_dependency_modules/stemmer.cmake @@ -42,7 +42,7 @@ ExternalProject_Add( PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/libstemmer/Makefile.patch BUILD_BYPRODUCTS ${STEMMER_PREFIX}/src/libstemmer/${CMAKE_STATIC_LIBRARY_PREFIX}stemmer${CMAKE_STATIC_LIBRARY_SUFFIX} -) + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) add_library(stemmer STATIC IMPORTED GLOBAL) add_library(stemmer::stemmer ALIAS stemmer) diff --git a/CMake/resolve_dependency_modules/template.cmake b/CMake/resolve_dependency_modules/template.cmake index 466cc548af98..b86b4b27074e 100644 --- a/CMake/resolve_dependency_modules/template.cmake +++ b/CMake/resolve_dependency_modules/template.cmake @@ -25,6 +25,7 @@ message(STATUS "Building from source") FetchContent_Declare( URL ${VELOX__SOURCE_URL} - URL_HASH ${VELOX__BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX__BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) FetchContent_MakeAvailable() diff --git a/CMake/resolve_dependency_modules/xsimd.cmake b/CMake/resolve_dependency_modules/xsimd.cmake index 7719e991987c..264b34117469 100644 --- a/CMake/resolve_dependency_modules/xsimd.cmake +++ b/CMake/resolve_dependency_modules/xsimd.cmake @@ -26,6 +26,7 @@ message(STATUS "Building xsimd from source") FetchContent_Declare( xsimd URL ${VELOX_XSIMD_SOURCE_URL} - URL_HASH ${VELOX_XSIMD_BUILD_SHA256_CHECKSUM}) + URL_HASH ${VELOX_XSIMD_BUILD_SHA256_CHECKSUM} + CMAKE_ARGS ${CMAKE_COMPILER_SELECTION_ARGS}) FetchContent_MakeAvailable(xsimd) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb7c49907980..f9fd297c5c21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -206,6 +206,10 @@ if(${VELOX_FORCE_COLORED_OUTPUT}) elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") add_compile_options(-fcolor-diagnostics) + if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND "${CMAKE_CXX_COMPILER_VERSION}" + VERSION_GREATER_EQUAL 15) + set(CMAKE_EXE_LINKER_FLAGS "-latomic") + endif() endif() endif() diff --git a/Makefile b/Makefile index b604d560adf6..3529e3012674 100644 --- a/Makefile +++ b/Makefile @@ -208,3 +208,14 @@ python-build: python-test: $(MAKE) python-build extras="[tests]" DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v + +clang-debug: #: Build with debugging symbols using Clang + $(MAKE) debug EXTRA_CMAKE_FLAGS=" ${EXTRA_CMAKE_FLAGS} \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++" + + +clang-release: #: Build the release version using Clang + $(MAKE) release EXTRA_CMAKE_FLAGS=" ${EXTRA_CMAKE_FLAGS} \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++" diff --git a/scripts/setup-centos9.sh b/scripts/setup-centos9.sh index 487dadba8af9..20667f04c623 100755 --- a/scripts/setup-centos9.sh +++ b/scripts/setup-centos9.sh @@ -32,18 +32,28 @@ SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}") source $SCRIPTDIR/setup-helper-functions.sh CPU_TARGET="${CPU_TARGET:-avx}" NPROC=$(getconf _NPROCESSORS_ONLN) -export CFLAGS=$(get_cxx_flags $CPU_TARGET) # Used by LZO. -export CXXFLAGS=$CFLAGS # Used by boost. -export CPPFLAGS=$CFLAGS # Used by LZO. +export CXXFLAGS=$(get_cxx_flags $CPU_TARGET) # Used by boost. +export CFLAGS=${CXXFLAGS//"-std=c++17"/} # Used by LZO. CMAKE_BUILD_TYPE="${BUILD_TYPE:-Release}" BUILD_DUCKDB="${BUILD_DUCKDB:-true}" -export CC=/opt/rh/gcc-toolset-12/root/bin/gcc -export CXX=/opt/rh/gcc-toolset-12/root/bin/g++ +USE_CLANG="${USE_CLANG:-false}" function dnf_install { dnf install -y -q --setopt=install_weak_deps=False "$@" } +function install_and_setup_clang15 { + dnf_install clang15 gcc-toolset-13-libatomic-devel + CLANG_NUM_ALTERNATIVES=`alternatives --list | grep "clang " | wc -l` + if [[ ${CLANG_NUM_ALTERNATIVES} -eq 0 ]]; then + alternatives --install /usr/bin/clang clang /usr/bin/clang-15 80 --follower /usr/bin/clang++ clang++ /usr/bin/clang++-15 + fi + (test ! -h /usr/local/bin/clang && ln -s /usr/bin/ccache /usr/local/bin/clang) || true + (test ! -h /usr/local/bin/clang++ && ln -s /usr/bin/ccache /usr/local/bin/clang++) || true + (test ! -h /usr/lib64/ccache/clang && ln -s /usr/bin/ccache /usr/lib64/ccache/clang) || true + (test ! -h /usr/lib64/ccache/clang++ && ln -s /usr/bin/ccache /usr/lib64/ccache/clang++) || true +} + # Install packages required for build. function install_build_prerequisites { dnf update -y @@ -52,7 +62,12 @@ function install_build_prerequisites { dnf update -y dnf_install ninja-build cmake ccache gcc-toolset-12 git wget which dnf_install autoconf automake python3-devel pip libtool + pip install cmake==3.28.3 + + if [[ ${USE_CLANG} != "false" ]]; then + install_and_setup_clang15 + fi } # Install dependencies from the package managers. @@ -101,9 +116,14 @@ function install_lzo { function install_boost { wget_and_untar https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.gz boost ( - cd boost - ./bootstrap.sh --prefix=/usr/local - ./b2 "-j$(nproc)" -d0 install threading=multi --without-python + cd boost + if [[ ${USE_CLANG} != "false" ]]; then + ./bootstrap.sh --prefix=/usr/local --with-toolset=clang + ./b2 "-j$(nproc)" -d0 install threading=multi toolset=clang --without-python + else + ./bootstrap.sh --prefix=/usr/local + ./b2 "-j$(nproc)" -d0 install threading=multi --without-python + fi ) } @@ -239,9 +259,15 @@ function install_velox_deps { ( if [[ $# -ne 0 ]]; then - # Activate gcc12; enable errors on unset variables afterwards. - source /opt/rh/gcc-toolset-12/enable || exit 1 - set -u + if [[ ${USE_CLANG} != "false" ]]; then + export CC=/usr/bin/clang + export CXX=/usr/bin/clang++ + else + # Activate gcc12; enable errors on unset variables afterwards. + source /opt/rh/gcc-toolset-12/enable || exit 1 + set -u + fi + for cmd in "$@"; do run_and_time "${cmd}" done @@ -253,9 +279,14 @@ function install_velox_deps { else echo "Skipping installation of build dependencies since INSTALL_PREREQUISITES is not set" fi - # Activate gcc12; enable errors on unset variables afterwards. - source /opt/rh/gcc-toolset-12/enable || exit 1 - set -u + if [[ ${USE_CLANG} != "false" ]]; then + export CC=/usr/bin/clang + export CXX=/usr/bin/clang++ + else + # Activate gcc12; enable errors on unset variables afterwards. + source /opt/rh/gcc-toolset-12/enable || exit 1 + set -u + fi install_velox_deps echo "All dependencies for Velox installed!" dnf clean all diff --git a/scripts/setup-ubuntu.sh b/scripts/setup-ubuntu.sh index e765958038bb..910cf56cca63 100755 --- a/scripts/setup-ubuntu.sh +++ b/scripts/setup-ubuntu.sh @@ -25,7 +25,7 @@ # $ scripts/setup-ubuntu.sh install_googletest install_fmt # -# Minimal setup for Ubuntu 20.04. +# Minimal setup for Ubuntu 22.04. set -eufx -o pipefail SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}") source $SCRIPTDIR/setup-helper-functions.sh @@ -43,6 +43,28 @@ DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)} BUILD_DUCKDB="${BUILD_DUCKDB:-true}" export CMAKE_BUILD_TYPE=Release SUDO="${SUDO:-"sudo --preserve-env"}" +USE_CLANG="${USE_CLANG:-false}" + +function install_and_setup_clang15 { + VERSION=`cat /etc/os-release | grep VERSION_ID` + if [[ ! ${VERSION} =~ "22.04" && ! ${VERSION} =~ "24.04" ]]; then + echo "The clang configuration is for Ubuntu 22.04 and 22.04. Unset USE_CLANG and redo the setup." + exit + fi + CLANG_PACKAGE_LIST=clang-15 + if [[ ${VERSION} =~ "22.04" ]]; then + CLANG_PACKAGE_LIST=clang-15 gcc-12 g++-12 libc++-12-dev + fi + ${SUDO} apt install ${CLANG_PACKAGE_LIST} -y + CLANG_NUM_ALTERNATIVES=`update-alternatives --list clang | grep "clang " | wc -l` + if [[ ${CLANG_NUM_ALTERNATIVES} -eq 0 ]]; then + ${SUDO} update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 80 --slave /usr/bin/clang++ clang++ /usr/bin/clang++-15 + fi + (test ! -h /usr/local/bin/clang && ${SUDO} ln -s /usr/bin/ccache /usr/local/bin/clang) || true + (test ! -h /usr/local/bin/clang++ && ${SUDO} ln -s /usr/bin/ccache /usr/local/bin/clang++) || true + (test ! -h /usr/lib64/ccache/clang && ${SUDO} ln -s /usr/bin/ccache /usr/lib/ccache/clang) || true + (test ! -h /usr/lib64/ccache/clang++ && ${SUDO} ln -s /usr/bin/ccache /usr/lib/ccache/clang++) || true +} # Install packages required for build. function install_build_prerequisites { @@ -60,8 +82,12 @@ function install_build_prerequisites { git \ wget - # Install to /usr/local to make it available to all users. - ${SUDO} pip3 install cmake==3.28.3 + # Install to /usr/local to make it available to all users. + ${SUDO} pip3 install cmake==3.28.3 + + if [[ ${USE_CLANG} != "false" ]]; then + install_and_setup_clang15 + fi } # Install packages required for build. @@ -99,9 +125,17 @@ function install_fmt { } function install_boost { - github_checkout boostorg/boost "${BOOST_VERSION}" --recursive - ./bootstrap.sh --prefix=/usr/local - ${SUDO} ./b2 "-j$(nproc)" -d0 install threading=multi --without-python + wget_and_untar https://github.com/boostorg/boost/releases/download/${BOOST_VERSION}/${BOOST_VERSION}.tar.gz boost + ( + cd boost + if [[ ${USE_CLANG} != "false" ]]; then + ./bootstrap.sh --prefix=/usr/local --with-toolset=clang + ${SUDO} ./b2 "-j$(nproc)" -d0 install threading=multi toolset=clang --without-python + else + ./bootstrap.sh --prefix=/usr/local + ${SUDO} ./b2 "-j$(nproc)" -d0 install threading=multi --without-python + fi + ) } function install_folly { @@ -213,6 +247,10 @@ function install_apt_deps { (return 2> /dev/null) && return # If script was sourced, don't run commands. ( + if [[ ${USE_CLANG} != "false" ]]; then + export CC=/usr/bin/clang + export CXX=/usr/bin/clang++ + fi if [[ $# -ne 0 ]]; then for cmd in "$@"; do run_and_time "${cmd}" diff --git a/velox/common/base/Doubles.h b/velox/common/base/Doubles.h index 4e51257f9967..2f986b487c5b 100644 --- a/velox/common/base/Doubles.h +++ b/velox/common/base/Doubles.h @@ -38,4 +38,12 @@ namespace facebook::velox { static constexpr double kMaxDoubleBelowInt64Max = 9223372036854774784.0; /// 2 ^ 63 static constexpr double kMinDoubleAboveInt64Max = 9223372036854775808.0; + +/// For int128::max() +/// 2 ^ 127 - 2 ^ 74 +static constexpr double kMaxDoubleBelowInt128Max = + 170141183460469212842221372237303250944.0; +/// 2 ^ 127 +static constexpr double kMinDoubleAboveInt128Max = + 170141183460469231731687303715884105728.0; } // namespace facebook::velox diff --git a/velox/dwio/dwrf/test/ColumnWriterTest.cpp b/velox/dwio/dwrf/test/ColumnWriterTest.cpp index 4af07b83238e..4268765f516b 100644 --- a/velox/dwio/dwrf/test/ColumnWriterTest.cpp +++ b/velox/dwio/dwrf/test/ColumnWriterTest.cpp @@ -841,15 +841,14 @@ void mapToStruct( } } -template +template void testMapWriter( MemoryPool& pool, const std::vector& batches, bool useFlatMap, bool disableDictionaryEncoding, bool testEncoded, - bool printMaps = true, - bool useStruct = false) { + bool printMaps = true) { const auto rowType = CppToType>>::create(); const auto dataType = rowType->childAt(0); const auto rowTypeWithId = TypeWithId::create(rowType); @@ -866,7 +865,7 @@ void testMapWriter( std::vector structs; std::unordered_map> structReaderContext; if (useFlatMap) { - if (useStruct) { + if constexpr (useStruct) { structs = batches; pBatches = &structs; std::vector uniqueKeys; @@ -1158,26 +1157,25 @@ TEST_F(ColumnWriterTest, TestMapWriterNestedRow) { testMapWriterRowImpl>(); } -template +template void testMapWriter( MemoryPool& pool, const VectorPtr& batch, bool useFlatMap, - bool printMaps = true, - bool useStruct = false) { + bool printMaps = true) { std::vector batches{batch, batch}; - testMapWriter( - pool, batches, useFlatMap, true, false, printMaps, useStruct); + testMapWriter( + pool, batches, useFlatMap, true, false, printMaps); if (useFlatMap) { - testMapWriter( - pool, batches, useFlatMap, false, false, printMaps, useStruct); - testMapWriter( - pool, batches, useFlatMap, true, true, printMaps, useStruct); + testMapWriter( + pool, batches, useFlatMap, false, false, printMaps); + testMapWriter( + pool, batches, useFlatMap, true, true, printMaps); } } -template -void testMapWriterNumericKey(bool useFlatMap, bool useStruct = false) { +template +void testMapWriterNumericKey(bool useFlatMap) { using b = MapBuilder; auto pool = memory::memoryManager()->addLeafPool(); @@ -1191,7 +1189,14 @@ void testMapWriterNumericKey(bool useFlatMap, bool useStruct = false) { typename b::pair{ std::numeric_limits::min(), std::numeric_limits::min()}}}); - testMapWriter(*pool, batch, useFlatMap, true, useStruct); + testMapWriter(*pool, batch, useFlatMap, true); +} + +// Workaround to avoid issues with two template arguments when wrapped in gtest +// EXPECT macros. +template +void testMapWriterNumericKeyUseStruct(bool useFlatMap) { + testMapWriterNumericKey(useFlatMap); } TEST_F(ColumnWriterTest, TestMapWriterFloatKey) { @@ -1203,8 +1208,8 @@ TEST_F(ColumnWriterTest, TestMapWriterFloatKey) { EXPECT_THROW( { - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKeyUseStruct( + /* useFlatMap */ true); }, exception::LoggedException); } @@ -1212,7 +1217,7 @@ TEST_F(ColumnWriterTest, TestMapWriterFloatKey) { TEST_F(ColumnWriterTest, TestMapWriterInt64Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey(/* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey(/* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterDuplicatedInt64Key) { @@ -1234,22 +1239,22 @@ TEST_F(ColumnWriterTest, TestMapWriterDuplicatedInt64Key) { TEST_F(ColumnWriterTest, TestMapWriterInt32Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey( + /* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterInt16Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey( + /* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterInt8Key) { testMapWriterNumericKey(/* useFlatMap */ false); testMapWriterNumericKey(/* useFlatMap */ true); - testMapWriterNumericKey( - /* useFlatMap */ true, /* useStruct */ true); + testMapWriterNumericKey( + /* useFlatMap */ true); } TEST_F(ColumnWriterTest, TestMapWriterStringKey) { @@ -1265,8 +1270,8 @@ TEST_F(ColumnWriterTest, TestMapWriterStringKey) { testMapWriter(*pool_, batch, /* useFlatMap */ false); testMapWriter(*pool_, batch, /* useFlatMap */ true); - testMapWriter( - *pool_, batch, /* useFlatMap */ true, true, /* useStruct */ true); + testMapWriter( + *pool_, batch, /* useFlatMap */ true, true); } TEST_F(ColumnWriterTest, TestMapWriterDuplicatedStringKey) { @@ -1362,8 +1367,8 @@ TEST_F(ColumnWriterTest, TestMapWriterBinaryKey) { testMapWriter(*pool_, batch, /* useFlatMap */ false); testMapWriter(*pool_, batch, /* useFlatMap */ true); - testMapWriter( - *pool_, batch, /* useFlatMap */ true, true, /* useStruct */ true); + testMapWriter( + *pool_, batch, /* useFlatMap */ true, true); } template @@ -4300,7 +4305,7 @@ TEST_F(ColumnWriterTest, ColumnIdInStream) { ASSERT_NE(streams.getStream(si, {}, false), nullptr); } -template +template struct DictColumnWriterTestCase { DictColumnWriterTestCase(size_t size, bool writeDirect, const TypePtr& type) : size_(size), writeDirect_(writeDirect), type_(type) {} @@ -4369,6 +4374,7 @@ struct DictColumnWriterTestCase { * Map) * @return */ + template VectorPtr createDictionaryBatch( size_t size, std::function valueAt, @@ -4378,10 +4384,10 @@ struct DictColumnWriterTestCase { VectorPtr dictionaryVector; VectorPtr flatVector; - if (complexRowType == nullptr) { - flatVector = makeFlatVector(size, valueAt, isNullAt); - } else { + if constexpr (isComplexRowType) { flatVector = makeComplexVectors(complexRowType, size, isNullAt); + } else { + flatVector = makeFlatVector(size, valueAt, isNullAt); } auto wrappedVector = BaseVector::wrapInDictionary( @@ -4400,14 +4406,12 @@ struct DictColumnWriterTestCase { WriterContext context{config, memory::memoryManager()->addRootPool()}; context.initBuffer(); - // complexVectorType will be nullptr if the vector is not complex. - bool isComplexType = std::dynamic_pointer_cast(type_) || - std::dynamic_pointer_cast(type_) || - std::dynamic_pointer_cast(type_); - - auto complexVectorType = isComplexType ? rowType : nullptr; - auto batch = - createDictionaryBatch(size_, valueAt, isNullAt, complexVectorType); + VectorPtr batch; + if constexpr (isComplexTypeT) { + batch = createDictionaryBatch(size_, valueAt, isNullAt, rowType); + } else { + batch = createDictionaryBatch(size_, valueAt, isNullAt); + } const auto writer = BaseColumnWriter::create(context, *typeWithId); @@ -4457,7 +4461,7 @@ std::function randomNulls(int32_t n) { [n](vector_size_t /*index*/) { return folly::Random::rand32() % n == 0; }; } -template +template void testDictionary( const TypePtr& type, std::function isNullAt = nullptr, @@ -4465,18 +4469,17 @@ void testDictionary( constexpr int32_t vectorSize = 200; // Tests for null/non null data with direct or dict write - DictColumnWriterTestCase(vectorSize, true, type) + DictColumnWriterTestCase(vectorSize, true, type) .runTest(valueAt, isNullAt); - DictColumnWriterTestCase(vectorSize, false, type) + DictColumnWriterTestCase(vectorSize, false, type) .runTest(valueAt, isNullAt); // Tests for non null data with direct or dict write - DictColumnWriterTestCase(vectorSize, true, type).runTest(valueAt, [](int) { - return false; - }); + DictColumnWriterTestCase(vectorSize, true, type) + .runTest(valueAt, [](int) { return false; }); - DictColumnWriterTestCase(vectorSize, false, type) + DictColumnWriterTestCase(vectorSize, false, type) .runTest(valueAt, [](int) { return false; }); } @@ -4520,27 +4523,28 @@ TEST_F(ColumnWriterTest, rowDictionary) { // randomly // Row tests - testDictionary>(ROW({INTEGER()}), randomNulls(5)); + testDictionary, true>(ROW({INTEGER()}), randomNulls(5)); - testDictionary>( + testDictionary, true>( ROW({VARCHAR(), INTEGER()}), randomNulls(11)); - testDictionary>>( + testDictionary>, true>( ROW({ROW({VARCHAR(), INTEGER()})}), randomNulls(11)); - testDictionary>( + testDictionary, true>( ROW({INTEGER(), DOUBLE(), VARCHAR()}), randomNulls(5)); - testDictionary>( + testDictionary, true>( ROW({INTEGER(), VARCHAR(), DOUBLE(), VARCHAR()}), randomNulls(5)); - testDictionary, StringView>>( + testDictionary, StringView>, true>( ROW({ARRAY(VARCHAR()), VARCHAR()}), randomNulls(11)); testDictionary< Row, Array>>, - Row>>( + Row>, + true>( ROW( {MAP(INTEGER(), DOUBLE()), ARRAY(MAP(INTEGER(), ROW({INTEGER(), DOUBLE()}))), @@ -4550,17 +4554,19 @@ TEST_F(ColumnWriterTest, rowDictionary) { TEST_F(ColumnWriterTest, arrayDictionary) { // Array tests - testDictionary>(ARRAY(REAL()), randomNulls(7)); + testDictionary, true>(ARRAY(REAL()), randomNulls(7)); testDictionary< - Row, Row>>>>( + Row, Row>>>, + true>( ROW( {ARRAY(INTEGER()), ROW({VARCHAR(), ARRAY(MAP(VARCHAR(), VARCHAR()))})}), randomNulls(11)); testDictionary< - Array>>>>>>( + Array>>>>>, + true>( ARRAY(MAP( INTEGER(), ARRAY(MAP(TINYINT(), ROW({VARCHAR(), ARRAY(DOUBLE())}))))), randomNulls(7)); @@ -4568,20 +4574,21 @@ TEST_F(ColumnWriterTest, arrayDictionary) { TEST_F(ColumnWriterTest, mapDictionary) { // Map tests - testDictionary>( + testDictionary, true>( MAP(INTEGER(), DOUBLE()), randomNulls(7)); - testDictionary>( + testDictionary, true>( MAP(VARCHAR(), VARCHAR()), randomNulls(13)); testDictionary< Map>>>>>( + Map>>>>, + true>( MAP(VARCHAR(), MAP(INTEGER(), ARRAY(ROW({INTEGER(), INTEGER(), ARRAY(DOUBLE())})))), randomNulls(9)); - testDictionary>>>( + testDictionary>>, true>( MAP(INTEGER(), MAP(VARCHAR(), MAP(VARCHAR(), TINYINT()))), randomNulls(3)); } diff --git a/velox/type/DecimalUtil.h b/velox/type/DecimalUtil.h index 357030b2ea45..68d6d46e4236 100644 --- a/velox/type/DecimalUtil.h +++ b/velox/type/DecimalUtil.h @@ -19,6 +19,7 @@ #include #include "velox/common/base/CheckedArithmetic.h" #include "velox/common/base/CountBits.h" +#include "velox/common/base/Doubles.h" #include "velox/common/base/Exceptions.h" #include "velox/common/base/Nulls.h" #include "velox/common/base/Status.h" @@ -217,8 +218,15 @@ class DecimalUtil { if (!std::isfinite(value)) { return Status::UserError("The input value should be finite."); } - if (value <= std::numeric_limits::min() || - value >= std::numeric_limits::max()) { + + TInput maxValue; + if constexpr (std::is_same_v) { + maxValue = kMaxDoubleBelowInt64Max; + } else { + maxValue = kMaxDoubleBelowInt128Max; + } + + if (value <= std::numeric_limits::min() || value >= maxValue) { return Status::UserError("Result overflows."); }