From 55b13ebe9d3084da72e0d45c13dacc2a5c588a10 Mon Sep 17 00:00:00 2001 From: Wei-Ting Chen Date: Mon, 19 Apr 2021 11:09:39 +0800 Subject: [PATCH] [NSE-248] fix arrow dependency order (#259) * Only read .so.300.0.0 * Fix arrow dataset dependency issue * Add ARROW_S3=ON, Add symlink copy in CMakeLists.txt. --- .github/workflows/unittests.yml | 2 +- arrow-data-source/common/pom.xml | 66 +++++++++---------- arrow-data-source/pom.xml | 64 ++++++++++-------- .../{common => }/script/build_arrow.sh | 1 + native-sql-engine/core/pom.xml | 9 ++- native-sql-engine/cpp/src/CMakeLists.txt | 45 ++++++++++--- 6 files changed, 112 insertions(+), 75 deletions(-) rename arrow-data-source/{common => }/script/build_arrow.sh (99%) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index ff26c1169..efa405d60 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -54,7 +54,7 @@ jobs: cd native-sql-engine/cpp/ mkdir -p build cd build - cmake .. -DTESTS=1 + cmake .. 
-DBUILD_ARROW=0 -DTESTS=1 make cd src ctest -R diff --git a/arrow-data-source/common/pom.xml b/arrow-data-source/common/pom.xml index c4c4ed5b5..b3ef6317e 100644 --- a/arrow-data-source/common/pom.xml +++ b/arrow-data-source/common/pom.xml @@ -10,44 +10,42 @@ 4.0.0 spark-arrow-datasource-common + + + + org.apache.arrow + arrow-memory-netty + ${arrow.version} + + + org.apache.arrow + arrow-dataset + ${arrow.version} + + + io.netty + netty-common + + + io.netty + netty-buffer + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-annotations + + + compile + + ${project.basedir}/src/main/scala ${project.basedir}/src/test/scala - - exec-maven-plugin - org.codehaus.mojo - 1.6.0 - - - Build arrow - generate-resources - - exec - - - bash - - ${script.dir}/build_arrow.sh - --tests=${datasource.cpp_tests} - --build_arrow=${datasource.build_arrow} - --static_arrow=${datasource.static_arrow} - --arrow_root=${datasource.arrow_root} - - - - - - - maven-clean-plugin - - - - ${script.dir}/build - - - - org.apache.maven.plugins maven-source-plugin diff --git a/arrow-data-source/pom.xml b/arrow-data-source/pom.xml index 68ecb248d..f2bf83390 100644 --- a/arrow-data-source/pom.xml +++ b/arrow-data-source/pom.xml @@ -48,35 +48,6 @@ - - org.apache.arrow - arrow-memory-netty - ${arrow.version} - - - org.apache.arrow - arrow-dataset - ${arrow.version} - - - io.netty - netty-common - - - io.netty - netty-buffer - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-annotations - - - compile - org.scala-lang scala-library @@ -132,6 +103,41 @@ + + exec-maven-plugin + org.codehaus.mojo + 1.6.0 + false + + + Build arrow + generate-resources + + exec + + + bash + + ${script.dir}/build_arrow.sh + --tests=${datasource.cpp_tests} + --build_arrow=${datasource.build_arrow} + --static_arrow=${datasource.static_arrow} + --arrow_root=${datasource.arrow_root} + + + + + + + maven-clean-plugin + + + + ${script.dir}/build + + + + 
org.scalatest scalatest-maven-plugin diff --git a/arrow-data-source/common/script/build_arrow.sh b/arrow-data-source/script/build_arrow.sh similarity index 99% rename from arrow-data-source/common/script/build_arrow.sh rename to arrow-data-source/script/build_arrow.sh index fd13933cb..3ede7686b 100755 --- a/arrow-data-source/common/script/build_arrow.sh +++ b/arrow-data-source/script/build_arrow.sh @@ -67,6 +67,7 @@ pushd $ARROW_SOURCE_DIR cmake ./cpp \ -DARROW_BUILD_STATIC=OFF -DARROW_BUILD_SHARED=ON -DARROW_COMPUTE=ON \ + -DARROW_S3=ON \ -DARROW_GANDIVA_JAVA=ON \ -DARROW_GANDIVA=ON \ -DARROW_PARQUET=ON \ diff --git a/native-sql-engine/core/pom.xml b/native-sql-engine/core/pom.xml index c346edde6..5064374d3 100644 --- a/native-sql-engine/core/pom.xml +++ b/native-sql-engine/core/pom.xml @@ -44,7 +44,7 @@ ${cpp_tests} OFF ${static_arrow} - ${project.basedir}/../../arrow-data-source/common/script/build/arrow_install + ${project.basedir}/../../arrow-data-source/script/build/arrow_install ${arrow_root} ${build_protobuf} @@ -321,6 +321,11 @@ + + org.apache.maven.plugins + maven-resources-plugin + 3.0.1 + net.alchim31.maven scala-maven-plugin @@ -383,7 +388,7 @@ 1.0.0 false - true + false true false ${project.basedir}/src/main/scala diff --git a/native-sql-engine/cpp/src/CMakeLists.txt b/native-sql-engine/cpp/src/CMakeLists.txt index 2a36a9ba2..93f799458 100644 --- a/native-sql-engine/cpp/src/CMakeLists.txt +++ b/native-sql-engine/cpp/src/CMakeLists.txt @@ -17,7 +17,7 @@ set(ARROW_ROOT "/usr/local" CACHE PATH "Arrow Root dir") set(ARROW_BFS_INSTALL_DIR "/usr/local" CACHE PATH "Arrow Build from Source dir") set(ARROW_LIB_NAME arrow) set(GANDIVA_LIB_NAME gandiva) -set(ARROW_SHARED_LIBRARY_SUFFIX ".so.300") +set(ARROW_SHARED_LIBRARY_SUFFIX ".so") option(BUILD_ARROW "Build Arrow from Source" ON) option(STATIC_ARROW "Build Arrow with Static Libraries" OFF) @@ -137,6 +137,7 @@ macro(build_arrow STATIC_ARROW) -DARROW_BUILD_STATIC=OFF -DARROW_BUILD_SHARED=ON -DARROW_COMPUTE=ON 
+ -DARROW_S3=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON @@ -208,21 +209,49 @@ macro(build_arrow STATIC_ARROW) # Copy Arrow Shared Library to releases directory for package jar ExternalProject_Add_Step(arrow_ep copy_arrow_binary - COMMAND cp ${ARROW_PREFIX}/lib/libarrow.so.300 ${root_directory}/releases/ + COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} ${root_directory}/releases/ + COMMENT "Copy libarrow.so to releases/" + DEPENDEES mkdir download update patch configure build install java_install + WORKING_DIRECTORY "${ARROW_PREFIX}/" + ) + + ExternalProject_Add_Step(arrow_ep copy_arrow_binary_300 + COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300 ${root_directory}/releases/ COMMENT "Copy libarrow.so.300 to releases/" DEPENDEES mkdir download update patch configure build install java_install WORKING_DIRECTORY "${ARROW_PREFIX}/" ) + ExternalProject_Add_Step(arrow_ep copy_arrow_binary_300_0_0 + COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300.0.0 ${root_directory}/releases/ + COMMENT "Copy libarrow.so.300.0.0 to releases/" + DEPENDEES mkdir download update patch configure build install java_install + WORKING_DIRECTORY "${ARROW_PREFIX}/" + ) + # Copy Gandiva Shared Library to releases directory for package jar ExternalProject_Add_Step(arrow_ep copy_gandiva_binary - COMMAND cp ${ARROW_PREFIX}/lib/libgandiva.so.300 ${root_directory}/releases/ + COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} ${root_directory}/releases/ + COMMENT "Copy libgandiva.so to releases/" + DEPENDEES mkdir download update patch configure build install java_install + WORKING_DIRECTORY "${ARROW_PREFIX}/" + ) + + ExternalProject_Add_Step(arrow_ep copy_gandiva_binary_300 + COMMAND cp -a 
${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300 ${root_directory}/releases/ COMMENT "Copy libgandiva.so.300 to releases/" DEPENDEES mkdir download update patch configure build install java_install WORKING_DIRECTORY "${ARROW_PREFIX}/" ) + ExternalProject_Add_Step(arrow_ep copy_gandiva_binary_300_0_0 + COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300.0.0 ${root_directory}/releases/ + COMMENT "Copy libgandiva.so.300.0.0 to releases/" + DEPENDEES mkdir download update patch configure build install java_install + WORKING_DIRECTORY "${ARROW_PREFIX}/" + ) + # Copy Arrow Headers to releases/include ExternalProject_Add_Step(arrow_ep copy_arrow_header @@ -271,24 +300,22 @@ macro(find_arrow) set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/include") message(STATUS "Set Arrow Include Directory in ${ARROW_BFS_INCLUDE_DIR} or ${ARROW_INCLUDE_DIR}") - find_library(ARROW_LIB NAMES libarrow.so.300 PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH) + find_library(ARROW_LIB NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH) if(NOT ARROW_LIB) message(FATAL_ERROR "Arrow Library Not Found") else() message(STATUS "Arrow Library Can Be Found in ${ARROW_LIB}") endif() - find_library(GANDIVA_LIB NAMES libgandiva.so.300 PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH) + find_library(GANDIVA_LIB NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH) if(NOT GANDIVA_LIB) message(FATAL_ERROR "Gandiva Library Not Found") else() message(STATUS "Gandiva Library Can Be Found in ${GANDIVA_LIB}") endif() - file(COPY ${ARROW_LIB}.0.0 DESTINATION ${root_directory}/releases/) - file(COPY ${ARROW_LIB} DESTINATION 
${root_directory}/releases/) - file(COPY ${GANDIVA_LIB}.0.0 DESTINATION ${root_directory}/releases/) - file(COPY ${GANDIVA_LIB} DESTINATION ${root_directory}/releases/) + file(COPY ${ARROW_LIB} DESTINATION ${root_directory}/releases/ FOLLOW_SYMLINK_CHAIN) + file(COPY ${GANDIVA_LIB} DESTINATION ${root_directory}/releases/ FOLLOW_SYMLINK_CHAIN) if(EXISTS ${ARROW_BFS_INCLUDE_DIR}) message(STATUS "COPY and Set Arrow Header to: ${ARROW_BFS_INCLUDE_DIR}")