Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-248] fix arrow dependency order (#259)
Browse files Browse the repository at this point in the history
* Only read .so.300.0.0

* Fix arroow dataset dependency issue

* Add ARROW_S3=ON, Add symlink copy in CMakeList.
  • Loading branch information
weiting-chen authored Apr 19, 2021
1 parent 9cfd900 commit 55b13eb
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
cd native-sql-engine/cpp/
mkdir -p build
cd build
cmake .. -DTESTS=1
cmake .. -DBUILD_ARROW=0 -DTESTS=1
make
cd src
ctest -R
Expand Down
66 changes: 32 additions & 34 deletions arrow-data-source/common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,44 +10,42 @@

<modelVersion>4.0.0</modelVersion>
<artifactId>spark-arrow-datasource-common</artifactId>

<dependencies>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-dataset</artifactId>
<version>${arrow.version}</version>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-buffer</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
</exclusions>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<sourceDirectory>${project.basedir}/src/main/scala</sourceDirectory>
<testSourceDirectory>${project.basedir}/src/test/scala</testSourceDirectory>
<plugins>
<plugin>
<artifactId>exec-maven-plugin</artifactId>
<groupId>org.codehaus.mojo</groupId>
<version>1.6.0</version>
<executions>
<execution>
<id>Build arrow</id>
<phase>generate-resources</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>bash</executable>
<arguments>
<argument>${script.dir}/build_arrow.sh</argument>
<argument>--tests=${datasource.cpp_tests}</argument>
<argument>--build_arrow=${datasource.build_arrow}</argument>
<argument>--static_arrow=${datasource.static_arrow}</argument>
<argument>--arrow_root=${datasource.arrow_root}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
<filesets>
<fileset>
<directory>${script.dir}/build</directory>
</fileset>
</filesets>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
Expand Down
64 changes: 35 additions & 29 deletions arrow-data-source/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,35 +48,6 @@
</pluginRepositories>

<dependencies>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-dataset</artifactId>
<version>${arrow.version}</version>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-buffer</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
</exclusions>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
Expand Down Expand Up @@ -132,6 +103,41 @@

<build>
<plugins>
<plugin>
<artifactId>exec-maven-plugin</artifactId>
<groupId>org.codehaus.mojo</groupId>
<version>1.6.0</version>
<inherited>false</inherited>
<executions>
<execution>
<id>Build arrow</id>
<phase>generate-resources</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<executable>bash</executable>
<arguments>
<argument>${script.dir}/build_arrow.sh</argument>
<argument>--tests=${datasource.cpp_tests}</argument>
<argument>--build_arrow=${datasource.build_arrow}</argument>
<argument>--static_arrow=${datasource.static_arrow}</argument>
<argument>--arrow_root=${datasource.arrow_root}</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
<filesets>
<fileset>
<directory>${script.dir}/build</directory>
</fileset>
</filesets>
</configuration>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pushd $ARROW_SOURCE_DIR

cmake ./cpp \
-DARROW_BUILD_STATIC=OFF -DARROW_BUILD_SHARED=ON -DARROW_COMPUTE=ON \
-DARROW_S3=ON \
-DARROW_GANDIVA_JAVA=ON \
-DARROW_GANDIVA=ON \
-DARROW_PARQUET=ON \
Expand Down
9 changes: 7 additions & 2 deletions native-sql-engine/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
<nativesql.cpp_tests>${cpp_tests}</nativesql.cpp_tests>
<nativesql.build_arrow>OFF</nativesql.build_arrow>
<nativesql.static_arrow>${static_arrow}</nativesql.static_arrow>
<nativesql.arrow.bfs.install.dir>${project.basedir}/../../arrow-data-source/common/script/build/arrow_install</nativesql.arrow.bfs.install.dir>
<nativesql.arrow.bfs.install.dir>${project.basedir}/../../arrow-data-source/script/build/arrow_install</nativesql.arrow.bfs.install.dir>
<nativesql.arrow_root>${arrow_root}</nativesql.arrow_root>
<nativesql.build_protobuf>${build_protobuf}</nativesql.build_protobuf>
</properties>
Expand Down Expand Up @@ -321,6 +321,11 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.1</version>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
Expand Down Expand Up @@ -383,7 +388,7 @@
<version>1.0.0</version>
<configuration>
<verbose>false</verbose>
<failOnViolation>true</failOnViolation>
<failOnViolation>false</failOnViolation>
<includeTestSourceDirectory>true</includeTestSourceDirectory>
<failOnWarning>false</failOnWarning>
<sourceDirectory>${project.basedir}/src/main/scala</sourceDirectory>
Expand Down
45 changes: 36 additions & 9 deletions native-sql-engine/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ set(ARROW_ROOT "/usr/local" CACHE PATH "Arrow Root dir")
set(ARROW_BFS_INSTALL_DIR "/usr/local" CACHE PATH "Arrow Build from Source dir")
set(ARROW_LIB_NAME arrow)
set(GANDIVA_LIB_NAME gandiva)
set(ARROW_SHARED_LIBRARY_SUFFIX ".so.300")
set(ARROW_SHARED_LIBRARY_SUFFIX ".so")

option(BUILD_ARROW "Build Arrow from Source" ON)
option(STATIC_ARROW "Build Arrow with Static Libraries" OFF)
Expand Down Expand Up @@ -137,6 +137,7 @@ macro(build_arrow STATIC_ARROW)
-DARROW_BUILD_STATIC=OFF
-DARROW_BUILD_SHARED=ON
-DARROW_COMPUTE=ON
-DARROW_S3=ON
-DARROW_GANDIVA_JAVA=ON
-DARROW_GANDIVA=ON
-DARROW_PARQUET=ON
Expand Down Expand Up @@ -208,21 +209,49 @@ macro(build_arrow STATIC_ARROW)

# Copy Arrow Shared Library to releases directory for package jar
ExternalProject_Add_Step(arrow_ep copy_arrow_binary
COMMAND cp ${ARROW_PREFIX}/lib/libarrow.so.300 ${root_directory}/releases/
COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} ${root_directory}/releases/
COMMENT "Copy libarrow.so to releases/"
DEPENDEES mkdir download update patch configure build install java_install
WORKING_DIRECTORY "${ARROW_PREFIX}/"
)

ExternalProject_Add_Step(arrow_ep copy_arrow_binary_300
COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300 ${root_directory}/releases/
COMMENT "Copy libarrow.so.300 to releases/"
DEPENDEES mkdir download update patch configure build install java_install
WORKING_DIRECTORY "${ARROW_PREFIX}/"
)

ExternalProject_Add_Step(arrow_ep copy_arrow_binary_300_0_0
COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300.0.0 ${root_directory}/releases/
COMMENT "Copy libarrow.so.300.0.0 to releases/"
DEPENDEES mkdir download update patch configure build install java_install
WORKING_DIRECTORY "${ARROW_PREFIX}/"
)


# Copy Gandiva Shared Library to releases directory for package jar
ExternalProject_Add_Step(arrow_ep copy_gandiva_binary
COMMAND cp ${ARROW_PREFIX}/lib/libgandiva.so.300 ${root_directory}/releases/
COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} ${root_directory}/releases/
COMMENT "Copy libgandiva.so to releases/"
DEPENDEES mkdir download update patch configure build install java_install
WORKING_DIRECTORY "${ARROW_PREFIX}/"
)

ExternalProject_Add_Step(arrow_ep copy_gandiva_binary_300
COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300 ${root_directory}/releases/
COMMENT "Copy libgandiva.so.300 to releases/"
DEPENDEES mkdir download update patch configure build install java_install
WORKING_DIRECTORY "${ARROW_PREFIX}/"
)

ExternalProject_Add_Step(arrow_ep copy_gandiva_binary_300_0_0
COMMAND cp -a ${ARROW_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}.300.0.0 ${root_directory}/releases/
COMMENT "Copy libgandiva.so.300.0.0 to releases/"
DEPENDEES mkdir download update patch configure build install java_install
WORKING_DIRECTORY "${ARROW_PREFIX}/"
)


# Copy Arrow Headers to releases/include
ExternalProject_Add_Step(arrow_ep copy_arrow_header
Expand Down Expand Up @@ -271,24 +300,22 @@ macro(find_arrow)
set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/include")
message(STATUS "Set Arrow Include Directory in ${ARROW_BFS_INCLUDE_DIR} or ${ARROW_INCLUDE_DIR}")

find_library(ARROW_LIB NAMES libarrow.so.300 PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH)
find_library(ARROW_LIB NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH)
if(NOT ARROW_LIB)
message(FATAL_ERROR "Arrow Library Not Found")
else()
message(STATUS "Arrow Library Can Be Found in ${ARROW_LIB}")
endif()

find_library(GANDIVA_LIB NAMES libgandiva.so.300 PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH)
find_library(GANDIVA_LIB NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX} PATHS ${ARROW_BFS_LIB_DIR} ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH)
if(NOT GANDIVA_LIB)
message(FATAL_ERROR "Gandiva Library Not Found")
else()
message(STATUS "Gandiva Library Can Be Found in ${GANDIVA_LIB}")
endif()

file(COPY ${ARROW_LIB}.0.0 DESTINATION ${root_directory}/releases/)
file(COPY ${ARROW_LIB} DESTINATION ${root_directory}/releases/)
file(COPY ${GANDIVA_LIB}.0.0 DESTINATION ${root_directory}/releases/)
file(COPY ${GANDIVA_LIB} DESTINATION ${root_directory}/releases/)
file(COPY ${ARROW_LIB} DESTINATION ${root_directory}/releases/ FOLLOW_SYMLINK_CHAIN)
file(COPY ${GANDIVA_LIB} DESTINATION ${root_directory}/releases/ FOLLOW_SYMLINK_CHAIN)

if(EXISTS ${ARROW_BFS_INCLUDE_DIR})
message(STATUS "COPY and Set Arrow Header to: ${ARROW_BFS_INCLUDE_DIR}")
Expand Down

0 comments on commit 55b13eb

Please sign in to comment.