Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove arrow dependency #16640

Merged
merged 52 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
a43e7f2
Remove libarrow dependency from libcudf and migrate to tests only
vyasr Aug 20, 2024
d1d23dd
Remove libarrow dependency from all Cython builds
vyasr Aug 20, 2024
424993f
Lots of cleanup and simplification of get_arrow.cmake
vyasr Aug 20, 2024
a1bf94f
Move module to tests
vyasr Aug 20, 2024
36a86b5
Remove now unnecessary args
vyasr Aug 20, 2024
aff2a66
Remove one unnecessary branch
vyasr Aug 20, 2024
37d2793
Remove some unnecessary specializations for some targets
vyasr Aug 20, 2024
07547b0
Switch to static arrow
vyasr Aug 20, 2024
14391ea
Clean out some unnecessary bits
vyasr Aug 20, 2024
2d544a1
Stop exporting since now it's a private dependency
vyasr Aug 20, 2024
d865616
Some variable simplification
vyasr Aug 20, 2024
3fe333d
Set Arrow_FOUND at the end
vyasr Aug 20, 2024
5040b5d
Remove now unused code string
vyasr Aug 20, 2024
e84e26b
Combine some conditions
vyasr Aug 20, 2024
9222d15
Reenable linting rules
vyasr Aug 20, 2024
c129ec7
Remove libarrow from cpp build reqs
vyasr Aug 20, 2024
282da76
Remove pyarrow from Python build deps
vyasr Aug 20, 2024
614992c
Drop numpy as a build dependency
vyasr Aug 20, 2024
987fdf0
Add get_arrow.cmake to JNI build
vyasr Aug 21, 2024
c8dce44
Stop installing arrow into wheels
vyasr Aug 22, 2024
6eb7e99
Revert changes to get_arrow.cmake
vyasr Aug 22, 2024
3ec9ee5
Disable libarrow from pyarrow
vyasr Aug 22, 2024
2e1c11e
Remove I/O and Python options
vyasr Aug 22, 2024
c6c4ba6
Remove no longer supported armv8 option
vyasr Aug 22, 2024
c8b8e42
Remove export logic since we no longer install
vyasr Aug 22, 2024
4664212
Port changes over to Java copy
vyasr Aug 22, 2024
b5721cf
Two more tests need Arrow
vyasr Aug 22, 2024
ec9105b
Add Arrow dep to example and default Arrow linkage to static everywhere
vyasr Aug 22, 2024
e90fdf8
Add build type specifier
vyasr Aug 22, 2024
8af5b33
Temporarily disable interop example to validate everything else
vyasr Aug 22, 2024
b9cf424
Set the build type correctly
vyasr Aug 22, 2024
a6e8528
Go back to a single get_arrow.cmake
vyasr Aug 22, 2024
923d1f2
Update comment for interop example
vyasr Aug 22, 2024
351c358
Stop passing nonexistent args
vyasr Aug 22, 2024
bb5b267
Default to excluding Arrow from installation
vyasr Aug 22, 2024
9c3dc42
Merge remote-tracking branch 'upstream/branch-24.10' into feat/remove…
vyasr Aug 23, 2024
3730e99
Merge remote-tracking branch 'upstream/branch-24.10' into feat/remove…
vyasr Aug 26, 2024
d1d5518
Fix numpy pinning
vyasr Aug 26, 2024
433ba08
Add back arrow parquet support
vyasr Aug 26, 2024
2881ea4
Merge remote-tracking branch 'upstream/branch-24.10' into feat/remove…
vyasr Aug 26, 2024
602c4d6
Fix order
vyasr Aug 26, 2024
04d6e4f
Add missing set
vyasr Aug 26, 2024
556c2b5
Remove extra parquet options altogether
vyasr Aug 26, 2024
9bc6450
Revert "Remove extra parquet options altogether"
vyasr Aug 26, 2024
afa8cb7
Remove one more set of unnecessary libarrow deps
vyasr Aug 26, 2024
213f0db
Make sure install rules are in place for the JNI
vyasr Aug 26, 2024
d3e14e0
Revert unconditional parquet inclusion
vyasr Aug 26, 2024
15ecf66
Make sure boost is available for the java test build
vyasr Aug 26, 2024
4e653e9
Put back install rules
vyasr Aug 26, 2024
894ac93
Generate the install rules conditionally
vyasr Aug 27, 2024
aa2952f
Include rapids-export
vyasr Aug 27, 2024
29833e6
Merge remote-tracking branch 'upstream/branch-24.10' into feat/remove…
vyasr Aug 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"

python -m auditwheel repair \
--exclude libcudf.so \
--exclude libarrow.so.1601 \
--exclude libnvcomp.so \
--exclude libnvcomp_bitcomp.so \
--exclude libnvcomp_gdeflate.so \
Expand Down
2 changes: 1 addition & 1 deletion ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ package_dir="python/libcudf"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair --exclude libarrow.so.1601 -w ${package_dir}/final_dist ${package_dir}/dist/*
python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
1 change: 0 additions & 1 deletion ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"

python -m auditwheel repair \
--exclude libcudf.so \
--exclude libarrow.so.1601 \
--exclude libnvcomp.so \
--exclude libnvcomp_bitcomp.so \
--exclude libnvcomp_gdeflate.so \
Expand Down
6 changes: 0 additions & 6 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,11 @@ dependencies:
- hypothesis
- identify>=2.5.20
- ipython
- libarrow-acero==16.1.0.*
- libarrow-dataset==16.1.0.*
- libarrow==16.1.0.*
- libcufile-dev=1.4.0.31
- libcufile=1.4.0.31
- libcurand-dev=10.3.0.86
- libcurand=10.3.0.86
- libkvikio==24.10.*,>=0.0.0a0
- libparquet==16.1.0.*
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.10.*,>=0.0.0a0
- make
Expand All @@ -56,7 +52,6 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy
- numpy>=1.23,<2.0a0
- numpydoc
- nvcc_linux-64=11.8
Expand All @@ -68,7 +63,6 @@ dependencies:
- pandoc
- pre-commit
- ptxcompiler
- pyarrow==16.1.0.*
- pydata-sphinx-theme!=0.14.2
- pytest-benchmark
- pytest-cases>=3.8.2
Expand Down
6 changes: 0 additions & 6 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,9 @@ dependencies:
- hypothesis
- identify>=2.5.20
- ipython
- libarrow-acero==16.1.0.*
- libarrow-dataset==16.1.0.*
- libarrow==16.1.0.*
- libcufile-dev
- libcurand-dev
- libkvikio==24.10.*,>=0.0.0a0
- libparquet==16.1.0.*
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.10.*,>=0.0.0a0
- make
Expand All @@ -55,7 +51,6 @@ dependencies:
- ninja
- notebook
- numba>=0.57
- numpy
- numpy>=1.23,<2.0a0
- numpydoc
- nvcomp==3.0.6
Expand All @@ -65,7 +60,6 @@ dependencies:
- pandas>=2.0,<2.2.3dev0
- pandoc
- pre-commit
- pyarrow==16.1.0.*
- pydata-sphinx-theme!=0.14.2
- pynvjitlink>=0.0.0a0
- pytest-benchmark
Expand Down
6 changes: 1 addition & 5 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ requirements:
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
- dlpack >=0.8,<1.0
# TODO: Change to `2.0` for NumPy 2
- numpy 1.23
- pyarrow ==16.1.0.*
- libcudf ={{ version }}
- pylibcudf ={{ version }}
- rmm ={{ minor_version }}
Expand All @@ -84,9 +81,8 @@ requirements:
- pandas >=2.0,<2.2.3dev0
- cupy >=12.0.0
- numba >=0.57
# TODO: Update `numpy` in `host` when dropping `<2.0a0`
- numpy >=1.23,<2.0a0
vyasr marked this conversation as resolved.
Show resolved Hide resolved
- {{ pin_compatible('pyarrow', max_pin='x.x') }}
- pyarrow ==16.1.0.*
- libcudf ={{ version }}
- pylibcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
3 changes: 0 additions & 3 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ c_stdlib_version:
cmake_version:
- ">=3.26.4,!=3.30.0"

libarrow_version:
- "==16.1.0"

dlpack_version:
- ">=0.8,<1.0"

Expand Down
2 changes: 0 additions & 2 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ requirements:
{% endif %}
- cuda-version ={{ cuda_version }}
- nvcomp {{ nvcomp_version }}
- libarrow {{ libarrow_version }}
- dlpack {{ dlpack_version }}
- librdkafka {{ librdkafka_version }}
- fmt {{ fmt_version }}
Expand Down Expand Up @@ -92,7 +91,6 @@ outputs:
- cmake {{ cmake_version }}
host:
- cuda-version ={{ cuda_version }}
- libarrow {{ libarrow_version }}
run:
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
{% if cuda_major == "11" %}
Expand Down
6 changes: 1 addition & 5 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ requirements:
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
- dlpack >=0.8,<1.0
# TODO: Change to `2.0` for NumPy 2
- numpy 1.23
- pyarrow ==16.1.0.*
- libcudf ={{ version }}
- rmm ={{ minor_version }}
{% if cuda_major == "11" %}
Expand All @@ -81,9 +78,8 @@ requirements:
- python
- typing_extensions >=4.0.0
- pandas >=2.0,<2.2.3dev0
# TODO: Update `numpy` in `host` when dropping `<2.0a0`
- numpy >=1.23,<2.0a0
vyasr marked this conversation as resolved.
Show resolved Hide resolved
- {{ pin_compatible('pyarrow', max_pin='x.x') }}
- pyarrow ==16.1.0.*
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
Expand Down
27 changes: 1 addition & 26 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,6 @@ mark_as_advanced(CUDF_BUILD_TESTUTIL)
option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON)
option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF)
mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED)
option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF)
option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" OFF)
option(
CUDF_USE_PER_THREAD_DEFAULT_STREAM
"Build cuDF with per-thread default stream, including passing the per-thread default
Expand All @@ -81,8 +76,6 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
Expand All @@ -100,8 +93,6 @@ message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}")
message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}")
message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}")
message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}")
message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}")
message(
VERBOSE
Expand Down Expand Up @@ -192,8 +183,6 @@ include(cmake/thirdparty/get_nvcomp.cmake)
include(cmake/thirdparty/get_cccl.cmake)
# find rmm
include(cmake/thirdparty/get_rmm.cmake)
# find arrow
include(cmake/thirdparty/get_arrow.cmake)
# find flatbuffers
include(cmake/thirdparty/get_flatbuffers.cmake)
# find dlpack
Expand Down Expand Up @@ -806,7 +795,7 @@ add_dependencies(cudf jitify_preprocess_run)
# Specify the target module library dependencies
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
PUBLIC CCCL::CCCL rmm::rmm $<BUILD_LOCAL_INTERFACE:BS::thread_pool>
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
kvikio::kvikio $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
)
Expand Down Expand Up @@ -1055,20 +1044,6 @@ following IMPORTED GLOBAL targets:
]=]
)

if(CUDF_ENABLE_ARROW_PARQUET)
string(
APPEND
install_code_string
[=[
if(NOT Parquet_DIR)
set(Parquet_DIR "${Arrow_DIR}")
endif()
set(ArrowDataset_DIR "${Arrow_DIR}")
find_dependency(ArrowDataset)
]=]
)
endif()

string(
APPEND
install_code_string
Expand Down
Loading
Loading