Skip to content

Commit

Permalink
ARROW-12506: [Python] Improve modularity of pyarrow codebase: _hdfsio module
Browse files Browse the repository at this point in the history

Second batch of changes aimed at making the pyarrow build more modular. `hdfs-io` is no longer included in `pyarrow.lib`; it has been split out into its own module, `pyarrow._hdfsio`.

This PR is based on #10131

Closes #10159 from amol-/ARROW-12506-2

Authored-by: Alessandro Molina <amol@turbogears.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
amol- authored and pitrou committed Apr 29, 2021
1 parent beb5d18 commit 9391951
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 7 deletions.
3 changes: 2 additions & 1 deletion python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,11 @@ endif()

set(CYTHON_EXTENSIONS
lib
_fs
_compute
_csv
_feather
_fs
_hdfsio
_json)

set(LINK_LIBS arrow_shared arrow_python_shared)
Expand Down
8 changes: 5 additions & 3 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,17 @@ def show_versions():
log_memory_allocations, jemalloc_set_decay_ms)

# I/O
from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
from pyarrow.lib import (NativeFile, PythonFile,
BufferedInputStream, BufferedOutputStream,
CompressedInputStream, CompressedOutputStream,
TransformInputStream, transcoding_input_stream,
FixedSizeBufferWriter,
BufferReader, BufferOutputStream,
OSFile, MemoryMappedFile, memory_map,
create_memory_map, have_libhdfs,
MockOutputStream, input_stream, output_stream)
create_memory_map, MockOutputStream,
input_stream, output_stream)

from pyarrow._hdfsio import HdfsFile, have_libhdfs

from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
concat_arrays, concat_tables)
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/io-hdfs.pxi → python/pyarrow/_hdfsio.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@
# ----------------------------------------------------------------------
# HDFS IO implementation

# cython: language_level = 3

import re

from pyarrow.lib cimport check_status, _Weakrefable, NativeFile
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_fs cimport *
from pyarrow.lib import frombytes, tobytes, ArrowIOError

from queue import Queue, Empty as QueueEmpty, Full as QueueFull


Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@

from pyarrow.util import implements, _DEPR_MSG
from pyarrow.filesystem import FileSystem
import pyarrow.lib as lib
import pyarrow._hdfsio as _hdfsio


class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
class HadoopFileSystem(_hdfsio.HadoopFileSystem, FileSystem):
"""
DEPRECATED: FileSystem interface for HDFS cluster.
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import threading
import time
import warnings
from io import BufferedIOBase, IOBase, TextIOBase, UnsupportedOperation
from queue import Queue, Empty as QueueEmpty

from pyarrow.util import _is_path_like, _stringify_path

Expand Down
1 change: 0 additions & 1 deletion python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ include "tensor.pxi"

# File IO
include "io.pxi"
include "io-hdfs.pxi"

# IPC / Messaging
include "ipc.pxi"
Expand Down
1 change: 1 addition & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def initialize_options(self):
'_plasma',
'_s3fs',
'_hdfs',
'_hdfsio',
'gandiva']

def _run_cmake(self):
Expand Down

0 comments on commit 9391951

Please sign in to comment.