Skip to content

Commit

Permalink
Add a gandiva Configuration class for the python binding so that dump…
Browse files Browse the repository at this point in the history
…ing IR can be done.
  • Loading branch information
niyue committed Dec 20, 2023
1 parent 3213dbf commit 7177d7f
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 7 deletions.
67 changes: 63 additions & 4 deletions python/pyarrow/gandiva.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ from pyarrow.includes.libgandiva cimport (
CNode, CProjector, CFilter,
CSelectionVector,
_ensure_selection_mode,
CConfiguration,
CConfigurationBuilder,
TreeExprBuilder_MakeExpression,
TreeExprBuilder_MakeFunction,
Expand Down Expand Up @@ -186,6 +187,10 @@ cdef class Projector(_Weakrefable):
self.pool = pool
return self

@property
def llvm_ir(self):
return self.projector.get().DumpIR().decode()

def evaluate(self, RecordBatch batch, SelectionVector selection=None):
"""
Evaluate the specified record batch and return the arrays at the
Expand Down Expand Up @@ -231,6 +236,10 @@ cdef class Filter(_Weakrefable):
self.filter = filter
return self

@property
def llvm_ir(self):
return self.filter.get().DumpIR().decode()

def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'):
"""
Evaluate the specified record batch and return a selection vector.
Expand Down Expand Up @@ -575,9 +584,47 @@ cdef class TreeExprBuilder(_Weakrefable):
condition.node)
return Condition.create(r)

cdef class Configuration(_Weakrefable):
cdef:
shared_ptr[CConfiguration] configuration

def __cinit__(self, bint optimize=True, bint dump_ir=False):
"""
Initialize the configuration with specified options.
Parameters
----------
optimize : bool, default True
Whether to enable optimizations.
dump_ir : bool, default False
Whether to dump LLVM IR.
"""
self.configuration = CConfigurationBuilder().build()
self.configuration.get().set_optimize(optimize)
self.configuration.get().set_dump_ir(dump_ir)

@staticmethod
cdef create(shared_ptr[CConfiguration] configuration):
"""
Create a Configuration instance from an existing CConfiguration pointer.
Parameters
----------
configuration : shared_ptr[CConfiguration]
Existing CConfiguration pointer.
Returns
-------
Configuration instance
"""
cdef Configuration self = Configuration.__new__(Configuration)
self.configuration = configuration
return self


cpdef make_projector(Schema schema, children, MemoryPool pool,
str selection_mode="NONE"):
str selection_mode="NONE",
Configuration configuration=None):
"""
Construct a projection using expressions.
Expand All @@ -594,6 +641,8 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,
Memory pool used to allocate output arrays.
selection_mode : str, default "NONE"
Possible values are NONE, UINT16, UINT32, UINT64.
configuration : pyarrow.gandiva.Configuration, default None
Configuration for the projector.
Returns
-------
Expand All @@ -604,6 +653,9 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,
c_vector[shared_ptr[CGandivaExpression]] c_children
shared_ptr[CProjector] result

if configuration is None:
configuration = Configuration()

for child in children:
if child is None:
raise TypeError("Expressions must not be None")
Expand All @@ -612,12 +664,13 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,
check_status(
Projector_Make(schema.sp_schema, c_children,
_ensure_selection_mode(selection_mode),
CConfigurationBuilder.DefaultConfiguration(),
configuration.configuration,
&result))
return Projector.create(result, pool)


cpdef make_filter(Schema schema, Condition condition):
cpdef make_filter(Schema schema, Condition condition,
Configuration configuration=None):
"""
Construct a filter based on a condition.
Expand All @@ -630,6 +683,8 @@ cpdef make_filter(Schema schema, Condition condition):
Schema for the record batches, and the condition.
condition : pyarrow.gandiva.Condition
Filter condition.
configuration : pyarrow.gandiva.Configuration, default None
Configuration for the filter.
Returns
-------
Expand All @@ -638,8 +693,12 @@ cpdef make_filter(Schema schema, Condition condition):
cdef shared_ptr[CFilter] result
if condition is None:
raise TypeError("Condition must not be None")

if configuration is None:
configuration = Configuration()

check_status(
Filter_Make(schema.sp_schema, condition.condition, &result))
Filter_Make(schema.sp_schema, condition.condition, configuration.configuration, &result))
return Filter.create(result)


Expand Down
14 changes: 13 additions & 1 deletion python/pyarrow/includes/libgandiva.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:
cdef CStatus Filter_Make \
"gandiva::Filter::Make"(
shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
shared_ptr[CConfiguration] configuration,
shared_ptr[CFilter]* filter)

cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:
Expand All @@ -278,9 +279,20 @@ cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil:
cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil:

cdef cppclass CConfiguration" gandiva::Configuration":
pass

CConfiguration()

CConfiguration(bint optimize, bint dump_ir)

void set_optimize(bint optimize)

void set_dump_ir(bint dump_ir)

cdef cppclass CConfigurationBuilder \
" gandiva::ConfigurationBuilder":
@staticmethod
shared_ptr[CConfiguration] DefaultConfiguration()

CConfigurationBuilder()

shared_ptr[CConfiguration] build()
11 changes: 9 additions & 2 deletions python/pyarrow/tests/test_gandiva.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,12 @@ def test_tree_exp_builder():

assert expr.result().type == pa.int32()

config = gandiva.Configuration(dump_ir=True)
projector = gandiva.make_projector(
schema, [expr], pa.default_memory_pool())
schema, [expr], pa.default_memory_pool(), "NONE", config)

# Gandiva generates compute kernel function named `@expr_X`
assert projector.llvm_ir.find("@expr_") != -1

a = pa.array([10, 12, -20, 5], type=pa.int32())
b = pa.array([5, 15, 15, 17], type=pa.int32())
Expand Down Expand Up @@ -101,7 +105,10 @@ def test_filter():

assert condition.result().type == pa.bool_()

filter = gandiva.make_filter(table.schema, condition)
config = gandiva.Configuration(dump_ir=True)
filter = gandiva.make_filter(table.schema, condition, config)
# Gandiva generates compute kernel function named `@expr_X`
assert filter.llvm_ir.find("@expr_") != -1

result = filter.evaluate(table.to_batches()[0], pa.default_memory_pool())
assert result.to_array().equals(pa.array(range(1000), type=pa.uint32()))
Expand Down

0 comments on commit 7177d7f

Please sign in to comment.