diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 360eb4e8be758..2202ec64f2962 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -36,6 +36,7 @@ from pyarrow.includes.libgandiva cimport ( CNode, CProjector, CFilter, CSelectionVector, _ensure_selection_mode, + CConfiguration, CConfigurationBuilder, TreeExprBuilder_MakeExpression, TreeExprBuilder_MakeFunction, @@ -186,6 +187,10 @@ cdef class Projector(_Weakrefable): self.pool = pool return self + @property + def llvm_ir(self): + return self.projector.get().DumpIR().decode() + def evaluate(self, RecordBatch batch, SelectionVector selection=None): """ Evaluate the specified record batch and return the arrays at the @@ -231,6 +236,10 @@ cdef class Filter(_Weakrefable): self.filter = filter return self + @property + def llvm_ir(self): + return self.filter.get().DumpIR().decode() + def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'): """ Evaluate the specified record batch and return a selection vector. @@ -575,9 +584,47 @@ cdef class TreeExprBuilder(_Weakrefable): condition.node) return Condition.create(r) +cdef class Configuration(_Weakrefable): + cdef: + shared_ptr[CConfiguration] configuration + + def __cinit__(self, bint optimize=True, bint dump_ir=False): + """ + Initialize the configuration with specified options. + + Parameters + ---------- + optimize : bool, default True + Whether to enable optimizations. + dump_ir : bool, default False + Whether to dump LLVM IR. + """ + self.configuration = CConfigurationBuilder().build() + self.configuration.get().set_optimize(optimize) + self.configuration.get().set_dump_ir(dump_ir) + + @staticmethod + cdef create(shared_ptr[CConfiguration] configuration): + """ + Create a Configuration instance from an existing CConfiguration pointer. + + Parameters + ---------- + configuration : shared_ptr[CConfiguration] + Existing CConfiguration pointer. + + Returns + ------- + Configuration instance + """ + cdef Configuration self = Configuration.__new__(Configuration) + self.configuration = configuration + return self + cpdef make_projector(Schema schema, children, MemoryPool pool, - str selection_mode="NONE"): + str selection_mode="NONE", + Configuration configuration=None): """ Construct a projection using expressions. @@ -594,6 +641,8 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, Memory pool used to allocate output arrays. selection_mode : str, default "NONE" Possible values are NONE, UINT16, UINT32, UINT64. + configuration : pyarrow.gandiva.Configuration, default None + Configuration for the projector. Returns ------- @@ -604,6 +653,9 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, c_vector[shared_ptr[CGandivaExpression]] c_children shared_ptr[CProjector] result + if configuration is None: + configuration = Configuration() + for child in children: if child is None: raise TypeError("Expressions must not be None") @@ -612,12 +664,13 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, check_status( Projector_Make(schema.sp_schema, c_children, _ensure_selection_mode(selection_mode), - CConfigurationBuilder.DefaultConfiguration(), + configuration.configuration, &result)) return Projector.create(result, pool) -cpdef make_filter(Schema schema, Condition condition): +cpdef make_filter(Schema schema, Condition condition, + Configuration configuration=None): """ Construct a filter based on a condition. @@ -630,6 +683,8 @@ cpdef make_filter(Schema schema, Condition condition): Schema for the record batches, and the condition. condition : pyarrow.gandiva.Condition Filter condition. + configuration : pyarrow.gandiva.Configuration, default None + Configuration for the filter. Returns ------- @@ -638,8 +693,12 @@ cpdef make_filter(Schema schema, Condition condition): cdef shared_ptr[CFilter] result if condition is None: raise TypeError("Condition must not be None") + + if configuration is None: + configuration = Configuration() + check_status( - Filter_Make(schema.sp_schema, condition.condition, &result)) + Filter_Make(schema.sp_schema, condition.condition, configuration.configuration, &result)) return Filter.create(result) diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd index fa3b72bad61be..7d76576bef2b9 100644 --- a/python/pyarrow/includes/libgandiva.pxd +++ b/python/pyarrow/includes/libgandiva.pxd @@ -252,6 +252,7 @@ cdef extern from "gandiva/filter.h" namespace "gandiva" nogil: cdef CStatus Filter_Make \ "gandiva::Filter::Make"( shared_ptr[CSchema] schema, shared_ptr[CCondition] condition, + shared_ptr[CConfiguration] configuration, shared_ptr[CFilter]* filter) cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil: @@ -278,9 +279,20 @@ cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil: cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil: cdef cppclass CConfiguration" gandiva::Configuration": - pass + + CConfiguration() + + CConfiguration(bint optimize, bint dump_ir) + + void set_optimize(bint optimize) + + void set_dump_ir(bint dump_ir) cdef cppclass CConfigurationBuilder \ " gandiva::ConfigurationBuilder": @staticmethod shared_ptr[CConfiguration] DefaultConfiguration() + + CConfigurationBuilder() + + shared_ptr[CConfiguration] build() diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py index 23c26a73fa7df..80d119a48530d 100644 --- a/python/pyarrow/tests/test_gandiva.py +++ b/python/pyarrow/tests/test_gandiva.py @@ -47,8 +47,12 @@ def test_tree_exp_builder(): assert expr.result().type == pa.int32() + config = gandiva.Configuration(dump_ir=True) projector = gandiva.make_projector( - schema, [expr], pa.default_memory_pool()) + schema, [expr], pa.default_memory_pool(), "NONE", config) + + # Gandiva generates compute kernel function named `@expr_X` + assert projector.llvm_ir.find("@expr_") != -1 a = pa.array([10, 12, -20, 5], type=pa.int32()) b = pa.array([5, 15, 15, 17], type=pa.int32()) @@ -101,7 +105,10 @@ def test_filter(): assert condition.result().type == pa.bool_() - filter = gandiva.make_filter(table.schema, condition) + config = gandiva.Configuration(dump_ir=True) + filter = gandiva.make_filter(table.schema, condition, config) + # Gandiva generates compute kernel function named `@expr_X` + assert filter.llvm_ir.find("@expr_") != -1 result = filter.evaluate(table.to_batches()[0], pa.default_memory_pool()) assert result.to_array().equals(pa.array(range(1000), type=pa.uint32()))