From b382a67122d6f5d1a1e4f944cbc966fd16597560 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Wed, 12 Jun 2024 01:51:22 +0800 Subject: [PATCH] More doc --- cpp/src/arrow/compute/special_form.h | 20 +++++++++++++++++++ .../special_forms/if_else_special_form.cc | 10 ++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/special_form.h b/cpp/src/arrow/compute/special_form.h index c50285128797c..b538151bb6e5b 100644 --- a/cpp/src/arrow/compute/special_form.h +++ b/cpp/src/arrow/compute/special_form.h @@ -28,8 +28,28 @@ namespace arrow { namespace compute { +/// The concept of a "special form" is borrowed from Lisp +/// (https://courses.cs.northwestern.edu/325/readings/special-forms.html). Velox also uses +/// the same term. A special form behaves like a function call except that it has special +/// evaluation rules, mostly for its arguments. +/// For example, the `if_else(cond, expr1, expr2)` special form first evaluates the +/// argument `cond` and obtains a boolean array: +/// [true, false, true, false] +/// then the argument `expr1` should ONLY be evaluated for rows: +/// [0, 2] +/// and the argument `expr2` should ONLY be evaluated for rows: +/// [1, 3] +/// If `expr1`/`expr2` has observable side effects (e.g., a division-by-zero error) on +/// rows [1, 3]/[0, 2], these side effects would be undesirably observed if +/// `if_else` were evaluated as a regular function call, which always evaluates all its +/// arguments eagerly. +/// Other special forms include `case_when`, `and`, and `or`. +/// In a vectorized execution engine, a special form normally takes advantage of a +/// "selection vector" to mask the rows of the arguments to be evaluated. class ARROW_EXPORT SpecialForm { public: + /// A poor man's factory method to create a special form by name. + /// TODO: Provide a more formal factory, maybe a registry? 
 static Result> Make(const std::string& name); virtual ~SpecialForm() = default; diff --git a/cpp/src/arrow/compute/special_forms/if_else_special_form.cc b/cpp/src/arrow/compute/special_forms/if_else_special_form.cc index 7968ff9a78e57..cade1a3ed79cc 100644 --- a/cpp/src/arrow/compute/special_forms/if_else_special_form.cc +++ b/cpp/src/arrow/compute/special_forms/if_else_special_form.cc @@ -29,16 +29,17 @@ namespace compute { Result IfElseSpecialForm::Execute(const Expression::Call& call, const ExecBatch& input, ExecContext* exec_context) { - DCHECK(!call.kernel->selection_vector_aware); - DCHECK(!input.selection_vector); + // The kernel (if_else) is not selection-vector-aware, so the input should not have a + // selection vector. + DCHECK(!call.kernel->selection_vector_aware && !input.selection_vector); std::vector arguments(call.arguments.size()); ARROW_ASSIGN_OR_RAISE(arguments[0], ExecuteScalarExpression(call.arguments[0], input, exec_context)); - // Use cond as selection vector for IF. + // Use cond as the selection vector for the IF branch. // TODO: Consider chunked array for arguments[0]. auto if_sel = std::make_shared(arguments[0].array()); - // Duplicate and invert cond as selection vector for ELSE. + // Duplicate and invert cond as the selection vector for the ELSE branch. ARROW_ASSIGN_OR_RAISE( auto else_sel, if_sel->Copy(CPUDevice::memory_manager(exec_context->memory_pool()))); @@ -53,6 +54,7 @@ Result IfElseSpecialForm::Execute(const Expression::Call& call, ARROW_ASSIGN_OR_RAISE( arguments[2], ExecuteScalarExpression(call.arguments[2], else_input, exec_context)); + // Invoke the if_else kernel now that all arguments have been evaluated. return ExecuteCallNonRecursive(call, input, arguments, exec_context); }