Skip to content

Commit

Permalink
Merge pull request #42 from wlmcewen/master
Browse files Browse the repository at this point in the history
AC-1604 sparkql built-in conjunction evaluator
  • Loading branch information
wlmcewen authored Jul 28, 2016
2 parents 46f516a + 7527537 commit c4b7187
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
v1.1.0, 2016-07-28 ([changes](https://github.com/sparkapi/sparkql/compare/v1.0.3...v1.1.0))
-------------------
* [IMPROVEMENT] Evaluation class for sparkql boolean algebra processing

v1.0.3, 2016-06-06 ([changes](https://github.com/sparkapi/sparkql/compare/v1.0.2...v1.0.3))
-------------------
* [IMPROVEMENT] Expression limit lifted to 75 expressions
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.3
1.1.0
2 changes: 2 additions & 0 deletions lib/sparkql.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
require "sparkql/token"
require "sparkql/errors"
require "sparkql/expression_state"
require "sparkql/expression_resolver"
require "sparkql/evaluator"
require "sparkql/lexer"
require "sparkql/function_resolver"
require "sparkql/parser_tools"
Expand Down
152 changes: 152 additions & 0 deletions lib/sparkql/evaluator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
module Sparkql
  # Using an instance of ExpressionResolver to resolve the individual
  # expressions, this class will evaluate the rest of a parsed sparkql string
  # to true or false. Namely, this class will handle all the nesting, boolean
  # algebra, and dropped fields. Plus, it has some optimizations built in to
  # skip the processing for any expressions that don't contribute to the net
  # result of the filter.
  class Evaluator
    # Number of expressions handed to the resolver during the most recent call
    # to #evaluate. Expressions skipped by the optimizations are not counted.
    attr_reader :processed_count

    # expression_resolver - any object responding to resolve(expression). Each
    #                       result is treated as a boolean, or as the symbol
    #                       :drop when the expression should be ignored.
    def initialize(expression_resolver)
      @resolver = expression_resolver
    end

    # Evaluates a list of expression hashes in order.
    #
    # expressions - enumerable of hashes; this class reads the :level,
    #               :block_group, :conjunction, :conjunction_level and :unary
    #               keys. The hashes are never modified.
    #
    # Returns the match state of the outermost group once every nested group
    # has been folded back into it.
    def evaluate(expressions)
      @processed_count = 0
      # A drop recorded by the final expression of a previous run must not
      # rewrite the conjunction of the first expression of this run.
      @dropped_expression = nil
      @index = root_group
      @groups = [@index]
      expressions.each do |expression|
        handle_group(expression)
        expression = adjust_expression_for_dropped_field(expression)
        check_for_good_ors(expression)
        next if skip?(expression)

        evaluate_expression(expression)
      end
      cleanup
      @index[:match]
    end

    private

    # State for the implicit outermost group that every evaluation starts in.
    def root_group
      {
        level: 0,
        block_group: 0,
        conjunction: "And",
        conjunction_level: 0,
        match: true,
        good_ors: false,
        expressions: 0
      }
    end

    # Prepare the group stack for the next expression.
    def handle_group(expression)
      current = @index[:block_group]
      upcoming = expression[:block_group]
      # Same block group: nothing to do.
      return if current == upcoming

      if current < upcoming
        @index = new_group(expression)
        @groups.push(@index)
      else
        # Turn the group into an expression, resolve down to previous group(s)
        smoosh_group(expression)
      end
    end

    # Here's the real meat. We use an internal stack to represent the result of
    # each block_group. This logic is re-used when merging the final result of
    # one block group with the previous.
    def evaluate_expression(expression)
      @processed_count += 1
      evaluate_node(expression, @resolver.resolve(expression))
    end

    # Folds one result into the current group (@index).
    #
    # node   - an expression hash, or a finished group hash being merged into
    #          its parent.
    # result - the resolver result for an expression, or the group's :match.
    #
    # Returns :drop untouched, otherwise the result after unary negation.
    def evaluate_node(node, result)
      if result == :drop
        # Remembered so the next expression can inherit this conjunction.
        @dropped_expression = node
        return result
      end

      result = !result if node[:unary] == "Not"

      if node[:conjunction] == 'Not' &&
         (node[:conjunction_level] == node[:level] ||
          node[:conjunction_level] == @index[:level])
        @index[:match] = !result
      elsif node[:conjunction] == 'And' || @index[:expressions] == 0
        # The first node of a group is applied like an And against the
        # group's initial match state.
        @index[:match] = result if @index[:match]
      elsif node[:conjunction] == 'Or' && result
        @index[:match] = result
      end
      @index[:expressions] += 1
      result
    end

    # Optimization logic, once we find any set of And'd expressions that pass
    # and run into an Or at the same level, we can skip further processing at
    # that level.
    def check_for_good_ors(expression)
      return unless expression[:conjunction] == 'Or'

      level = expression[:conjunction_level]
      target = if level == @index[:level]
                 @index
               else
                 # The Or belongs to another level; take the first group on the
                 # stack (outermost first) recorded at that level.
                 @groups.find { |group| group[:level] == level }
               end
      return if target.nil?

      target[:good_ors] = true if target[:expressions] > 0 && target[:match]
    end

    # We can skip further expression processing when And-d with a false
    # expression or a "good Or" was already encountered.
    def skip?(expression)
      @index[:good_ors] ||
        (!@index[:match] && expression[:conjunction] == 'And')
    end

    # Fresh state for a nested group, seeded from the expression that opens it.
    def new_group(expression)
      {
        level: expression[:level],
        block_group: expression[:block_group],
        conjunction: expression[:conjunction],
        conjunction_level: expression[:conjunction_level],
        match: true,
        good_ors: false,
        expressions: 0
      }
    end

    # When the last expression was dropped, we need to repair the filter by
    # stealing the conjunction of that dropped field. A pending drop only
    # affects the expression that immediately follows it.
    #
    # Returns the expression to evaluate: the given hash when no repair is
    # needed, otherwise a copy carrying the dropped expression's conjunction,
    # so the caller's parsed filter is left untouched.
    def adjust_expression_for_dropped_field(expression)
      dropped = @dropped_expression
      return expression if dropped.nil?

      @dropped_expression = nil
      return expression unless dropped[:block_group] == expression[:block_group]

      expression.merge(
        conjunction: dropped[:conjunction],
        conjunction_level: dropped[:conjunction_level]
      )
    end

    # This is similar to the cleanup step, but happens when we return from a
    # nesting level. Before we can proceed, we need to wrap up the result of
    # the nested group.
    def smoosh_group(expression)
      collapse_last_group until @groups.last[:block_group] == expression[:block_group]
    end

    # Pop off the group stack, evaluating each group with the previous as we
    # go. Returns the match state of the one remaining group.
    def cleanup
      collapse_last_group while @groups.size > 1
      @groups.last[:match]
    end

    # Pops the innermost group and folds its match state into its parent as if
    # the whole group were a single expression.
    def collapse_last_group
      finished = @groups.pop
      @index = @groups.last
      evaluate_node(finished, finished[:match])
    end
  end
end
11 changes: 11 additions & 0 deletions lib/sparkql/expression_resolver.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
module Sparkql
  # Base class for handling expression resolution
  class ExpressionResolver
    # Every value #resolve is allowed to return. Frozen so the shared list
    # cannot be altered at runtime.
    VALID_RESULTS = [true, false, :drop].freeze

    # Evaluate the result of this expression. Allows for any of the values in
    # VALID_RESULTS.
    #
    # expression - the expression to resolve; ignored by this base
    #              implementation.
    #
    # Returns true for every expression; subclasses override this method.
    def resolve(expression)
      true
    end
  end
end
9 changes: 5 additions & 4 deletions lib/sparkql/parser_tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,17 @@ def tokenize_conjunction(exp1, conj, exp2)
end

# Marks the first expression in +exp+ with the unary operator +conj+ and the
# lexer level it was found at, then returns +exp+ itself.
def tokenize_unary_conjunction(conj, exp)
  head = exp.first

  # Handles the case when a SparkQL filter string begins with a unary
  # operator, and is nested, such as:
  #   Not (Not Field Eq 1)
  # In this instance we treat the outer unary as a conjunction.
  leading_nested_unary = @expression_count == 1 && @lexer.level > 0
  if leading_nested_unary
    head[:conjunction] = conj
    head[:conjunction_level] = @lexer.level - 1
  end

  head[:unary] = conj
  head[:unary_level] = @lexer.level
  exp
end

Expand Down
12 changes: 12 additions & 0 deletions test/support/boolean_or_bust_expression_resolver.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# A super simple expression resolver for testing... returns the boolean value as
# the result for the expression, or when not a boolean, drops the expression.
# Minimal resolver used by the tests: a boolean expression resolves to whether
# its value is the string "true"; any other expression type is dropped.
class BooleanOrBustExpressionResolver < Sparkql::ExpressionResolver
  def resolve(expression)
    return :drop unless expression[:type] == :boolean

    "true" == expression[:value]
  end
end
84 changes: 84 additions & 0 deletions test/unit/evaluator_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
require 'test_helper'
require 'support/boolean_or_bust_expression_resolver'

# Exercises Sparkql::Evaluator end to end: every filter is parsed and then
# evaluated with BooleanOrBustExpressionResolver, so "Eq true"/"Eq false"
# expressions resolve to their literal value and anything else is dropped.
class EvaluatorTest < Test::Unit::TestCase
  include Sparkql

  def test_simple
    check_filters(
      'Test Eq true' => true,
      'Test Eq false' => false,
      "Test Eq 'Drop'" => true
    )
  end

  def test_conjunction
    check_filters(
      'Test Eq true And Test Eq true' => true,
      'Test Eq false And Test Eq true' => false,
      'Test Eq false And Test Eq false' => false,
      # Ors
      'Test Eq true Or Test Eq true' => true,
      'Test Eq true Or Test Eq false' => true,
      'Test Eq false Or Test Eq true' => true,
      'Test Eq false Or Test Eq false' => false
    )
  end

  def test_dropped_field_handling
    check_filters(
      "Test Eq 'Drop' And Test Eq true" => true,
      "Test Eq 'Drop' And Test Eq false" => false,
      "Test Eq 'Drop' Or Test Eq false" => false,
      "Test Eq 'Drop' Or Test Eq true" => true,
      "Test Eq false And Test Eq 'Drop' Or Test Eq true" => true,
      "Test Eq false Or (Test Eq 'Drop' And Test Eq true)" => true
    )
  end

  def test_nesting
    check_filters(
      'Test Eq true Or (Test Eq true) And Test Eq false And (Test Eq true)' => true,
      'Test Eq true Or ((Test Eq false) And Test Eq false) And (Test Eq false)' => true,
      '(Test Eq false Or Test Eq true) Or (Test Eq false Or Test Eq false)' => true,
      '(Test Eq true And Test Eq true) Or (Test Eq false)' => true,
      '(Test Eq true And Test Eq true) Or (Test Eq false And Test Eq true)' => true,
      '(Test Eq false And Test Eq true) Or (Test Eq false)' => false,
      'Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq true)' => true,
      'Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) And Test Eq true' => false,
      'Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq false' => false,
      'Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq true' => true
    )
  end

  def test_nots
    check_filters(
      'Not Test Eq true' => false,
      'Not Test Eq false' => true,
      'Not (Test Eq true)' => false,
      'Not (Test Eq false)' => true,
      'Test Eq true Not Test Eq false' => true,
      'Test Eq true Not Test Eq true' => false,
      'Test Eq true Not (Test Eq false Or Test Eq false)' => true,
      'Test Eq true Not (Test Eq false And Test Eq false)' => true,
      'Test Eq true Not (Test Eq false Or Test Eq true)' => false,
      'Test Eq true Not (Test Eq true Or Test Eq false)' => false,
      'Test Eq true Not (Not Test Eq false)' => false,
      'Not (Not Test Eq true)' => true,
      'Not (Not(Not Test Eq true))' => true
    )
  end

  # Each filter is paired with the number of expressions the evaluator should
  # actually hand to the resolver before the result is known.
  def test_optimizations
    {
      'Test Eq true Or Test Eq false And Test Eq false' => 1,
      'Test Eq false Or Test Eq true And Test Eq true' => 3,
      '(Test Eq true Or Test Eq false) And Test Eq true' => 2,
      '(Test Eq false Or Test Eq true) And Test Eq true' => 3
    }.each do |filter, resolved|
      assert sample(filter), filter
      assert_equal resolved, @evaluator.processed_count, filter
    end
  end

  # Here's some examples from prospector's tests that have been simplified a bit.
  def test_advanced
    check_filters(
      'MlsStatus Eq false And PropertyType Eq true And (City Eq true Or City Eq false)' => false
    )
  end

  # Parses +filter+ and evaluates it with a fresh evaluator. The parser,
  # expressions and evaluator stay available in instance variables so a test
  # can inspect them afterwards.
  def sample(filter)
    @parser = Parser.new
    @expressions = @parser.parse(filter)
    @evaluator = Evaluator.new(BooleanOrBustExpressionResolver.new)
    @evaluator.evaluate(@expressions)
  end

  private

  # Asserts, in order, that each filter evaluates truthy (true) or falsy
  # (false) as given by its paired expectation.
  def check_filters(expectations)
    expectations.each do |filter, expected|
      if expected
        assert sample(filter), filter
      else
        assert !sample(filter), filter
      end
    end
  end
end

0 comments on commit c4b7187

Please sign in to comment.