-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #42 from wlmcewen/master
AC-1604 sparkql built-in conjunction evaluator
- Loading branch information
Showing
8 changed files
with
271 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.0.3 | ||
1.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
module Sparkql
  # Using an instance of ExpressionResolver to resolve the individual
  # expressions, this class evaluates the rest of a parsed sparkql string to
  # true or false. Namely, it handles all the nesting, boolean algebra, and
  # dropped fields. It also has some optimizations built in to skip the
  # processing of any expressions that don't contribute to the net result of
  # the filter.
  #
  # Each expression is a Hash read through the keys :level, :block_group,
  # :conjunction, :conjunction_level and :unary.
  class Evaluator
    # Number of expressions handed to the resolver during the most recent
    # #evaluate call. Skipped expressions are not counted.
    attr_reader :processed_count

    # expression_resolver - any object responding to +resolve(expression)+ and
    #                       returning true, false or :drop.
    def initialize(expression_resolver)
      @resolver = expression_resolver
      @dropped_expression = nil
    end

    # Evaluate a list of parsed expressions.
    #
    # NOTE: the expression that follows a dropped one (within the same block
    # group) has its :conjunction and :conjunction_level overwritten in place.
    #
    # Returns the match state of the outermost group once every nested group
    # has been merged back into it.
    def evaluate(expressions)
      @processed_count = 0
      # A drop recorded at the very end of a previous run must not leak into
      # this one, otherwise it would rewrite the first expression's conjunction.
      @dropped_expression = nil
      @index = new_group({ level: 0, block_group: 0, conjunction: 'And', conjunction_level: 0 })
      @groups = [@index]

      expressions.each do |expression|
        handle_group(expression)
        adjust_expression_for_dropped_field(expression)
        check_for_good_ors(expression)
        next if skip?(expression)

        evaluate_expression(expression)
      end

      cleanup
      @index[:match]
    end

    private

    # Prepare the group stack for the next expression: stay put when the block
    # group is unchanged, open a new group when it grows, and otherwise fold
    # finished groups back down until we reach the expression's group.
    def handle_group(expression)
      return if @index[:block_group] == expression[:block_group]

      if @index[:block_group] < expression[:block_group]
        @index = new_group(expression)
        @groups.push(@index)
      else
        # Turn the group into an expression, resolve down to previous group(s)
        smoosh_group(expression)
      end
    end

    # Here's the real meat. We use an internal stack to represent the result
    # of each block_group. The same node logic is re-used when merging the
    # final result of one block group with the previous.
    def evaluate_expression(expression)
      @processed_count += 1
      evaluate_node(expression, @resolver.resolve(expression))
    end

    # Fold +result+ (true, false or :drop) for +node+ into the current group.
    # +node+ is either an expression or a finished group hash.
    #
    # Returns :drop untouched, otherwise the result after any unary Not.
    def evaluate_node(node, result)
      if result == :drop
        # Remember the drop so the next expression can inherit its conjunction.
        @dropped_expression = node
        return result
      end

      result = !result if node[:unary] == 'Not'

      if node[:conjunction] == 'Not' &&
         (node[:conjunction_level] == node[:level] ||
          node[:conjunction_level] == @index[:level])
        # "Not" behaves as "And Not": it must never revive a group that has
        # already failed, so only apply it while the group still matches.
        @index[:match] = !result if @index[:match]
      elsif node[:conjunction] == 'And' || @index[:expressions] == 0
        @index[:match] = result if @index[:match]
      elsif node[:conjunction] == 'Or' && result
        @index[:match] = result
      end

      @index[:expressions] += 1
      result
    end

    # Optimization logic: once we find any set of And'd expressions that pass
    # and run into an Or at the same level, we can skip further processing at
    # that level.
    def check_for_good_ors(expression)
      return unless expression[:conjunction] == 'Or'

      target =
        if expression[:conjunction_level] == @index[:level]
          @index
        else
          # The Or belongs to another level; pick the earliest-pushed group on
          # the stack at that level (same choice the original full scan made).
          @groups.find { |group| group[:level] == expression[:conjunction_level] }
        end

      return unless target && target[:expressions] > 0 && target[:match]

      target[:good_ors] = true
    end

    # We can skip further expression processing when And-d with a false
    # expression or when a "good Or" was already encountered.
    def skip?(expression)
      @index[:good_ors] ||
        !@index[:match] && expression[:conjunction] == 'And'
    end

    # Build a fresh group record that starts out matching and empty, carrying
    # over the positional and conjunction details of +expression+.
    def new_group(expression)
      {
        level: expression[:level],
        block_group: expression[:block_group],
        conjunction: expression[:conjunction],
        conjunction_level: expression[:conjunction_level],
        match: true,
        good_ors: false,
        expressions: 0
      }
    end

    # When the last expression was dropped, we need to repair the filter by
    # stealing the conjunction of that dropped field. The pending drop is
    # cleared either way, so it only ever affects the very next expression.
    def adjust_expression_for_dropped_field(expression)
      return if @dropped_expression.nil?

      if @dropped_expression[:block_group] == expression[:block_group]
        expression[:conjunction] = @dropped_expression[:conjunction]
        expression[:conjunction_level] = @dropped_expression[:conjunction_level]
      end
      @dropped_expression = nil
    end

    # This is similar to the cleanup step, but happens when we return from a
    # nesting level. Before we can proceed, we need to wrap up the result of
    # the nested group(s).
    def smoosh_group(expression)
      merge_last_group until @groups.last[:block_group] == expression[:block_group]
    end

    # Pop off the group stack, evaluating each group with the previous as we
    # go, until only the outermost group remains. Returns its match state.
    def cleanup
      merge_last_group while @groups.size > 1
      @groups.last[:match]
    end

    # Pop the top group, make its parent current, and fold the popped group's
    # match state into the parent as if it were a single expression.
    def merge_last_group
      finished = @groups.pop
      @index = @groups.last
      evaluate_node(finished, finished[:match])
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
module Sparkql
  # Base class for handling expression resolution. Subclasses override
  # #resolve to decide the outcome of a single expression.
  class ExpressionResolver
    # Every value #resolve is allowed to return. Frozen so the shared list
    # cannot be modified at runtime.
    VALID_RESULTS = [true, false, :drop].freeze

    # Evaluate the result of this expression. Allows for any of the values in
    # VALID_RESULTS. This base implementation ignores the expression and
    # always answers true.
    def resolve(expression)
      true
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Minimal resolver used by the tests: a boolean-typed expression resolves to
# whether its value is the string "true"; anything else is dropped.
class BooleanOrBustExpressionResolver < Sparkql::ExpressionResolver
  def resolve(expression)
    # Non-boolean expressions never take part in the evaluation.
    return :drop unless expression[:type] == :boolean

    "true" == expression[:value]
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
require 'test_helper' | ||
require 'support/boolean_or_bust_expression_resolver' | ||
|
||
# Exercises Sparkql::Evaluator end to end: each filter string is parsed and
# then evaluated with the BooleanOrBustExpressionResolver.
class EvaluatorTest < Test::Unit::TestCase
  include Sparkql

  def test_simple
    assert_accepts 'Test Eq true'
    assert_rejects 'Test Eq false'
    assert_accepts "Test Eq 'Drop'"
  end

  def test_conjunction
    assert_accepts 'Test Eq true And Test Eq true'
    assert_rejects 'Test Eq false And Test Eq true'
    assert_rejects 'Test Eq false And Test Eq false'
    # Ors
    assert_accepts "Test Eq true Or Test Eq true"
    assert_accepts "Test Eq true Or Test Eq false"
    assert_accepts "Test Eq false Or Test Eq true"
    assert_rejects "Test Eq false Or Test Eq false"
  end

  def test_dropped_field_handling
    assert_accepts "Test Eq 'Drop' And Test Eq true"
    assert_rejects "Test Eq 'Drop' And Test Eq false"
    assert_rejects "Test Eq 'Drop' Or Test Eq false"
    assert_accepts "Test Eq 'Drop' Or Test Eq true"
    assert_accepts "Test Eq false And Test Eq 'Drop' Or Test Eq true"
    assert_accepts "Test Eq false Or (Test Eq 'Drop' And Test Eq true)"
  end

  def test_nesting
    assert_accepts "Test Eq true Or (Test Eq true) And Test Eq false And (Test Eq true)"
    assert_accepts "Test Eq true Or ((Test Eq false) And Test Eq false) And (Test Eq false)"
    assert_accepts "(Test Eq false Or Test Eq true) Or (Test Eq false Or Test Eq false)"
    assert_accepts "(Test Eq true And Test Eq true) Or (Test Eq false)"
    assert_accepts "(Test Eq true And Test Eq true) Or (Test Eq false And Test Eq true)"
    assert_rejects "(Test Eq false And Test Eq true) Or (Test Eq false)"
    assert_accepts "Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq true)"
    assert_rejects "Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) And Test Eq true"
    assert_rejects "Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq false"
    assert_accepts "Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq true"
  end

  def test_nots
    assert_rejects "Not Test Eq true"
    assert_accepts "Not Test Eq false"
    assert_rejects "Not (Test Eq true)"
    assert_accepts "Not (Test Eq false)"
    assert_accepts "Test Eq true Not Test Eq false"
    assert_rejects "Test Eq true Not Test Eq true"
    assert_accepts "Test Eq true Not (Test Eq false Or Test Eq false)"
    assert_accepts "Test Eq true Not (Test Eq false And Test Eq false)"
    assert_rejects "Test Eq true Not (Test Eq false Or Test Eq true)"
    assert_rejects "Test Eq true Not (Test Eq true Or Test Eq false)"
    assert_rejects "Test Eq true Not (Not Test Eq false)"
    assert_accepts "Not (Not Test Eq true)"
    assert_accepts "Not (Not(Not Test Eq true))"
  end

  def test_optimizations
    assert_accepts_after 1, "Test Eq true Or Test Eq false And Test Eq false"
    assert_accepts_after 3, "Test Eq false Or Test Eq true And Test Eq true"
    assert_accepts_after 2, "(Test Eq true Or Test Eq false) And Test Eq true"
    assert_accepts_after 3, "(Test Eq false Or Test Eq true) And Test Eq true"
  end

  # A few cases borrowed from prospector's test suite, in simplified form.
  def test_advanced
    assert_rejects "MlsStatus Eq false And PropertyType Eq true And (City Eq true Or City Eq false)"
  end

  # Parse +filter+ and evaluate it with a fresh parser and evaluator, keeping
  # both (and the parsed expressions) in instance variables for inspection.
  def sample(filter)
    @parser = Parser.new
    @expressions = @parser.parse(filter)
    @evaluator = Evaluator.new(BooleanOrBustExpressionResolver.new)
    @evaluator.evaluate(@expressions)
  end

  private

  # The filter must evaluate to a truthy result.
  def assert_accepts(filter)
    assert(sample(filter))
  end

  # The filter must evaluate to a falsy result.
  def assert_rejects(filter)
    assert(!sample(filter))
  end

  # The filter must pass, and the evaluator must have resolved exactly
  # +expected_count+ expressions while getting there.
  def assert_accepts_after(expected_count, filter)
    assert_accepts(filter)
    assert_equal expected_count, @evaluator.processed_count
  end
end