diff --git a/CHANGELOG.md b/CHANGELOG.md index 34937c7..df1d8c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +v1.1.0, 2016-07-28 ([changes](https://github.com/sparkapi/sparkql/compare/v1.0.3...v1.1.0)) +------------------- + * [IMPROVEMENT] Evaluation class for sparkql boolean algebra processing + v1.0.3, 2016-06-06 ([changes](https://github.com/sparkapi/sparkql/compare/v1.0.2...v1.0.3)) ------------------- * [IMPROVEMENT] Expression limit lifted to 75 expressions diff --git a/VERSION b/VERSION index 21e8796..9084fa2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.3 +1.1.0 diff --git a/lib/sparkql.rb b/lib/sparkql.rb index 4c81734..bba4494 100644 --- a/lib/sparkql.rb +++ b/lib/sparkql.rb @@ -2,6 +2,8 @@ require "sparkql/token" require "sparkql/errors" require "sparkql/expression_state" +require "sparkql/expression_resolver" +require "sparkql/evaluator" require "sparkql/lexer" require "sparkql/function_resolver" require "sparkql/parser_tools" diff --git a/lib/sparkql/evaluator.rb b/lib/sparkql/evaluator.rb new file mode 100644 index 0000000..077e1c6 --- /dev/null +++ b/lib/sparkql/evaluator.rb @@ -0,0 +1,152 @@ +# Using an instance of ExpressionResolver to resolve the individual expressions, +# this class will evaluate the rest of a parsed sparkql string to true or false. +# Namely, this class will handle all the nesting, boolean algebra, and dropped +# fields. Plus, it has some optimizations built in to skip the processing for +# any expressions that don't contribute to the net result of the filter. +class Sparkql::Evaluator + + attr_reader :processed_count + + def initialize expression_resolver + @resolver = expression_resolver + end + + def evaluate(expressions) + @processed_count = 0 + @index = { + level: 0, + block_group: 0, + conjunction: "And", + conjunction_level: 0, + match: true, + good_ors: false, + expressions: 0 + } + @groups = [@index] + expressions.each do |expression| + handle_group(expression) + adjust_expression_for_dropped_field(expression) + check_for_good_ors(expression) + next if skip?(expression) + evaluate_expression(expression) + end + cleanup + return @index[:match] + end + + private + + # prepare the group stack for the next expression + def handle_group(expression) + if @index[:block_group] == expression[:block_group] + # Noop + elsif @index[:block_group] < expression[:block_group] + @index = new_group(expression) + @groups.push(@index) + else + # Turn the group into an expression, resolve down to previous group(s) + smoosh_group(expression) + end + end + + # Here's the real meat. We use an internal stack to represent the result of + # each block_group. This logic is re-used when merging the final result of one + # block group with the previous. + def evaluate_expression(expression) + @processed_count += 1 + evaluate_node(expression, @resolver.resolve(expression)) + end + def evaluate_node(node, result) + if result == :drop + @dropped_expression = node + return result + end + if node[:unary] == "Not" + result = !result + end + if node[:conjunction] == 'Not' && + (node[:conjunction_level] == node[:level] || + node[:conjunction_level] == @index[:level]) + @index[:match] = !result + elsif node[:conjunction] == 'And' || @index[:expressions] == 0 + @index[:match] = result if @index[:match] + elsif node[:conjunction] == 'Or' && result + @index[:match] = result + end + @index[:expressions] += 1 + result + end + + # Optimization logic, once we find any set of And'd expressions that pass and + # run into an Or at the same level, we can skip further processing at that + # level. + def check_for_good_ors(expression) + if expression[:conjunction] == 'Or' + good_index = @index + unless expression[:conjunction_level] == @index[:level] + good_index = nil + # Well crap, now we need to go back and find that level by hand + @groups.reverse_each do |i| + if i[:level] == expression[:conjunction_level] + good_index = i + end + end + end + if !good_index.nil? && good_index[:expressions] > 0 && good_index[:match] + good_index[:good_ors] = true + end + end + end + + # We can skip further expression processing when And-d with a false expression + # or a "good Or" was already encountered. + def skip?(expression) + @index[:good_ors] || + !@index[:match] && expression[:conjunction] == 'And' + end + + def new_group(expression) + { + level: expression[:level], + block_group: expression[:block_group], + conjunction: expression[:conjunction], + conjunction_level: expression[:conjunction_level], + match: true, + good_ors: false, + expressions: 0 + } + end + + # When the last expression was dropped, we need to repair the filter by + # stealing the conjunction of that dropped field. + def adjust_expression_for_dropped_field(expression) + if @dropped_expression.nil? + return + elsif @dropped_expression[:block_group] == expression[:block_group] + expression[:conjunction] = @dropped_expression[:conjunction] + expression[:conjunction_level] = @dropped_expression[:conjunction_level] + end + @dropped_expression = nil + end + + # This is similar to the cleanup step, but happens when we return from a + # nesting level. Before we can proceed, we need wrap up the result of the + # nested group. + def smoosh_group(expression) + until @groups.last[:block_group] == expression[:block_group] + last = @groups.pop + @index = @groups.last + evaluate_node(last, last[:match]) + end + end + + # pop off the group stack, evaluating each group with the previous as we go. + def cleanup + while @groups.size > 1 + last = @groups.pop + @index = @groups.last + evaluate_node(last, last[:match]) + end + @groups.last[:match] + end +end diff --git a/lib/sparkql/expression_resolver.rb b/lib/sparkql/expression_resolver.rb new file mode 100644 index 0000000..e837288 --- /dev/null +++ b/lib/sparkql/expression_resolver.rb @@ -0,0 +1,11 @@ +# Base class for handling expression resolution +class Sparkql::ExpressionResolver + + VALID_RESULTS = [true, false, :drop] + + # Evaluate the result of this expression. Allows for any of the values in + # VALID_RESULTS + def resolve(expression) + true + end +end diff --git a/lib/sparkql/parser_tools.rb b/lib/sparkql/parser_tools.rb index ca382a9..695b93e 100644 --- a/lib/sparkql/parser_tools.rb +++ b/lib/sparkql/parser_tools.rb @@ -60,16 +60,17 @@ def tokenize_conjunction(exp1, conj, exp2) end def tokenize_unary_conjunction(conj, exp) - # Handles the case when a SparkQL filter string # begins with a unary operator, and is nested, such as: - # Not (Not Field Eq 1) + # Not (Not Field Eq 1) + # In this instance we treat the outer unary as a conjunction. if @expression_count == 1 && @lexer.level > 0 - exp.first[:conjunction] = conj + exp.first[:conjunction] = conj + exp.first[:conjunction_level] = @lexer.level - 1 end - exp.first[:unary] = conj exp.first[:unary_level] = @lexer.level + exp end diff --git a/test/support/boolean_or_bust_expression_resolver.rb b/test/support/boolean_or_bust_expression_resolver.rb new file mode 100644 index 0000000..2cdf044 --- /dev/null +++ b/test/support/boolean_or_bust_expression_resolver.rb @@ -0,0 +1,12 @@ +# A super simple expression resolver for testing... returns the boolean value as +# the result for the expression, or when not a boolean, drops the expression. +class BooleanOrBustExpressionResolver < Sparkql::ExpressionResolver + + def resolve(expression) + if expression[:type] == :boolean + "true" == expression[:value] + else + :drop + end + end +end diff --git a/test/unit/evaluator_test.rb b/test/unit/evaluator_test.rb new file mode 100644 index 0000000..36b5c76 --- /dev/null +++ b/test/unit/evaluator_test.rb @@ -0,0 +1,84 @@ +require 'test_helper' +require 'support/boolean_or_bust_expression_resolver' + +class EvaluatorTest < Test::Unit::TestCase + include Sparkql + + def test_simple + assert sample('Test Eq true') + assert !sample('Test Eq false') + assert sample("Test Eq 'Drop'") + end + + def test_conjunction + assert sample('Test Eq true And Test Eq true') + assert !sample('Test Eq false And Test Eq true') + assert !sample('Test Eq false And Test Eq false') + # Ors + assert sample("Test Eq true Or Test Eq true") + assert sample("Test Eq true Or Test Eq false") + assert sample("Test Eq false Or Test Eq true") + assert !sample("Test Eq false Or Test Eq false") + end + + def test_dropped_field_handling + assert sample("Test Eq 'Drop' And Test Eq true") + assert !sample("Test Eq 'Drop' And Test Eq false") + assert !sample("Test Eq 'Drop' Or Test Eq false") + assert sample("Test Eq 'Drop' Or Test Eq true") + assert sample("Test Eq false And Test Eq 'Drop' Or Test Eq true") + assert sample("Test Eq false Or (Test Eq 'Drop' And Test Eq true)") + end + + def test_nesting + assert sample("Test Eq true Or (Test Eq true) And Test Eq false And (Test Eq true)") + assert sample("Test Eq true Or ((Test Eq false) And Test Eq false) And (Test Eq false)") + assert sample("(Test Eq false Or Test Eq true) Or (Test Eq false Or Test Eq false)") + assert sample("(Test Eq true And Test Eq true) Or (Test Eq false)") + assert sample("(Test Eq true And Test Eq true) Or (Test Eq false And Test Eq true)") + assert !sample("(Test Eq false And Test Eq true) Or (Test Eq false)") + assert sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq true)") + assert !sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) And Test Eq true") + assert !sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq false") + assert sample("Test Eq true And ((Test Eq true And Test Eq false) Or Test Eq false) Or Test Eq true") + end + + def test_nots + assert !sample("Not Test Eq true") + assert sample("Not Test Eq false") + assert !sample("Not (Test Eq true)") + assert sample("Not (Test Eq false)") + assert sample("Test Eq true Not Test Eq false") + assert !sample("Test Eq true Not Test Eq true") + assert sample("Test Eq true Not (Test Eq false Or Test Eq false)") + assert sample("Test Eq true Not (Test Eq false And Test Eq false)") + assert !sample("Test Eq true Not (Test Eq false Or Test Eq true)") + assert !sample("Test Eq true Not (Test Eq true Or Test Eq false)") + assert !sample("Test Eq true Not (Not Test Eq false)") + assert sample("Not (Not Test Eq true)") + assert sample("Not (Not(Not Test Eq true))") + end + + def test_optimizations + assert sample("Test Eq true Or Test Eq false And Test Eq false") + assert_equal 1, @evaluator.processed_count + assert sample("Test Eq false Or Test Eq true And Test Eq true") + assert_equal 3, @evaluator.processed_count + assert sample("(Test Eq true Or Test Eq false) And Test Eq true") + assert_equal 2, @evaluator.processed_count + assert sample("(Test Eq false Or Test Eq true) And Test Eq true") + assert_equal 3, @evaluator.processed_count + end + + # Here's some examples from prospector's tests that have been simplified a bit. + def test_advanced + assert !sample("MlsStatus Eq false And PropertyType Eq true And (City Eq true Or City Eq false)") + end + + def sample filter + @parser = Parser.new + @expressions = @parser.parse(filter) + @evaluator = Evaluator.new(BooleanOrBustExpressionResolver.new()) + @evaluator.evaluate(@expressions) + end +end