From 8739d5ebf7e6c5355d5727327bf7f2200d66c0a5 Mon Sep 17 00:00:00 2001 From: Atri Sharma Date: Mon, 15 Jul 2019 19:04:27 +0530 Subject: [PATCH] LUCENE-8810: Honor MaxClausesCount in BooleanQuery (#787) During rewrite, BooleanQuery always tries to flatten nested pure-disjunction SHOULD clauses into the parent query. However, the flattened query can exceed the maximum number of clauses. This commit skips flattening in that case. --- .../apache/lucene/search/BooleanQuery.java | 28 +++++++++++-------- .../lucene/search/TestBooleanRewrites.java | 15 ++++++++++ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 80924a9dd292..0484fe70598a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -481,23 +481,27 @@ public Query rewrite(IndexReader reader) throws IOException { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); boolean actuallyRewritten = false; - for (BooleanClause clause : clauses) { - if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) { - BooleanQuery innerQuery = (BooleanQuery) clause.getQuery(); - if (innerQuery.isPureDisjunction()) { - actuallyRewritten = true; - for (BooleanClause innerClause : innerQuery.clauses()) { - builder.add(innerClause); + try { + for (BooleanClause clause : clauses) { + if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) { + BooleanQuery innerQuery = (BooleanQuery) clause.getQuery(); + if (innerQuery.isPureDisjunction()) { + actuallyRewritten = true; + for (BooleanClause innerClause : innerQuery.clauses()) { + builder.add(innerClause); + } + } else { + builder.add(clause); } } else { builder.add(clause); } - } else { - builder.add(clause); } - } - if 
(actuallyRewritten) { - return builder.build(); + if (actuallyRewritten) { + return builder.build(); + } + } catch (TooManyClauses exception) { + // No-op : Do not flatten when the new query will violate max clause count } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 497035b83f83..def979ee1a3a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -598,4 +598,19 @@ public void testDiscardShouldClauses() throws IOException { w.close(); dir.close(); } + + public void testFlattenInnerDisjunctionsWithMoreThan1024Terms() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + BooleanQuery.Builder builder1024 = new BooleanQuery.Builder(); + for(int i = 0; i < 1024; i++) { + builder1024.add(new TermQuery(new Term("foo", "bar-" + i)), Occur.SHOULD); + } + Query inner = builder1024.build(); + Query query = new BooleanQuery.Builder() + .add(inner, Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .build(); + assertSame(query, searcher.rewrite(query)); + } }