From e572b9a754a71da1f5bdb53c283b936ab803def2 Mon Sep 17 00:00:00 2001 From: YanTangZhai Date: Tue, 2 Dec 2014 20:27:14 +0800 Subject: [PATCH 1/6] Update HiveStrategies.scala --- .../scala/org/apache/spark/sql/hive/HiveStrategies.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 56fc85239e1c0..07d82f6fd5db5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -194,8 +194,10 @@ private[hive] trait HiveStrategies { // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. val partitionKeyIds = AttributeSet(relation.partitionKeys) - val (pruningPredicates, otherPredicates) = predicates.partition { - _.references.subsetOf(partitionKeyIds) + val (pruningPredicates, otherPredicates) = predicates.partition { x => + x.references.baseSet != null && + !x.references.baseSet.isEmpty && + x.references.subsetOf(partitionKeyIds) } pruneFilterProject( From 72accf1a715b2bfb6d237e842a0fafaf9394bb59 Mon Sep 17 00:00:00 2001 From: YanTangZhai Date: Wed, 3 Dec 2014 11:46:25 +0800 Subject: [PATCH 2/6] Update HiveQuerySuite.scala --- .../apache/spark/sql/hive/execution/HiveQuerySuite.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index af45dfd6e28c2..5a410915d9722 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -413,6 +413,13 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { createQueryTest("select null from table", "SELECT null FROM src LIMIT 1") + createQueryTest("predicates contains an empty AttributeSet() references", + """ + |SELECT a FROM ( + | SELECT 1 AS a FROM src LIMIT 1 ) table + |WHERE abs(20141202) is not null + """.stripMargin) + test("implement identity function using case statement") { val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src") .map { case Row(i: Int) => i } From efa9b039c0fa4c1674c3ad7081e402ca8bc7ae40 Mon Sep 17 00:00:00 2001 From: YanTangZhai Date: Wed, 3 Dec 2014 14:47:52 +0800 Subject: [PATCH 3/6] Update HiveQuerySuite.scala --- .../spark/sql/hive/execution/HiveQuerySuite.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 5a410915d9722..3a35d51d5d029 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -413,12 +413,14 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { createQueryTest("select null from table", "SELECT null FROM src LIMIT 1") - createQueryTest("predicates contains an empty AttributeSet() references", - """ - |SELECT a FROM ( - | SELECT 1 AS a FROM src LIMIT 1 ) table - |WHERE abs(20141202) is not null - """.stripMargin) + test("predicates contains an empty AttributeSet() references") { + sql( + """ + |SELECT a FROM ( + | SELECT 1 AS a FROM src LIMIT 1 ) table + |WHERE abs(20141202) is not null + """.stripMargin).collect() + } test("implement identity function using case statement") { val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src") From 70a35447b05296b0a69e2fdccb6e83957e67f4f4 Mon Sep 17 00:00:00 2001 From: yantangzhai Date: Thu, 18 Dec 2014 16:37:08 +0800 Subject: [PATCH 4/6] [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references --- .../spark/sql/catalyst/expressions/AttributeSet.scala | 2 ++ .../scala/org/apache/spark/sql/hive/HiveStrategies.scala | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala index 2b4969b7cfec0..4b1185d7cb75f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala @@ -112,4 +112,6 @@ class AttributeSet private (val baseSet: Set[AttributeEquals]) override def toSeq: Seq[Attribute] = baseSet.map(_.a).toArray.toSeq override def toString = "{" + baseSet.map(_.a).mkString(", ") + "}" + + def isEmpty: Boolean = baseSet.isEmpty } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 07d82f6fd5db5..489f1e1617d7b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -194,10 +194,9 @@ private[hive] trait HiveStrategies { // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. val partitionKeyIds = AttributeSet(relation.partitionKeys) - val (pruningPredicates, otherPredicates) = predicates.partition { x => - x.references.baseSet != null && - !x.references.baseSet.isEmpty && - x.references.subsetOf(partitionKeyIds) + val (pruningPredicates, otherPredicates) = predicates.partition { predicate => + !predicate.references.isEmpty && + predicate.references.subsetOf(partitionKeyIds) } pruneFilterProject( From 37cfdf5effe0de72a86974b65a6ddff87debfffa Mon Sep 17 00:00:00 2001 From: yantangzhai Date: Thu, 18 Dec 2014 16:40:13 +0800 Subject: [PATCH 5/6] [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references --- .../apache/spark/sql/catalyst/expressions/AttributeSet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala index 4b1185d7cb75f..957d1608ec80d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala @@ -113,5 +113,5 @@ class AttributeSet private (val baseSet: Set[AttributeEquals]) override def toString = "{" + baseSet.map(_.a).mkString(", ") + "}" - def isEmpty: Boolean = baseSet.isEmpty + def isEmpty: Boolean = baseSet.isEmpty } From 620ebe3df79fce1c8dbdea971eea99971af5b9d9 Mon Sep 17 00:00:00 2001 From: yantangzhai Date: Thu, 18 Dec 2014 16:51:21 +0800 Subject: [PATCH 6/6] [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references --- .../apache/spark/sql/catalyst/expressions/AttributeSet.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala index 957d1608ec80d..171845ad14e3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala @@ -113,5 +113,5 @@ class AttributeSet private (val baseSet: Set[AttributeEquals]) override def toString = "{" + baseSet.map(_.a).mkString(", ") + "}" - def isEmpty: Boolean = baseSet.isEmpty + override def isEmpty: Boolean = baseSet.isEmpty }