[SPARK-9754][SQL] Remove TypeCheck in debug package.
TypeCheck no longer applies in the new "Tungsten" world.

Author: Reynold Xin <rxin@databricks.com>

Closes apache#8043 from rxin/SPARK-9754 and squashes the following commits:

4ec471e [Reynold Xin] [SPARK-9754][SQL] Remove TypeCheck in debug package.
rxin committed Aug 8, 2015
1 parent 85be65b commit 998f4ff
Showing 2 changed files with 4 additions and 104 deletions.
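The commit message above says TypeCheck no longer applies under Tungsten; for orientation, here is a minimal, purely illustrative sketch (not part of this commit) of how the debug package changed in the first diff below is driven from user code. The application setup, the DebugExample object name, and the sample DataFrame are assumptions made for the example; only the debug() and typeCheck() extension methods come from the code shown in the diff.

// Illustrative sketch only; not code from this commit.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.execution.debug._   // the package object changed below

object DebugExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("debug-example").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val df = sc.parallelize(1 to 3).map(i => (i, s"name$i")).toDF("id", "name")

    // Kept by this commit: runs the query with every operator wrapped in a
    // DebugNode and logs per-operator tuple counts.
    df.debug()

    // Removed by this commit: df.typeCheck(), which re-ran the query while
    // validating runtime value types against the schema.

    sc.stop()
  }
}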
@@ -17,21 +17,16 @@

package org.apache.spark.sql.execution

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.unsafe.types.UTF8String

import scala.collection.mutable.HashSet

import org.apache.spark.{AccumulatorParam, Accumulator, Logging}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.trees.TreeNodeRef
import org.apache.spark.sql.types._
import org.apache.spark.{Accumulator, AccumulatorParam, Logging}

/**
* :: DeveloperApi ::
* Contains methods for debugging query execution.
*
* Usage:
@@ -53,10 +48,8 @@ package object debug {
}

/**
* :: DeveloperApi ::
* Augments [[DataFrame]]s with debug methods.
*/
@DeveloperApi
implicit class DebugQuery(query: DataFrame) extends Logging {
def debug(): Unit = {
val plan = query.queryExecution.executedPlan
@@ -72,23 +65,6 @@
case _ =>
}
}

def typeCheck(): Unit = {
val plan = query.queryExecution.executedPlan
val visited = new collection.mutable.HashSet[TreeNodeRef]()
val debugPlan = plan transform {
case s: SparkPlan if !visited.contains(new TreeNodeRef(s)) =>
visited += new TreeNodeRef(s)
TypeCheck(s)
}
try {
logDebug(s"Results returned: ${debugPlan.execute().count()}")
} catch {
case e: Exception =>
def unwrap(e: Throwable): Throwable = if (e.getCause == null) e else unwrap(e.getCause)
logDebug(s"Deepest Error: ${unwrap(e)}")
}
}
}

private[sql] case class DebugNode(child: SparkPlan) extends UnaryNode {
@@ -148,76 +124,4 @@ package object debug {
}
}
}

/**
* Helper functions for checking that runtime types match a given schema.
*/
private[sql] object TypeCheck {
def typeCheck(data: Any, schema: DataType): Unit = (data, schema) match {
case (null, _) =>

case (row: InternalRow, s: StructType) =>
row.toSeq(s).zip(s.map(_.dataType)).foreach { case(d, t) => typeCheck(d, t) }
case (a: ArrayData, ArrayType(elemType, _)) =>
a.foreach(elemType, (_, e) => {
typeCheck(e, elemType)
})
case (m: MapData, MapType(keyType, valueType, _)) =>
m.keyArray().foreach(keyType, (_, e) => {
typeCheck(e, keyType)
})
m.valueArray().foreach(valueType, (_, e) => {
typeCheck(e, valueType)
})

case (_: Long, LongType) =>
case (_: Int, IntegerType) =>
case (_: UTF8String, StringType) =>
case (_: Float, FloatType) =>
case (_: Byte, ByteType) =>
case (_: Short, ShortType) =>
case (_: Boolean, BooleanType) =>
case (_: Double, DoubleType) =>
case (_: Int, DateType) =>
case (_: Long, TimestampType) =>
case (v, udt: UserDefinedType[_]) => typeCheck(v, udt.sqlType)

case (d, t) => sys.error(s"Invalid data found: got $d (${d.getClass}) expected $t")
}
}

/**
* Augments [[DataFrame]]s with debug methods.
*/
private[sql] case class TypeCheck(child: SparkPlan) extends SparkPlan {
import TypeCheck._

override def nodeName: String = ""

/* Only required when defining this class in a REPL.
override def makeCopy(args: Array[Object]): this.type =
TypeCheck(args(0).asInstanceOf[SparkPlan]).asInstanceOf[this.type]
*/

def output: Seq[Attribute] = child.output

def children: List[SparkPlan] = child :: Nil

protected override def doExecute(): RDD[InternalRow] = {
child.execute().map { row =>
try typeCheck(row, child.schema) catch {
case e: Exception =>
sys.error(
s"""
|ERROR WHEN TYPE CHECKING QUERY
|==============================
|$e
|======== BAD TREE ============
|$child
""".stripMargin)
}
row
}
}
}
}
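One hedged aside on the commit message's rationale, before the test-file diff: under Tungsten, operators largely exchange UnsafeRow values whose fields sit in a raw binary buffer, so there is no boxed JVM object whose runtime class the removed TypeCheck.typeCheck could pattern match against; field access is already schema-directed, as in the sketch below. The snippet is illustrative only (it uses a generic InternalRow built in memory, which the Spark 1.5 catalyst module allows) and is not code from this commit.

// Illustrative only; assumes spark-catalyst 1.5 on the classpath and a row
// whose fields are (Int, String).
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String

object SchemaDirectedAccess {
  def main(args: Array[String]): Unit = {
    val row: InternalRow = InternalRow(42, UTF8String.fromString("spark"))
    // The expected type comes from the call site (ultimately the schema),
    // not from inspecting the runtime class of a boxed field value.
    val id: Int = row.getInt(0)
    val name: UTF8String = row.getUTF8String(1)
    println(s"id=$id name=$name")
  }
}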
@@ -25,8 +25,4 @@ class DebuggingSuite extends SparkFunSuite {
test("DataFrame.debug()") {
testData.debug()
}

test("DataFrame.typeCheck()") {
testData.typeCheck()
}
}
