Skip to content

Commit

Permalink
Generate strings with the format like Hive for unit tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
yhuai committed Jun 19, 2014
1 parent 9787fff commit 8003cf3
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,15 @@ case class CacheCommand(tableName: String, doCache: Boolean) extends Command
/**
* Returned for the "DESCRIBE tableName" command.
* @param table The table to be described.
* @param isFormatted True if "DESCRIBE FORMATTED" is used. Otherwise, false.
* It is effective only when the table is a Hive table.
* @param isExtended True if "DESCRIBE EXTENDED" is used. Otherwise, false.
* It is effective only when the table is a Hive table.
*/
case class DescribeCommand(
table: LogicalPlan,
isFormatted: Boolean,
isExtended: Boolean) extends Command {
override def output = Seq(
BoundReference(0, AttributeReference("name", StringType, nullable = false)()),
BoundReference(1, AttributeReference("type", StringType, nullable = false)()),
// Column names are based on Hive.
BoundReference(0, AttributeReference("col_name", StringType, nullable = false)()),
BoundReference(1, AttributeReference("data_type", StringType, nullable = false)()),
BoundReference(2, AttributeReference("comment", StringType, nullable = false)()))
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.types._
import org.apache.spark.sql.execution.QueryExecutionException
import org.apache.spark.sql.execution.{Command => PhysicalCommand}
import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand

/**
* Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
Expand Down Expand Up @@ -291,6 +292,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
* execution is simply passed back to Hive.
*/
def stringResult(): Seq[String] = executedPlan match {
case describeHiveTableCommand: DescribeHiveTableCommand =>
// If it is a describe command for a Hive table, we want the output format
// to be similar to Hive's.
describeHiveTableCommand.hiveString
case command: PhysicalCommand =>
command.sideEffectResult.map(_.toString)

Expand Down
46 changes: 25 additions & 21 deletions sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -391,30 +391,34 @@ private[hive] object HiveQl {

case Token("TOK_DESCTABLE", describeArgs) =>
// Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
val Some(tableType) :: formatted :: extended :: _ :: Nil =
val Some(tableType) :: formatted :: extended :: pretty :: Nil =
getClauses(Seq("TOK_TABTYPE", "FORMATTED", "EXTENDED", "PRETTY"), describeArgs)
// TODO: support PRETTY?
tableType match {
case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => {
nameParts.head match {
case Token(".", dbName :: tableName :: Nil) =>
// It is describing a table with the format like "describe db.table".
val (db, tableName) = extractDbNameTableName(nameParts.head)
DescribeCommand(
UnresolvedRelation(db, tableName, None), formatted.isDefined, extended.isDefined)
case Token(".", dbName :: tableName :: colName :: Nil) =>
// It is describing a column with the format like "describe db.table column".
NativePlaceholder
case tableName =>
// It is describing a table with the format like "describe table".
DescribeCommand(
UnresolvedRelation(None, tableName.getText, None),
formatted.isDefined,
extended.isDefined)
if (formatted.isDefined || pretty.isDefined) {
// FORMATTED and PRETTY are not supported and this statement will be treated as
// a Hive native command.
NativePlaceholder
} else {
tableType match {
case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => {
nameParts.head match {
case Token(".", dbName :: tableName :: Nil) =>
// It is describing a table with the format like "describe db.table".
val (db, tableName) = extractDbNameTableName(nameParts.head)
DescribeCommand(
UnresolvedRelation(db, tableName, None), extended.isDefined)
case Token(".", dbName :: tableName :: colName :: Nil) =>
// It is describing a column with the format like "describe db.table column".
NativePlaceholder
case tableName =>
// It is describing a table with the format like "describe table".
DescribeCommand(
UnresolvedRelation(None, tableName.getText, None),
extended.isDefined)
}
}
// All other cases.
case _ => NativePlaceholder
}
// All other cases.
case _ => NativePlaceholder
}

case Token("TOK_CREATETABLE", children)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ private[hive] trait HiveStrategies {
resolvedTable match {
case t: MetastoreRelation =>
Seq(DescribeHiveTableCommand(
t, describe.output, describe.isFormatted, describe.isExtended)(context))
t, describe.output, describe.isExtended)(context))
case o: LogicalPlan =>
Seq(DescribeCommand(planLater(o), describe.output)(context))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -462,32 +462,43 @@ case class NativeCommand(
case class DescribeHiveTableCommand(
table: MetastoreRelation,
output: Seq[Attribute],
isFormatted: Boolean,
isExtended: Boolean)(
@transient context: HiveContext)
extends LeafNode with Command {

override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = {
val cols: Seq[FieldSchema] = table.hiveQlTable.getCols
val parCols: Seq[FieldSchema] = table.hiveQlTable.getPartCols
val columnInfo = cols.map(field => (field.getName, field.getType, field.getComment))
val partColumnInfo = parCols.map(field => (field.getName, field.getType, field.getComment))
// Strings formatted like Hive's output. They are used for result comparison in our unit tests.
lazy val hiveString: Seq[String] = {
val alignment = 20
val delim = "\t"

val formattedPart = if (isFormatted) {
(MetaDataFormatUtils.getTableInformation(table.hiveQlTable), null, null) :: Nil
} else {
Nil
sideEffectResult.map {
case (name, dataType, comment) =>
String.format("%-" + alignment + "s", name) + delim +
String.format("%-" + alignment + "s", dataType) + delim +
String.format("%-" + alignment + "s", Option(comment).getOrElse("None"))
}
}

val extendedPart = if (isExtended) {
("Detailed Table Information", table.hiveQlTable.getTTable.toString, null) :: Nil
} else {
Nil
override protected[sql] lazy val sideEffectResult: Seq[(String, String, String)] = {
// Trying to mimic the format of Hive's output. But not exactly the same.
var results: Seq[(String, String, String)] = Nil

val columns: Seq[FieldSchema] = table.hiveQlTable.getCols
val partitionColumns: Seq[FieldSchema] = table.hiveQlTable.getPartCols
results ++= columns.map(field => (field.getName, field.getType, field.getComment))
if (!partitionColumns.isEmpty) {
val partColumnInfo =
partitionColumns.map(field => (field.getName, field.getType, field.getComment))
results ++=
partColumnInfo ++ Seq(("# Partition Information", "", "")) ++
Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++ partColumnInfo
}

if (isExtended) {
results ++= Seq(("Detailed Table Information", table.hiveQlTable.getTTable.toString, ""))
}

// Trying to mimic the format of Hive's output. But not 100% the same.
columnInfo ++ partColumnInfo ++ Seq(("# Partition Information", null, null)) ++
partColumnInfo ++ formattedPart ++ extendedPart
results
}

override def execute(): RDD[Row] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ abstract class HiveComparisonTest
case _: SetCommand => Seq("0")
case _: LogicalNativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "")
case _: ExplainCommand => answer
case _: DescribeCommand =>
answer.filterNot(
r => nonDeterministicLine(r) || ignoredLine(r)).map(_.trim).filterNot(
r => r == "" || r == "\n")
case plan => if (isSorted(plan)) answer else answer.sorted
}
orderedAnswer.map(cleanPaths)
Expand All @@ -169,6 +173,16 @@ abstract class HiveComparisonTest
/** Returns true when `line` contains at least one of the `nonDeterministicLineIndicators`. */
protected def nonDeterministicLine(line: String) =
  nonDeterministicLineIndicators.exists(indicator => line.contains(indicator))

// Substrings that mark lines which carry no actual result data; any line containing one of
// these indicators is ignored.
lazy val ignoredLineIndicators = Seq("# Partition Information", "# col_name")

/** Returns true when `line` contains at least one of the `ignoredLineIndicators`. */
protected def ignoredLine(line: String) =
  ignoredLineIndicators.exists(indicator => line.contains(indicator))

/**
* Removes non-deterministic paths from `str` so cached answers will compare correctly.
*/
Expand Down Expand Up @@ -329,11 +343,17 @@ abstract class HiveComparisonTest

if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && preparedHive != catalyst) {

val hivePrintOut = s"== HIVE - ${hive.size} row(s) ==" +: preparedHive
val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive
val catalystPrintOut = s"== CATALYST - ${catalyst.size} row(s) ==" +: catalyst

val resultComparison = sideBySide(hivePrintOut, catalystPrintOut).mkString("\n")

println("hive output")
hive.foreach(println)

println("catalyst printout")
catalyst.foreach(println)

if (recomputeCache) {
logger.warn(s"Clearing cache files for failed test $testCaseName")
hiveCacheFiles.foreach(_.delete())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// After stop taking the `stringOrError` route, exceptions are thrown from these cases.
// See SPARK-2129 for details.
"join_view",
"mergejoins_mixed"
"mergejoins_mixed",

// Returning the result of a describe statement as a JSON object is not supported.
"describe_table_json",
"describe_database_json",
"describe_formatted_view_partitioned_json",

// Hive returns the results of describe as plain text. Comments with multiple lines
// introduce extra lines in the Hive results, which makes the result comparison fail.
"describe_comment_indent"
)

/**
Expand Down Expand Up @@ -292,11 +301,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"default_partition_name",
"delimiter",
"desc_non_existent_tbl",
"describe_comment_indent",
"describe_database_json",
"describe_formatted_view_partitioned",
"describe_formatted_view_partitioned_json",
"describe_table_json",
"diff_part_input_formats",
"disable_file_format_check",
"drop_function",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ class HiveQuerySuite extends HiveComparisonTest {
Array("dt", "string", null))
) {
hql("DESCRIBE test_describe_commands1")
.select('name, 'type, 'comment)
.select('col_name, 'data_type, 'comment)
.collect()
}

Expand All @@ -295,7 +295,7 @@ class HiveQuerySuite extends HiveComparisonTest {
Array("dt", "string", null))
) {
hql("DESCRIBE default.test_describe_commands1")
.select('name, 'type, 'comment)
.select('col_name, 'data_type, 'comment)
.collect()
}

Expand Down Expand Up @@ -347,7 +347,7 @@ class HiveQuerySuite extends HiveComparisonTest {
Array("b", "StringType", null))
) {
hql("DESCRIBE test_describe_commands2")
.select('name, 'type, 'comment)
.select('col_name, 'data_type, 'comment)
.collect()
}
}
Expand Down

0 comments on commit 8003cf3

Please sign in to comment.