Skip to content

Commit

Permalink
Add test case with workaround for reading partitioned avro files.
Browse files Browse the repository at this point in the history
  • Loading branch information
marmbrus committed Sep 9, 2014
1 parent c419e4f commit fea2124
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 1 deletion.
66 changes: 65 additions & 1 deletion sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,71 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
|)
""".stripMargin.cmd,
s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd
)
),
// THIS TABLE IS NOT THE SAME AS THE HIVE TEST TABLE episodes_partitioned BECAUSE DYNAMIC
// PARTITIONING IS NOT YET SUPPORTED.
// Defines the `episodes_part` test table: an Avro-backed table with the same three data columns
// as `episodes` (title, air_date, doctor) plus an INT partition column `doctor_pt`.
// The commands below run in order: create the table, re-apply the Avro schema as a SerDe
// property, then fill one static partition from the `episodes` table.
TestTable("episodes_part",
// Step 1: create the partitioned table using the Avro SerDe and Avro container input/output
// formats; the record schema is supplied inline through TBLPROPERTIES.
s"""CREATE TABLE episodes_part (title STRING, air_date STRING, doctor INT)
|PARTITIONED BY (doctor_pt INT)
|ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
|STORED AS
|INPUTFORMAT '${classOf[AvroContainerInputFormat].getCanonicalName}'
|OUTPUTFORMAT '${classOf[AvroContainerOutputFormat].getCanonicalName}'
|TBLPROPERTIES (
| 'avro.schema.literal'='{
| "type": "record",
| "name": "episodes",
| "namespace": "testing.hive.avro.serde",
| "fields": [
| {
| "name": "title",
| "type": "string",
| "doc": "episode title"
| },
| {
| "name": "air_date",
| "type": "string",
| "doc": "initial date"
| },
| {
| "name": "doctor",
| "type": "int",
| "doc": "main actor playing the Doctor in episode"
| }
| ]
| }'
|)
""".stripMargin.cmd,
// WORKAROUND: Required to pass schema to SerDe for partitioned tables.
// TODO: Pass this automatically from the table to partitions.
// Step 2: the schema literal below is a verbatim repeat of the one given in TBLPROPERTIES
// above, this time set as a SERDEPROPERTIES entry. Keep the two copies in sync.
s"""
|ALTER TABLE episodes_part SET SERDEPROPERTIES (
| 'avro.schema.literal'='{
| "type": "record",
| "name": "episodes",
| "namespace": "testing.hive.avro.serde",
| "fields": [
| {
| "name": "title",
| "type": "string",
| "doc": "episode title"
| },
| {
| "name": "air_date",
| "type": "string",
| "doc": "initial date"
| },
| {
| "name": "doctor",
| "type": "int",
| "doc": "main actor playing the Doctor in episode"
| }
| ]
| }'
|)
""".stripMargin.cmd,
// Step 3: populate a single static partition (doctor_pt=1) with every row selected from the
// `episodes` table, so this statement requires `episodes` to exist and be loaded.
s"INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1) SELECT title, air_date, doctor FROM episodes".cmd
)
)

hiveQTestUtilTables.foreach(registerTestTable)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
The Eleventh Hour 3 April 2010 11 1
The Doctor's Wife 14 May 2011 11 1
Horror of Fang Rock 3 September 1977 4 1
An Unearthly Child 23 November 1963 1 1
The Mysterious Planet 6 September 1986 6 1
Rose 26 March 2005 9 1
The Power of the Daleks 5 November 1966 2 1
Castrolava 4 January 1982 5 1
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,6 @@ class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")

createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}

0 comments on commit fea2124

Please sign in to comment.