Skip to content

Commit

Permalink
[HUDI-1489] Fix null pointer exception when reading updated written bootstrap table (apache#2370)
Browse files Browse the repository at this point in the history

Co-authored-by: Wenning Ding <wenningd@amazon.com>
  • Loading branch information
2 people authored and nbalajee committed Dec 28, 2020
1 parent a39951f commit 93e6b54
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 239 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ class HoodieBootstrapRelation(@transient val _sqlContext: SQLContext,
// Get required schemas for column pruning
var requiredDataSchema = StructType(Seq())
var requiredSkeletonSchema = StructType(Seq())
// requiredColsSchema is the schema of requiredColumns, note that requiredColumns is in a random order
// so requiredColsSchema is not always equal to (requiredSkeletonSchema.fields ++ requiredDataSchema.fields)
var requiredColsSchema = StructType(Seq())
requiredColumns.foreach(col => {
var field = dataSchema.find(_.name == col)
if (field.isDefined) {
Expand All @@ -99,6 +102,7 @@ class HoodieBootstrapRelation(@transient val _sqlContext: SQLContext,
field = skeletonSchema.find(_.name == col)
requiredSkeletonSchema = requiredSkeletonSchema.add(field.get)
}
requiredColsSchema = requiredColsSchema.add(field.get)
})

// Prepare readers for reading data file and skeleton files
Expand Down Expand Up @@ -129,7 +133,7 @@ class HoodieBootstrapRelation(@transient val _sqlContext: SQLContext,
sparkSession = _sqlContext.sparkSession,
dataSchema = fullSchema,
partitionSchema = StructType(Seq.empty),
requiredSchema = StructType(requiredSkeletonSchema.fields ++ requiredDataSchema.fields),
requiredSchema = requiredColsSchema,
filters = filters,
options = Map.empty,
hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf())
Expand Down
Loading

0 comments on commit 93e6b54

Please sign in to comment.