[SPARK-41015][SQL][PROTOBUF] UnitTest null check for data generator
### What changes were proposed in this pull request?
Add a null check on the values produced by the random data generator after type conversion, so generated null rows are handled instead of being dereferenced.

### Why are the changes needed?
To fix a test failure.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tested manually across the random seeds, plus the existing unit tests.
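
For context, a minimal sketch of the failure mode the null check guards against (illustrative schema and seed; `RandomDataGenerator` is Spark's test-scope utility in `org.apache.spark.sql`, and `forType` may generate null for a nullable type):

```scala
import org.apache.spark.sql.{RandomDataGenerator, Row}
import org.apache.spark.sql.types._

val schema = StructType(StructField("f", StringType, nullable = true) :: Nil)
val rand = new scala.util.Random(42)
val generator = RandomDataGenerator.forType(schema, rand = rand).get
// Because the type is nullable, generator() may return null itself (or a
// Row holding null), so a retry loop must null-check before comparing.
val data = generator().asInstanceOf[Row]
```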

Closes #38515 from SandishKumarHN/SPARK-41015-UTests.

Authored-by: SandishKumarHN <sanysandish@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
SandishKumarHN authored and MaxGekk committed Nov 8, 2022
1 parent 5c6c5e6 commit 75643f4
Showing 7 changed files with 68 additions and 7 deletions.
File: ProtobufUtils.scala
@@ -208,7 +208,7 @@ private[sql] object ProtobufUtils extends Logging {
       ).toList
       fileDescriptorList
     } catch {
-      case e: Descriptors.DescriptorValidationException =>
+      case e: Exception =>
         throw QueryCompilationErrors.failedParsingDescriptorError(descFilePath, e)
     }
   }
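Why broaden the catch: building descriptors from a set generated without `--include_imports` fails while resolving the missing dependency, and the failure does not always surface as a `DescriptorValidationException`. A hedged sketch of the failure, not Spark's exact code (file name per the protoc comment in the next file):

```scala
import java.io.FileInputStream
import com.google.protobuf.DescriptorProtos.FileDescriptorSet
import com.google.protobuf.Descriptors.FileDescriptor

// basicmessage_noimports.desc contains basicmessage.proto but omits its
// import, nestedenum.proto. With no dependencies to supply, buildFrom
// cannot resolve the import and throws.
val set = FileDescriptorSet.parseFrom(new FileInputStream("basicmessage_noimports.desc"))
val fd = FileDescriptor.buildFrom(set.getFile(0), Array.empty[FileDescriptor])
```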
File: basicmessage.proto
@@ -17,6 +17,7 @@
 // cd connector/protobuf/src/test/resources/protobuf
 // protoc --java_out=./ basicmessage.proto
 // protoc --include_imports --descriptor_set_out=basicmessage.desc --java_out=org/apache/spark/sql/protobuf/ basicmessage.proto
+// protoc --descriptor_set_out=basicmessage_noimports.desc --java_out=org/apache/spark/sql/protobuf/ basicmessage.proto

 syntax = "proto3";
File: basicmessage_noimports.desc (new, binary)
@@ -0,0 +1,18 @@
(binary FileDescriptorSet for basicmessage.proto generated without --include_imports; raw bytes omitted)
File: ProtobufCatalystDataConversionSuite.scala
@@ -123,16 +123,21 @@ class ProtobufCatalystDataConversionSuite
     StringType -> ("StringMsg", ""))

   testingTypes.foreach { dt =>
-    val seed = 1 + scala.util.Random.nextInt((1024 - 1) + 1)
+    val seed = scala.util.Random.nextInt(RandomDataGenerator.MAX_STR_LEN)
     test(s"single $dt with seed $seed") {

       val (messageName, defaultValue) = catalystTypesToProtoMessages(dt.fields(0).dataType)

       val rand = new scala.util.Random(seed)
       val generator = RandomDataGenerator.forType(dt, rand = rand).get
-      var data = generator()
-      while (data.asInstanceOf[Row].get(0) == defaultValue) // Do not use default values, since
-        data = generator() // from_protobuf() returns null in v3.
+      var data = generator().asInstanceOf[Row]
+      // Do not use default values, since from_protobuf() returns null in v3.
+      while (
+        data != null &&
+          (data.get(0) == defaultValue ||
+            (dt == BinaryType &&
+              data.get(0).asInstanceOf[Array[Byte]].isEmpty)))
+        data = generator().asInstanceOf[Row]

       val converter = CatalystTypeConverters.createToCatalystConverter(dt)
       val input = Literal.create(converter(data), dt)
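The defaults are skipped because proto3 omits default-valued fields from the wire format, so `from_protobuf()` gives back null rather than the default. A quick illustration, assuming the generated `BasicMessage` Java class from basicmessage.proto:

```scala
// "" is the proto3 default for strings; a default-valued field is not
// serialized, so the encoded message is empty and round-trips to null.
val msg = BasicMessage.newBuilder().setStringValue("").build()
assert(msg.toByteArray.isEmpty)
```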
File: ProtobufFunctionsSuite.scala
@@ -677,4 +677,18 @@ class ProtobufFunctionsSuite extends QueryTest with SharedSparkSession with Seri
         === inputDf.select("durationMsg.duration").take(1).toSeq(0).get(0))
     }
   }
+
+  test("raise cannot construct protobuf descriptor error") {
+    val df = Seq(ByteString.empty().toByteArray).toDF("value")
+    val testFileDescriptor = testFile("basicmessage_noimports.desc").replace("file:/", "/")
+
+    val e = intercept[AnalysisException] {
+      df.select(functions.from_protobuf($"value", "BasicMessage", testFileDescriptor) as 'sample)
+        .where("sample.string_value == \"slam\"").show()
+    }
+    checkError(
+      exception = e,
+      errorClass = "CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR",
+      parameters = Map("descFilePath" -> testFileDescriptor))
+  }
 }
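For contrast, a hedged sketch of the working path (descriptor generated with `--include_imports`, per the protoc comments above; `df` as in the test):

```scala
import org.apache.spark.sql.protobuf.functions.from_protobuf

// With all imports included in the descriptor set, the same message is
// deserialized instead of raising CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR.
val parsed = df.select(
  from_protobuf($"value", "BasicMessage", testFile("basicmessage.desc")) as "sample")
```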
File: ProtobufSerdeSuite.scala
@@ -177,6 +177,29 @@ class ProtobufSerdeSuite extends SharedSparkSession {
     withFieldMatchType(Deserializer.create(CATALYST_STRUCT, protoNestedFile, _))
   }

+  test("raise cannot parse and construct protobuf descriptor error") {
+    // passing serde_suite.proto instead of serde_suite.desc
+    var testFileDesc = testFile("serde_suite.proto").replace("file:/", "/")
+    val e1 = intercept[AnalysisException] {
+      ProtobufUtils.buildDescriptor(testFileDesc, "FieldMissingInSQLRoot")
+    }
+
+    checkError(
+      exception = e1,
+      errorClass = "CANNOT_PARSE_PROTOBUF_DESCRIPTOR",
+      parameters = Map("descFilePath" -> testFileDesc))
+
+    testFileDesc = testFile("basicmessage_noimports.desc").replace("file:/", "/")
+    val e2 = intercept[AnalysisException] {
+      ProtobufUtils.buildDescriptor(testFileDesc, "FieldMissingInSQLRoot")
+    }
+
+    checkError(
+      exception = e2,
+      errorClass = "CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR",
+      parameters = Map("descFilePath" -> testFileDesc))
+  }
+
   /**
    * Attempt to convert `catalystSchema` to `protoSchema` (or vice-versa if `deserialize` is
    * true), assert that it fails, and assert that the _cause_ of the thrown exception has a
File: QueryCompilationErrors.scala
@@ -3325,7 +3325,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
   def descrioptorParseError(descFilePath: String, cause: Throwable): Throwable = {
     new AnalysisException(
       errorClass = "CANNOT_PARSE_PROTOBUF_DESCRIPTOR",
-      messageParameters = Map.empty("descFilePath" -> descFilePath),
+      messageParameters = Map("descFilePath" -> descFilePath),
       cause = Option(cause.getCause))
   }

@@ -3339,7 +3339,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
   def failedParsingDescriptorError(descFilePath: String, cause: Throwable): Throwable = {
     new AnalysisException(
       errorClass = "CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR",
-      messageParameters = Map.empty("descFilePath" -> descFilePath),
+      messageParameters = Map("descFilePath" -> descFilePath),
       cause = Option(cause.getCause))
   }
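
The two replaced lines fixed a genuine bug, not just style: `Map.empty(...)` does not build a map. It calls `apply()` on an empty map, which is a key lookup, so the tuple was treated as a key and the lookup threw at runtime. A self-contained illustration:

```scala
val path = "/tmp/example.desc" // illustrative path
// Map.empty("descFilePath" -> path)     // key lookup on an empty map:
//                                       // throws NoSuchElementException
val params = Map("descFilePath" -> path) // builds the intended one-entry map
```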

