Skip to content

Commit

Permalink
[SPARK-48907][SQL] Fix the value explicitTypes in `COLLATION_MISMATCH.EXPLICIT`
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?
This PR aims to:
- fix the value of the `explicitTypes` parameter in `COLLATION_MISMATCH.EXPLICIT`.
- use `checkError` to check exceptions in `CollationSQLExpressionsSuite` and `CollationStringExpressionsSuite`.

### Why are the changes needed?
This is a bug fix only, e.g.:
```
SELECT concat_ws(' ', collate('Spark', 'UTF8_LCASE'), collate('SQL', 'UNICODE'))
```

- Before:
  ```
  [COLLATION_MISMATCH.EXPLICIT] Could not determine which collation to use for string functions and operators. Error occurred due to the mismatch between explicit collations: `string collate UTF8_LCASE`.`string collate UNICODE`. Decide on a single explicit collation and remove others. SQLSTATE: 42P21
  ```
  <img width="747" alt="image" src="https://github.com/user-attachments/assets/4e026cb5-2875-4370-9bb9-878f0b607f41">

- After:
  ```
  [COLLATION_MISMATCH.EXPLICIT] Could not determine which collation to use for string functions and operators. Error occurred due to the mismatch between explicit collations: [`string collate UTF8_LCASE`, `string collate UNICODE`]. Decide on a single explicit collation and remove others. SQLSTATE: 42P21
  ```
  <img width="738" alt="image" src="https://github.com/user-attachments/assets/86f489a2-9f2d-4f59-bdb1-95c051a93ee8">

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Updated existing UTs.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#47365 from panbingkun/SPARK-48907.

Authored-by: panbingkun <panbingkun@baidu.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
panbingkun authored and cloud-fan committed Jul 17, 2024
1 parent 74ca836 commit 5d16c31
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@
"subClass" : {
"EXPLICIT" : {
"message" : [
"Error occurred due to the mismatch between explicit collations: <explicitTypes>. Decide on a single explicit collation and remove others."
"Error occurred due to the mismatch between explicit collations: [<explicitTypes>]. Decide on a single explicit collation and remove others."
]
},
"IMPLICIT" : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3675,7 +3675,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
new AnalysisException(
errorClass = "COLLATION_MISMATCH.EXPLICIT",
messageParameters = Map(
"explicitTypes" -> toSQLId(explicitTypes)
"explicitTypes" -> explicitTypes.map(toSQLId).mkString(", ")
)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -680,11 +680,14 @@ class CollationSQLExpressionsSuite
val number = "xx"
val query = s"SELECT to_number('$number', '999');"
withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") {
val e = intercept[SparkIllegalArgumentException] {
val testQuery = sql(query)
testQuery.collect()
}
assert(e.getErrorClass === "INVALID_FORMAT.MISMATCH_INPUT")
checkError(
exception = intercept[SparkIllegalArgumentException] {
val testQuery = sql(query)
testQuery.collect()
},
errorClass = "INVALID_FORMAT.MISMATCH_INPUT",
parameters = Map("inputType" -> "\"STRING\"", "input" -> "xx", "format" -> "999")
)
}
}

Expand Down Expand Up @@ -996,11 +999,13 @@ class CollationSQLExpressionsSuite
withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) {
val query = s"SELECT raise_error('${t.errorMessage}')"
// Result & data type
val userException = intercept[SparkRuntimeException] {
sql(query).collect()
}
assert(userException.getErrorClass === "USER_RAISED_EXCEPTION")
assert(userException.getMessage.contains(t.errorMessage))
checkError(
exception = intercept[SparkRuntimeException] {
sql(query).collect()
},
errorClass = "USER_RAISED_EXCEPTION",
parameters = Map("errorMessage" -> t.errorMessage)
)
}
})
}
Expand Down Expand Up @@ -1172,10 +1177,13 @@ class CollationSQLExpressionsSuite
}
})
// Collation mismatch
val collationMismatch = intercept[AnalysisException] {
sql("SELECT mask(collate('ab-CD-12-@$','UNICODE'),collate('X','UNICODE_CI'),'x','0','#')")
}
assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT")
checkError(
exception = intercept[AnalysisException] {
sql("SELECT mask(collate('ab-CD-12-@$','UNICODE'),collate('X','UNICODE_CI'),'x','0','#')")
},
errorClass = "COLLATION_MISMATCH.EXPLICIT",
parameters = Map("explicitTypes" -> "`string collate UNICODE`, `string collate UNICODE_CI`")
)
}

test("Support XmlToStructs xml expression with collation") {
Expand Down Expand Up @@ -1360,11 +1368,14 @@ class CollationSQLExpressionsSuite
val json = "{\"a\":1,"
val query = s"SELECT parse_json('$json');"
withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") {
val e = intercept[SparkException] {
val testQuery = sql(query)
testQuery.collect()
}
assert(e.getErrorClass === "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION")
checkError(
exception = intercept[SparkException] {
val testQuery = sql(query)
testQuery.collect()
},
errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
parameters = Map("badRecord" -> "{\"a\":1,", "failFastMode" -> "FAILFAST")
)
}
}

Expand Down Expand Up @@ -1461,11 +1472,14 @@ class CollationSQLExpressionsSuite
val json = "[1, \"Spark\"]"
val query = s"SELECT variant_get(parse_json('$json'), '$$[1]', 'int');"
withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") {
val e = intercept[SparkRuntimeException] {
val testQuery = sql(query)
testQuery.collect()
}
assert(e.getErrorClass === "INVALID_VARIANT_CAST")
checkError(
exception = intercept[SparkRuntimeException] {
val testQuery = sql(query)
testQuery.collect()
},
errorClass = "INVALID_VARIANT_CAST",
parameters = Map("value" -> "\"Spark\"", "dataType" -> "\"INT\"")
)
}
}

Expand Down Expand Up @@ -2289,10 +2303,20 @@ class CollationSQLExpressionsSuite
s"""
|SELECT REFLECT('java.lang.Integer', 'toHexString',"2");
|""".stripMargin
val typeException = intercept[ExtendedAnalysisException] {
sql(queryFail).collect()
}
assert(typeException.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_STATIC_METHOD")
checkError(
exception = intercept[ExtendedAnalysisException] {
sql(queryFail).collect()
},
errorClass = "DATATYPE_MISMATCH.UNEXPECTED_STATIC_METHOD",
parameters = Map(
"methodName" -> "toHexString",
"className" -> "java.lang.Integer",
"sqlExpr" -> "\"reflect(java.lang.Integer, toHexString, 2)\""),
context = ExpectedContext(
fragment = """REFLECT('java.lang.Integer', 'toHexString',"2")""",
start = 8,
stop = 54)
)
}

// TODO: Add more tests for other SQL expressions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ class CollationSQLRegexpSuite
sql(s"SELECT regexp_replace(collate('ABCDE','$c1'), '.c.', collate('FFF','$c2'))")
},
errorClass = "COLLATION_MISMATCH.EXPLICIT",
parameters = Map("explicitTypes" -> "`string`.`string collate UTF8_LCASE`")
parameters = Map("explicitTypes" -> "`string`, `string collate UTF8_LCASE`")
)
// Unsupported collations
case class RegExpReplaceTestFail(l: String, r: String, c: String)
Expand Down
Loading

0 comments on commit 5d16c31

Please sign in to comment.