-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-24586][SQL] Upcast should not allow casting from string to other types #21586
Changes from all commits
93f43ad
13a7846
a199906
7c6d9a8
cf8d05f
e558ad4
14e3de3
a638a53
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2562,7 +2562,7 @@ class Analyzer( | |
case e => e.sql | ||
} | ||
throw new AnalysisException(s"Cannot up cast $fromStr from " + | ||
s"${from.dataType.catalogString} to ${to.catalogString} as it may truncate\n" + | ||
s"${from.dataType.catalogString} to ${to.catalogString}.\n" + | ||
"The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + | ||
"You can either add an explicit cast to the input data or choose a higher precision " + | ||
"type of the field in the target object") | ||
|
@@ -2575,11 +2575,15 @@ class Analyzer( | |
case p => p transformExpressions { | ||
case u @ UpCast(child, _, _) if !child.resolved => u | ||
|
||
case UpCast(child, dataType, walkedTypePath) | ||
if Cast.mayTruncate(child.dataType, dataType) => | ||
case UpCast(child, dt: AtomicType, _) | ||
if SQLConf.get.getConf(SQLConf.LEGACY_LOOSE_UPCAST) && | ||
child.dataType == StringType => | ||
Cast(child, dt.asNullable) | ||
|
||
case UpCast(child, dataType, walkedTypePath) if !Cast.canUpCast(child.dataType, dataType) => | ||
fail(child, dataType, walkedTypePath) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the error message of thrown There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
|
||
case UpCast(child, dataType, walkedTypePath) => Cast(child, dataType.asNullable) | ||
case UpCast(child, dataType, _) => Cast(child, dataType.asNullable) | ||
} | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -120,35 +120,36 @@ object Cast { | |
} | ||
|
||
/** | ||
* Return true iff we may truncate during casting `from` type to `to` type. e.g. long -> int, | ||
* timestamp -> date. | ||
* Returns true iff we can safely up-cast the `from` type to `to` type without any truncating or | ||
* precision lose or possible runtime failures. For example, long -> int, string -> int are not | ||
* up-cast. | ||
*/ | ||
def mayTruncate(from: DataType, to: DataType): Boolean = (from, to) match { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this will be ugly, but I still prefer to adding a conf for falling back to the original behavior. |
||
case (from: NumericType, to: DecimalType) if !to.isWiderThan(from) => true | ||
case (from: DecimalType, to: NumericType) if !from.isTighterThan(to) => true | ||
case (from, to) if illegalNumericPrecedence(from, to) => true | ||
case (TimestampType, DateType) => true | ||
case (StringType, to: NumericType) => true | ||
case _ => false | ||
} | ||
|
||
private def illegalNumericPrecedence(from: DataType, to: DataType): Boolean = { | ||
val fromPrecedence = TypeCoercion.numericPrecedence.indexOf(from) | ||
val toPrecedence = TypeCoercion.numericPrecedence.indexOf(to) | ||
toPrecedence >= 0 && fromPrecedence > toPrecedence | ||
} | ||
|
||
/** | ||
* Returns true iff we can safely cast the `from` type to `to` type without any truncating or | ||
* precision lose, e.g. int -> long, date -> timestamp. | ||
*/ | ||
def canSafeCast(from: AtomicType, to: AtomicType): Boolean = (from, to) match { | ||
def canUpCast(from: DataType, to: DataType): Boolean = (from, to) match { | ||
case _ if from == to => true | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added support for complex types |
||
case (from: NumericType, to: DecimalType) if to.isWiderThan(from) => true | ||
case (from: DecimalType, to: NumericType) if from.isTighterThan(to) => true | ||
case (from, to) if legalNumericPrecedence(from, to) => true | ||
case (f, t) if legalNumericPrecedence(f, t) => true | ||
case (DateType, TimestampType) => true | ||
case (_, StringType) => true | ||
|
||
// Spark supports casting between long and timestamp, please see `longToTimestamp` and | ||
// `timestampToLong` for details. | ||
case (TimestampType, LongType) => true | ||
case (LongType, TimestampType) => true | ||
|
||
case (ArrayType(fromType, fn), ArrayType(toType, tn)) => | ||
resolvableNullability(fn, tn) && canUpCast(fromType, toType) | ||
|
||
case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => | ||
resolvableNullability(fn, tn) && canUpCast(fromKey, toKey) && canUpCast(fromValue, toValue) | ||
|
||
case (StructType(fromFields), StructType(toFields)) => | ||
fromFields.length == toFields.length && | ||
fromFields.zip(toFields).forall { | ||
case (f1, f2) => | ||
resolvableNullability(f1.nullable, f2.nullable) && canUpCast(f1.dataType, f2.dataType) | ||
} | ||
|
||
case _ => false | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cloud-fan shall we mention setting
spark.sql.legacy.looseUpcast=true
to preserve old behavior?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah we should
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
are you interested in creating a PR (with a new JIRA) to fix the doc? 2.4 is EOL but I think it's good to fix the docs anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure. I will create one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#35519 has been created.