Skip to content

Commit

Permalink
[SPARK-26503][CORE] Get rid of spark.sql.legacy.timeParser.enabled
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Per the discussion in #23391 (comment), this proposes simply removing the old pre-Spark-3 time-parsing behavior.

This is a rebase of #23411

## How was this patch tested?

Existing tests.

Closes #23495 from srowen/SPARK-26503.2.

Authored-by: Sean Owen <sean.owen@databricks.com>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
  • Loading branch information
srowen committed Jan 11, 2019
1 parent 1f1d98c commit 51a6ba0
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 336 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ package org.apache.spark.sql.catalyst.util
import java.time.{Instant, ZoneId}
import java.util.Locale

import scala.util.Try

import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.sql.internal.SQLConf

sealed trait DateFormatter extends Serializable {
def parse(s: String): Int // returns days since epoch
def format(days: Int): String
Expand Down Expand Up @@ -56,43 +50,8 @@ class Iso8601DateFormatter(
}
}

/**
 * Legacy (pre-Spark-3) date formatter backed by Commons Lang's `FastDateFormat`.
 * Dates are represented as days since the Unix epoch, per the [[DateFormatter]] contract.
 */
class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter {
  // Built lazily and marked @transient so the non-serializable formatter is
  // re-created after deserialization instead of being shipped across the wire.
  @transient
  private lazy val fastFormat = FastDateFormat.getInstance(pattern, locale)

  /** Parses `s` with the legacy formatter and converts the result to days since epoch. */
  override def parse(s: String): Int =
    DateTimeUtils.millisToDays(fastFormat.parse(s).getTime)

  /** Renders a days-since-epoch value as a string using the legacy pattern. */
  override def format(days: Int): String =
    fastFormat.format(DateTimeUtils.toJavaDate(days))
}

/**
 * [[LegacyDateFormatter]] variant that, when the configured pattern fails to
 * parse, falls back to older Spark parsing behaviors for backwards compatibility.
 */
class LegacyFallbackDateFormatter(
    pattern: String,
    locale: Locale) extends LegacyDateFormatter(pattern, locale) {
  override def parse(s: String): Int = {
    Try(super.parse(s))
      // If the pattern fails, try the parsing used in 2.0 and 1.x for
      // backwards compatibility.
      .orElse(Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(s).getTime)))
      // Spark 1.5.0 stored dates as a stringified day count, so as a last
      // resort interpret the raw string as an Int.
      .getOrElse(s.toInt)
  }
}

object DateFormatter {
  /**
   * Creates a java.time-based [[DateFormatter]] for the given pattern and locale.
   *
   * Fix: the diff-merged text retained the old
   * `if (SQLConf.get.legacyTimeParserEnabled) ...` expression in front of the
   * replacement call, so its result was discarded and it referenced the
   * LEGACY_TIME_PARSER_ENABLED conf that this change removes. Only the
   * Iso8601-based formatter is constructed now.
   */
  def apply(format: String, locale: Locale): DateFormatter = {
    new Iso8601DateFormatter(format, locale)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ import java.time.format.DateTimeParseException
import java.time.temporal.TemporalQueries
import java.util.{Locale, TimeZone}

import scala.util.Try

import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.sql.internal.SQLConf

sealed trait TimestampFormatter extends Serializable {
/**
* Parses a timestamp in a string and converts it to microseconds.
Expand Down Expand Up @@ -79,37 +73,8 @@ class Iso8601TimestampFormatter(
}
}

/**
 * Legacy (pre-Spark-3) timestamp formatter backed by Commons Lang's
 * `FastDateFormat`. Timestamps are microseconds since the Unix epoch, per the
 * [[TimestampFormatter]] contract.
 */
class LegacyTimestampFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends TimestampFormatter {
  // Lazily built and @transient so the non-serializable formatter is
  // re-created after deserialization.
  @transient
  private lazy val fastFormat = FastDateFormat.getInstance(pattern, timeZone, locale)

  // Protected hook: the fallback subclass overrides this to widen the set of
  // accepted input strings.
  protected def toMillis(s: String): Long = fastFormat.parse(s).getTime

  /** Parses `s` and converts the result to microseconds since epoch. */
  override def parse(s: String): Long = toMillis(s) * DateTimeUtils.MICROS_PER_MILLIS

  /** Renders a microseconds-since-epoch value using the legacy pattern. */
  override def format(us: Long): String =
    fastFormat.format(DateTimeUtils.toJavaTimestamp(us))
}

/**
 * [[LegacyTimestampFormatter]] that falls back to `DateTimeUtils.stringToTime`
 * when the configured pattern does not match, for backwards compatibility.
 */
class LegacyFallbackTimestampFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends LegacyTimestampFormatter(pattern, timeZone, locale) {
  override def toMillis(s: String): Long =
    Try(super.toMillis(s)).getOrElse(DateTimeUtils.stringToTime(s).getTime)
}

object TimestampFormatter {
  /**
   * Creates a java.time-based [[TimestampFormatter]] for the given pattern,
   * time zone and locale.
   *
   * Fix: the diff-merged text retained the old
   * `if (SQLConf.get.legacyTimeParserEnabled) ...` expression in front of the
   * replacement call, so its result was discarded and it referenced the
   * LEGACY_TIME_PARSER_ENABLED conf that this change removes. Only the
   * Iso8601-based formatter is constructed now.
   */
  def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = {
    new Iso8601TimestampFormatter(format, timeZone, locale)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1625,13 +1625,6 @@ object SQLConf {
"a SparkConf entry.")
.booleanConf
.createWithDefault(true)

val LEGACY_TIME_PARSER_ENABLED = buildConf("spark.sql.legacy.timeParser.enabled")
.doc("When set to true, java.text.SimpleDateFormat is used for formatting and parsing " +
" dates/timestamps in a locale-sensitive manner. When set to false, classes from " +
"java.time.* packages are used for the same purpose.")
.booleanConf
.createWithDefault(false)
}

/**
Expand Down Expand Up @@ -2057,8 +2050,6 @@ class SQLConf extends Serializable with Logging {
def setCommandRejectsSparkCoreConfs: Boolean =
getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS)

def legacyTimeParserEnabled: Boolean = getConf(SQLConf.LEGACY_TIME_PARSER_ENABLED)

/** ********************** SQLConf functionality methods ************ */

/** Set Spark SQL configuration properties. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.JsonFactory

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper {
Expand All @@ -43,61 +42,45 @@ class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper {
}

test("inferring timestamp type") {
  // Fix: the diff-merged hunk kept both the removed
  // `Seq(true, false).foreach { withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED...) }`
  // wrapper and the new unwrapped calls, so every check ran twice and the test
  // referenced a conf key deleted by this commit. Only the new body remains.
  // Strings matching the given timestampFormat must be inferred as TimestampType.
  checkTimestampType("yyyy", """{"a": "2018"}""")
  checkTimestampType("yyyy=MM", """{"a": "2018=12"}""")
  checkTimestampType("yyyy MM dd", """{"a": "2018 12 02"}""")
  checkTimestampType(
    "yyyy-MM-dd'T'HH:mm:ss.SSS",
    """{"a": "2018-12-02T21:04:00.123"}""")
  checkTimestampType(
    "yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX",
    """{"a": "2018-12-02T21:04:00.123567+01:00"}""")
}

test("prefer decimals over timestamps") {
  // Fix: drop the stale diff-merged wrapper that looped over the removed
  // LEGACY_TIME_PARSER_ENABLED conf and duplicated this check.
  // With prefersDecimal=true, a numeric-looking string that also matches the
  // timestamp pattern is inferred as a decimal, not a timestamp.
  checkType(
    options = Map(
      "prefersDecimal" -> "true",
      "timestampFormat" -> "yyyyMMdd.HHmmssSSS"
    ),
    json = """{"a": "20181202.210400123"}""",
    dt = DecimalType(17, 9)
  )
}

test("skip decimal type inferring") {
  // Fix: drop the stale diff-merged wrapper that looped over the removed
  // LEGACY_TIME_PARSER_ENABLED conf and duplicated this check.
  // With prefersDecimal=false, the same value matching the timestamp pattern
  // is inferred as TimestampType.
  checkType(
    options = Map(
      "prefersDecimal" -> "false",
      "timestampFormat" -> "yyyyMMdd.HHmmssSSS"
    ),
    json = """{"a": "20181202.210400123"}""",
    dt = TimestampType
  )
}

test("fallback to string type") {
  // Fix: drop the stale diff-merged wrapper that looped over the removed
  // LEGACY_TIME_PARSER_ENABLED conf and duplicated this check.
  // A value matching neither a number nor the timestamp pattern degrades to StringType.
  checkType(
    options = Map("timestampFormat" -> "yyyy,MM,dd.HHmmssSSS"),
    json = """{"a": "20181202.210400123"}""",
    dt = StringType
  )
}

test("disable timestamp inferring") {
Expand Down
Loading

0 comments on commit 51a6ba0

Please sign in to comment.