Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor buildDateTimeFormatter to return velox::Expected #11246

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 42 additions & 15 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1575,10 +1575,14 @@ std::shared_ptr<DateTimeFormatter> buildMysqlDateTimeFormatter(
return builder.setType(DateTimeFormatterType::MYSQL).build();
}

std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
Expected<std::shared_ptr<DateTimeFormatter>> buildJodaDateTimeFormatter(
const std::string_view& format) {
if (format.empty()) {
VELOX_USER_FAIL("Invalid pattern specification");
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("Invalid pattern specification"));
}

DateTimeFormatterBuilder builder(format.size());
Expand All @@ -1598,16 +1602,19 @@ std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
// Case 2: find closing single quote
int64_t count = numLiteralChars(startTokenPtr + 1, end);
if (count == -1) {
VELOX_USER_FAIL("No closing single quote for literal");
} else {
for (int64_t i = 1; i <= count; i++) {
builder.appendLiteral(startTokenPtr + i, 1);
if (*(startTokenPtr + i) == '\'') {
i += 1;
}
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("No closing single quote for literal"));
}
for (int64_t i = 1; i <= count; i++) {
builder.appendLiteral(startTokenPtr + i, 1);
if (*(startTokenPtr + i) == '\'') {
i += 1;
}
cur += count + 2;
}
cur += count + 2;
}
} else {
int count = 1;
Expand Down Expand Up @@ -1686,7 +1693,11 @@ std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
break;
default:
if (isalpha(*startTokenPtr)) {
VELOX_UNSUPPORTED("Specifier {} is not supported.", *startTokenPtr);
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(Status::UserError(
"Specifier {} is not supported.", *startTokenPtr));
} else {
builder.appendLiteral(startTokenPtr, cur - startTokenPtr);
}
Expand All @@ -1697,10 +1708,16 @@ std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
return builder.setType(DateTimeFormatterType::JODA).build();
}

std::shared_ptr<DateTimeFormatter> buildSimpleDateTimeFormatter(
Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
const std::string_view& format,
bool lenient) {
VELOX_USER_CHECK(!format.empty(), "Format pattern should not be empty.");
if (format.empty()) {
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("Format pattern should not be empty"));
}

DateTimeFormatterBuilder builder(format.size());
const char* cur = format.data();
Expand All @@ -1721,7 +1738,13 @@ std::shared_ptr<DateTimeFormatter> buildSimpleDateTimeFormatter(
// Append literal characters from the start until the next closing
// literal sequence single quote.
int64_t count = numLiteralChars(startTokenPtr + 1, end);
VELOX_USER_CHECK_NE(count, -1, "No closing single quote for literal");
if (count == -1) {
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(
Status::UserError("No closing single quote for literal"));
}
for (int64_t i = 1; i <= count; i++) {
builder.appendLiteral(startTokenPtr + i, 1);
if (*(startTokenPtr + i) == '\'') {
Expand Down Expand Up @@ -1809,7 +1832,11 @@ std::shared_ptr<DateTimeFormatter> buildSimpleDateTimeFormatter(
break;
default:
if (isalpha(*startTokenPtr)) {
VELOX_UNSUPPORTED("Specifier {} is not supported.", *startTokenPtr);
if (threadSkipErrorDetails()) {
return folly::makeUnexpected(Status::UserError());
}
return folly::makeUnexpected(Status::UserError(
"Specifier {} is not supported.", *startTokenPtr));
} else {
builder.appendLiteral(startTokenPtr, cur - startTokenPtr);
}
Expand Down
4 changes: 2 additions & 2 deletions velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,10 @@ class DateTimeFormatter {
std::shared_ptr<DateTimeFormatter> buildMysqlDateTimeFormatter(
const std::string_view& format);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also make this method return Expected for consistency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course. Updated.


std::shared_ptr<DateTimeFormatter> buildJodaDateTimeFormatter(
Expected<std::shared_ptr<DateTimeFormatter>> buildJodaDateTimeFormatter(
const std::string_view& format);

std::shared_ptr<DateTimeFormatter> buildSimpleDateTimeFormatter(
Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
const std::string_view& format,
bool lenient);

Expand Down
61 changes: 34 additions & 27 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ class DateTimeFormatterTest : public testing::Test {
});
}

std::shared_ptr<DateTimeFormatter> getJodaDateTimeFormatter(
const std::string_view& format) {
return buildJodaDateTimeFormatter(format).thenOrThrow(
folly::identity,
[&](const Status& status) { VELOX_USER_FAIL("{}", status.message()); });
}

void testTokenRange(
char specifier,
int numTokenStart,
Expand All @@ -74,7 +81,7 @@ class DateTimeFormatterTest : public testing::Test {
std::string pattern(i, specifier);
std::vector<DateTimeToken> expected;
expected = {DateTimeToken(FormatPattern{token, i})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter(pattern)->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter(pattern)->tokens());
}
}

Expand All @@ -88,8 +95,8 @@ class DateTimeFormatterTest : public testing::Test {
DateTimeResult parseJoda(
const std::string_view& input,
const std::string_view& format) {
auto dateTimeResultExpected =
buildJodaDateTimeFormatter(format)->parse(input);
auto formatter = getJodaDateTimeFormatter(format);
auto dateTimeResultExpected = formatter->parse(input);
return dateTimeResult(dateTimeResultExpected);
}

Expand All @@ -107,7 +114,7 @@ class DateTimeFormatterTest : public testing::Test {
const std::string_view& input,
const std::string_view& format) {
auto dateTimeResultExpected =
buildJodaDateTimeFormatter(format)->parse(input);
getJodaDateTimeFormatter(format)->parse(input);
auto result = dateTimeResult(dateTimeResultExpected);
if (result.timezoneId == 0) {
return "+00:00";
Expand Down Expand Up @@ -241,11 +248,11 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {

// G specifier case
expected = {DateTimeToken(FormatPattern{DateTimeFormatSpecifier::ERA, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("G")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("G")->tokens());
// minRepresentDigits should be unchanged with higher number of specifier for
// ERA
expected = {DateTimeToken(FormatPattern{DateTimeFormatSpecifier::ERA, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("GGGG")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("GGGG")->tokens());

// C specifier case
testTokenRange('C', 1, 3, DateTimeFormatSpecifier::CENTURY_OF_ERA);
Expand Down Expand Up @@ -281,12 +288,12 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {
// a specifier case
expected = {
DateTimeToken(FormatPattern{DateTimeFormatSpecifier::HALFDAY_OF_DAY, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("a")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("a")->tokens());
// minRepresentDigits should be unchanged with higher number of specifier for
// HALFDAY_OF_DAY
expected = {
DateTimeToken(FormatPattern{DateTimeFormatSpecifier::HALFDAY_OF_DAY, 2})};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("aa")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("aa")->tokens());

// K specifier case
testTokenRange('K', 1, 4, DateTimeFormatSpecifier::HOUR_OF_HALFDAY);
Expand Down Expand Up @@ -317,22 +324,22 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {

// Literal case
expected = {DateTimeToken(" ")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter(" ")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter(" ")->tokens());
expected = {DateTimeToken("1234567890")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("1234567890")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("1234567890")->tokens());
expected = {DateTimeToken("'")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("''")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("''")->tokens());
expected = {DateTimeToken("abcdefghijklmnopqrstuvwxyz")};
EXPECT_EQ(
expected,
buildJodaDateTimeFormatter("'abcdefghijklmnopqrstuvwxyz'")->tokens());
getJodaDateTimeFormatter("'abcdefghijklmnopqrstuvwxyz'")->tokens());
expected = {DateTimeToken("'abcdefg'hijklmnop'qrstuv'wxyz'")};
EXPECT_EQ(
expected,
buildJodaDateTimeFormatter("'''abcdefg''hijklmnop''qrstuv''wxyz'''")
getJodaDateTimeFormatter("'''abcdefg''hijklmnop''qrstuv''wxyz'''")
->tokens());
expected = {DateTimeToken("'1234abcd")};
EXPECT_EQ(expected, buildJodaDateTimeFormatter("''1234'abcd'")->tokens());
EXPECT_EQ(expected, getJodaDateTimeFormatter("''1234'abcd'")->tokens());

// Specifier combinations
expected = {
Expand Down Expand Up @@ -381,22 +388,22 @@ TEST_F(JodaDateTimeFormatterTest, validJodaBuild) {

EXPECT_EQ(
expected,
buildJodaDateTimeFormatter(
getJodaDateTimeFormatter(
"''CCC-YYYY/xxx//www-00-eeee--EEEEEE---yyyyy///DDDDMM-MMMMddddKKhhhkkHHmmsSSSSSSzzzZZZGGGG'abcdefghijklmnopqrstuvwxyz'aaa")
->tokens());
}

TEST_F(JodaDateTimeFormatterTest, invalidJodaBuild) {
// Invalid specifiers
EXPECT_THROW(buildJodaDateTimeFormatter("q"), VeloxUserError);
EXPECT_THROW(buildJodaDateTimeFormatter("r"), VeloxUserError);
EXPECT_THROW(buildJodaDateTimeFormatter("g"), VeloxUserError);
EXPECT_TRUE(buildJodaDateTimeFormatter("q").hasError());
EXPECT_TRUE(buildJodaDateTimeFormatter("r").hasError());
EXPECT_TRUE(buildJodaDateTimeFormatter("g").hasError());

// Unclosed literal sequence
EXPECT_THROW(buildJodaDateTimeFormatter("'abcd"), VeloxUserError);
EXPECT_TRUE(buildJodaDateTimeFormatter("'abcd").hasError());

// Empty format string
EXPECT_THROW(buildJodaDateTimeFormatter(""), VeloxUserError);
EXPECT_TRUE(buildJodaDateTimeFormatter("").hasError());
}

TEST_F(JodaDateTimeFormatterTest, invalid) {
Expand Down Expand Up @@ -1317,20 +1324,20 @@ TEST_F(JodaDateTimeFormatterTest, formatResultSize) {
auto* timezone = tz::locateZone("GMT");

EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy-MM-dd")->maxResultSize(timezone), 12);
EXPECT_EQ(buildJodaDateTimeFormatter("yyyy-MM")->maxResultSize(timezone), 9);
EXPECT_EQ(buildJodaDateTimeFormatter("y")->maxResultSize(timezone), 6);
getJodaDateTimeFormatter("yyyy-MM-dd")->maxResultSize(timezone), 12);
EXPECT_EQ(getJodaDateTimeFormatter("yyyy-MM")->maxResultSize(timezone), 9);
EXPECT_EQ(getJodaDateTimeFormatter("y")->maxResultSize(timezone), 6);
EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy////MM////dd")->maxResultSize(timezone),
getJodaDateTimeFormatter("yyyy////MM////dd")->maxResultSize(timezone),
18);
EXPECT_EQ(
buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS")
getJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS")
->maxResultSize(timezone),
31);
// No padding. CENTURY_OF_ERA can be at most 3 digits.
EXPECT_EQ(buildJodaDateTimeFormatter("C")->maxResultSize(timezone), 3);
EXPECT_EQ(getJodaDateTimeFormatter("C")->maxResultSize(timezone), 3);
// Needs to pad to make result contain 4 digits.
EXPECT_EQ(buildJodaDateTimeFormatter("CCCC")->maxResultSize(timezone), 4);
EXPECT_EQ(getJodaDateTimeFormatter("CCCC")->maxResultSize(timezone), 4);
}

TEST_F(JodaDateTimeFormatterTest, betterErrorMessaging) {
Expand Down
29 changes: 22 additions & 7 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1643,9 +1643,12 @@ struct FormatDateTimeFunction {
}

FOLLY_ALWAYS_INLINE void setFormatter(const arg_type<Varchar>& formatString) {
jodaDateTime_ = buildJodaDateTimeFormatter(
std::string_view(formatString.data(), formatString.size()));
maxResultSize_ = jodaDateTime_->maxResultSize(sessionTimeZone_);
buildJodaDateTimeFormatter(
std::string_view(formatString.data(), formatString.size()))
.thenOrThrow([this](auto formatter) {
jodaDateTime_ = formatter;
maxResultSize_ = jodaDateTime_->maxResultSize(sessionTimeZone_);
});
}

void format(
Expand Down Expand Up @@ -1680,7 +1683,10 @@ struct ParseDateTimeFunction {
const arg_type<Varchar>* format) {
if (format != nullptr) {
format_ = buildJodaDateTimeFormatter(
std::string_view(format->data(), format->size()));
std::string_view(format->data(), format->size()))
.thenOrThrow(folly::identity, [&](const Status& status) {
VELOX_USER_FAIL("{}", status.message());
});
isConstFormat_ = true;
}

Expand All @@ -1696,7 +1702,10 @@ struct ParseDateTimeFunction {
const arg_type<Varchar>& format) {
if (!isConstFormat_) {
format_ = buildJodaDateTimeFormatter(
std::string_view(format.data(), format.size()));
std::string_view(format.data(), format.size()))
.thenOrThrow(folly::identity, [&](const Status& status) {
VELOX_USER_FAIL("{}", status.message());
});
}
auto dateTimeResult =
format_->parse(std::string_view(input.data(), input.size()));
Expand Down Expand Up @@ -1770,6 +1779,13 @@ template <typename T>
struct ToISO8601Function {
VELOX_DEFINE_FUNCTION_TYPES(T);

ToISO8601Function() {
auto formatter =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSZZ");
VELOX_CHECK(!formatter.hasError(), "Default format should always be valid");
formatter_ = formatter.value();
}
rui-mo marked this conversation as resolved.
Show resolved Hide resolved

FOLLY_ALWAYS_INLINE void initialize(
const std::vector<TypePtr>& inputTypes,
const core::QueryConfig& config,
Expand Down Expand Up @@ -1814,8 +1830,7 @@ struct ToISO8601Function {
}

const tz::TimeZone* timeZone_{nullptr};
std::shared_ptr<DateTimeFormatter> formatter_ =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSZZ");
std::shared_ptr<DateTimeFormatter> formatter_;
rui-mo marked this conversation as resolved.
Show resolved Hide resolved
};

template <typename T>
Expand Down
41 changes: 21 additions & 20 deletions velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,26 +109,27 @@ void castToString(
auto* flatResult = result.as<FlatVector<StringView>>();
const auto* timestamps = input.as<SimpleVector<int64_t>>();

auto formatter =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz");

context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto timestampWithTimezone = timestamps->valueAt(row);

const auto timestamp = unpackTimestampUtc(timestampWithTimezone);
const auto timeZoneId = unpackZoneKeyId(timestampWithTimezone);
const auto* timezonePtr = tz::locateZone(tz::getTimeZoneName(timeZoneId));

exec::StringWriter<false> result(flatResult, row);

const auto maxResultSize = formatter->maxResultSize(timezonePtr);
result.reserve(maxResultSize);
const auto resultSize =
formatter->format(timestamp, timezonePtr, maxResultSize, result.data());
result.resize(resultSize);

result.finalize();
});
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz")
.thenOrThrow([&](auto formatter) {
context.applyToSelectedNoThrow(rows, [&](auto row) {
const auto timestampWithTimezone = timestamps->valueAt(row);

const auto timestamp = unpackTimestampUtc(timestampWithTimezone);
const auto timeZoneId = unpackZoneKeyId(timestampWithTimezone);
const auto* timezonePtr =
tz::locateZone(tz::getTimeZoneName(timeZoneId));

exec::StringWriter<false> result(flatResult, row);

const auto maxResultSize = formatter->maxResultSize(timezonePtr);
result.reserve(maxResultSize);
const auto resultSize = formatter->format(
timestamp, timezonePtr, maxResultSize, result.data());
result.resize(resultSize);

result.finalize();
});
});
}

void castToTimestamp(
Expand Down
Loading
Loading