Skip to content

Commit

Permalink
Support display of timezone for datetime and datetime64 (#838)
Browse files Browse the repository at this point in the history
* support display of timezone for datetime and datetime64

* revise code and fix datetime64 timezone problem

* fix bug and test cases

* ensure datetime parsing and revise tests

* fix code structure/tests

* Merge pull request #55146 from zvonand/zvonand-parsing-dt64-fix

Improve parsing DateTime64 from timestamp represented as string

* fix bugs that existed in parsing

* Add proton starts/ends syntax
  • Loading branch information
Jasmine-ge authored Oct 19, 2024
1 parent 33b8999 commit abab246
Show file tree
Hide file tree
Showing 88 changed files with 1,441 additions and 1,175 deletions.
5 changes: 4 additions & 1 deletion src/DataTypes/Serializations/SerializationDateTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ void SerializationDateTime::serializeText(const IColumn & column, size_t row_num
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, ostr, time_zone);
if (has_explicit_time_zone)
writeDateTimeTextWithTimeZone(value, ostr, time_zone);
else
writeDateTimeText(value, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeIntText(value, ostr);
Expand Down
5 changes: 4 additions & 1 deletion src/DataTypes/Serializations/SerializationDateTime64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ void SerializationDateTime64::serializeText(const IColumn & column, size_t row_n
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, scale, ostr, time_zone);
if (has_explicit_time_zone)
writeDateTimeTextWithTimeZone(value, scale, ostr, time_zone);
else
writeDateTimeText(value, scale, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeDateTimeUnixTimestamp(value, scale, ostr);
Expand Down
112 changes: 98 additions & 14 deletions src/IO/ReadHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,7 @@ template void readDateTextFallback<void>(LocalDate &, ReadBuffer &);
template bool readDateTextFallback<bool>(LocalDate &, ReadBuffer &);


template <typename ReturnType>
template <typename ReturnType, bool dt64_mode>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
Expand All @@ -998,31 +998,61 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
static constexpr auto date_broken_down_length = 10;
/// hh:mm:ss
static constexpr auto time_broken_down_length = 8;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = date_broken_down_length + 1 + time_broken_down_length;

char s[date_time_broken_down_length];
/// proton: starts
/// +zz:zz
static constexpr auto zone_broken_down_length = 6;
/// YYYY-MM-DD hh:mm:ss+zz:zz
static constexpr auto date_time_with_zone_broken_down_length = date_broken_down_length + 1 + time_broken_down_length + zone_broken_down_length;

char s[date_time_with_zone_broken_down_length];
/// proton: ends
char * s_pos = s;

/** Read characters, that could represent unix timestamp.
* Only unix timestamp of at least 5 characters is supported.
* Only unix timestamp of at least 5 characters is supported by default, exception is thrown for a shorter one
* (unless parsing a string like '1.23' or '-12': there is no ambiguity, it is a DT64 timestamp).
* Then look at 5th character. If it is a number - treat whole as unix timestamp.
* If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format.
*/

int negative_multiplier = 1;

if (!buf.eof() && *buf.position() == '-')
{
if constexpr (dt64_mode)
{
negative_multiplier = -1;
++buf.position();
}
else
{
if constexpr (throw_exception)
throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime");
else
return false;
}
}

/// A piece similar to unix timestamp, maybe scaled to subsecond precision.
while (s_pos < s + date_time_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
while (s_pos < s + date_time_with_zone_broken_down_length && !buf.eof() && isNumericASCII(*buf.position()))
{
*s_pos = *buf.position();
++s_pos;
++buf.position();
}

/// 2015-01-01 01:02:03 or 2015-01-01
if (s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position()))
/// if negative, it is a timestamp with no ambiguity
if (negative_multiplier == 1 && s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position()))
{
const auto already_read_length = s_pos - s;
const size_t remaining_date_size = date_broken_down_length - already_read_length;
/// proton: starts
/// Whether the input carries a time zone designator ('+hh:mm', '-hh:mm' or 'Z')
bool has_time_zone_offset = false;
Int8 time_zone_offset_hour = 0;
Int8 time_zone_offset_minute = 0;
/// proton: ends

size_t size = buf.read(s_pos, remaining_date_size);
if (size != remaining_date_size)
Expand Down Expand Up @@ -1062,35 +1092,89 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
minute = (s[3] - '0') * 10 + (s[4] - '0');
second = (s[6] - '0') * 10 + (s[7] - '0');
}
/// proton: starts
if (!buf.eof() && (*buf.position() == '+' || *buf.position() == '-'))
{

has_time_zone_offset = true;
char timezone_sign = *buf.position();
++buf.position();

char tz[zone_broken_down_length];
size = buf.read(tz, zone_broken_down_length - 1);
tz[size] = 0;

if (size != zone_broken_down_length - 1 || tz[2] != ':')
{
throw ParsingException(std::string("Invalid timezone format ") + tz, ErrorCodes::CANNOT_PARSE_DATETIME);
}

time_zone_offset_hour = (tz[0] - '0') * 10 + (tz[1] - '0');
time_zone_offset_minute = (tz[3] - '0') * 10 + (tz[4] - '0');

if (timezone_sign == '-')
{
time_zone_offset_hour = -time_zone_offset_hour;
time_zone_offset_minute = -time_zone_offset_minute;
}

}
else if (!buf.eof() && *buf.position() == 'Z')
{
has_time_zone_offset = true;
++buf.position();
}

if (unlikely(year == 0))
{
datetime = 0;
}
else if (has_time_zone_offset)
{
const DateLUTImpl * utc_time_zone = &DateLUT::instance("UTC");
datetime = utc_time_zone->makeDateTime(year, month, day, hour, minute, second);
if (time_zone_offset_hour)
datetime -= time_zone_offset_hour * 3600;

if (time_zone_offset_minute)
datetime -= time_zone_offset_minute * 60;
}
else
{
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
}
/// proton: ends
}
else
{
if (s_pos - s >= 5)
datetime = 0;
bool too_short = s_pos - s <= 4;

if (!too_short || dt64_mode)
{
/// Not very efficient.
datetime = 0;
for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos)
datetime = datetime * 10 + *digit_pos - '0';
}
else
datetime *= negative_multiplier;

if (too_short && negative_multiplier != -1)
{
if constexpr (throw_exception)
throw ParsingException("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME);
throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime");
else
return false;
}

}

return ReturnType(true);
}

template void readDateTimeTextFallback<void>(time_t &, ReadBuffer &, const DateLUTImpl &);
template bool readDateTimeTextFallback<bool>(time_t &, ReadBuffer &, const DateLUTImpl &);
template void readDateTimeTextFallback<void, false>(time_t &, ReadBuffer &, const DateLUTImpl &);
template void readDateTimeTextFallback<void, true>(time_t &, ReadBuffer &, const DateLUTImpl &);
template bool readDateTimeTextFallback<bool, false>(time_t &, ReadBuffer &, const DateLUTImpl &);
template bool readDateTimeTextFallback<bool, true>(time_t &, ReadBuffer &, const DateLUTImpl &);


void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
Expand Down
130 changes: 106 additions & 24 deletions src/IO/ReadHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -907,25 +907,44 @@ inline T parseFromString(std::string_view str)
}


template <typename ReturnType = void>
template <typename ReturnType = void, bool dt64_mode = false>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);

/** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone.
* As an exception, also supported parsing of unix timestamp in form of decimal number.
*/
template <typename ReturnType = void>
template <typename ReturnType = void, bool dt64_mode = false>
inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
{
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

if constexpr (!dt64_mode)
{
if (!buf.eof() && !isNumericASCII(*buf.position()))
{
if constexpr (throw_exception)
throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime");
else
return false;
}
}

/// Optimistic path, when whole value is in buffer.
const char * s = buf.position();

/// YYYY-MM-DD hh:mm:ss+zz:zz
static constexpr auto date_time_with_time_zone_broken_down_length = 25;
/// YYYY-MM-DD hh:mm:ss
static constexpr auto date_time_broken_down_length = 19;

/// proton: starts
/// YYYY-MM-DD
static constexpr auto date_broken_down_length = 10;
bool optimistic_path_for_date_time_input = s + date_time_broken_down_length <= buf.buffer().end();

if (optimistic_path_for_date_time_input)
bool optimistic_path_for_date_time_with_zone_input = s + date_time_with_time_zone_broken_down_length <= buf.buffer().end();
/// proton: ends

if (optimistic_path_for_date_time_with_zone_input)
{
if (s[4] < '0' || s[4] > '9')
{
Expand All @@ -946,36 +965,96 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
second = (s[17] - '0') * 10 + (s[18] - '0');
}

if (unlikely(year == 0))
datetime = 0;
else
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);

if (dt_long)
buf.position() += date_time_broken_down_length;
else
buf.position() += date_broken_down_length;

/// proton: starts
/// processing time zone
bool has_time_zone_offset = false;
Int8 time_zone_offset_hour = 0;
Int8 time_zone_offset_minute = 0;
UInt8 timezone_length = 6;

if (*buf.position() == '+' || *buf.position() == '-')
{
has_time_zone_offset = true;
char timezone_sign = *buf.position();
++buf.position();

char tz[timezone_length];
auto size = buf.read(tz, timezone_length - 1);
tz[size] = 0;

if (size != timezone_length - 1 || tz[2] != ':')
throw ParsingException(std::string("Cannot parse Timezone ") + tz, ErrorCodes::CANNOT_PARSE_DATETIME);

time_zone_offset_hour = (tz[0] - '0') * 10 + (tz[1] - '0');
time_zone_offset_minute = (tz[3] - '0') * 10 + (tz[4] - '0');

if (timezone_sign == '-')
{
time_zone_offset_hour = -time_zone_offset_hour;
time_zone_offset_minute = -time_zone_offset_minute;
}
}
else if (*buf.position() == 'Z')
{
has_time_zone_offset = true;
++buf.position();
}

if (unlikely(year == 0))
{
datetime = 0;
}
else if (has_time_zone_offset)
{
datetime = DateLUT::instance("UTC").makeDateTime(year, month, day, hour, minute, second);
if (time_zone_offset_hour)
datetime -= time_zone_offset_hour * 3600;

if (time_zone_offset_minute)
datetime -= time_zone_offset_minute * 60;
}
else
{
datetime = date_lut.makeDateTime(year, month, day, hour, minute, second);
}
/// proton: ends

return ReturnType(true);
}
else
/// Why not readIntTextUnsafe? Because for needs of AdFox, parsing of unix timestamp with leading zeros is supported: 000...NNNN.
return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf);
}
else
return readDateTimeTextFallback<ReturnType>(datetime, buf, date_lut);
return readDateTimeTextFallback<ReturnType, dt64_mode>(datetime, buf, date_lut);
}

template <typename ReturnType>
inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut)
{
time_t whole;
if (!readDateTimeTextImpl<bool>(whole, buf, date_lut))
time_t whole = 0;
bool is_negative_timestamp = (!buf.eof() && *buf.position() == '-');
bool is_empty = buf.eof();

if (!is_empty)
{
return ReturnType(false);
try
{
readDateTimeTextImpl<ReturnType, true>(whole, buf, date_lut);
}
catch (const DB::ParsingException & exception)
{
if (buf.eof() || *buf.position() != '.')
throw exception;
}
}

int negative_multiplier = 1;
int negative_fraction_multiplier = 1;

DB::DecimalUtils::DecimalComponents<DateTime64> components{static_cast<DateTime64::NativeType>(whole), 0};

Expand Down Expand Up @@ -1003,18 +1082,18 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
while (!buf.eof() && isNumericASCII(*buf.position()))
++buf.position();

/// Fractional part (subseconds) is treated as positive by users
/// (as DateTime64 itself is a positive, although underlying decimal is negative)
/// setting fractional part to be negative when whole is 0 results in wrong value,
/// so we multiply result by -1.
if (components.whole < 0 && components.fractional != 0)
/// Fractional part (subseconds) is treated as positive by users, but represented as a negative number.
/// E.g. `1925-12-12 13:14:15.123` is represented internally as timestamp `-1390214744.877`.
/// Thus need to convert <negative_timestamp>.<fractional> to <negative_timestamp+1>.<1-0.<fractional>>
/// Also, setting fractional part to be negative when whole is 0 results in wrong value, in this case multiply result by -1.
if (!is_negative_timestamp && components.whole < 0 && components.fractional != 0)
{
const auto scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale);
++components.whole;
components.fractional = scale_multiplier - components.fractional;
if (!components.whole)
{
negative_multiplier = -1;
negative_fraction_multiplier = -1;
}
}
}
Expand All @@ -1029,12 +1108,15 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re

bool is_ok = true;
if constexpr (std::is_same_v<ReturnType, void>)
datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(components, scale);
{
datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(components, scale) * negative_fraction_multiplier;
}
else
{
is_ok = DecimalUtils::tryGetDecimalFromComponents<DateTime64>(components, scale, datetime64);

datetime64 *= negative_multiplier;

if (is_ok)
datetime64 *= negative_fraction_multiplier;
}

return ReturnType(is_ok);
}
Expand Down
Loading

0 comments on commit abab246

Please sign in to comment.