From cebd4814ff650a057ac01b4aa3af11239f86c949 Mon Sep 17 00:00:00 2001 From: Shylock Hg <33566796+Shylock-Hg@users.noreply.github.com> Date: Tue, 31 Aug 2021 18:32:16 +0800 Subject: [PATCH 1/2] Add the time parser. --- src/common/datatypes/Date.cpp | 15 +- .../function/test/FunctionManagerTest.cpp | 15 +- src/common/time/CMakeLists.txt | 1 + src/common/time/TimeParser.cpp | 419 ++++++++++++++++++ src/common/time/TimeParser.h | 137 ++++++ src/common/time/TimeUtils.h | 60 +-- src/common/time/TimezoneInfo.cpp | 1 - src/common/time/test/CMakeLists.txt | 14 + src/common/time/test/TimeParserTest.cpp | 249 +++++++++++ src/daemons/GraphDaemon.cpp | 7 + src/daemons/MetaDaemon.cpp | 7 + src/daemons/StorageDaemon.cpp | 7 + tests/Makefile | 2 +- tests/common/nebula_service.py | 4 +- .../mutate/InsertWithTimeType.feature | 70 +++ 15 files changed, 949 insertions(+), 59 deletions(-) create mode 100644 src/common/time/TimeParser.cpp create mode 100644 src/common/time/TimeParser.h create mode 100644 src/common/time/test/TimeParserTest.cpp diff --git a/src/common/datatypes/Date.cpp b/src/common/datatypes/Date.cpp index 0bb1fdb0432..53128041ed7 100644 --- a/src/common/datatypes/Date.cpp +++ b/src/common/datatypes/Date.cpp @@ -13,6 +13,11 @@ namespace nebula { +static inline std::string decimal(const std::string& number) { + auto find = std::find(number.begin(), number.end(), '.'); + return std::string(find, number.end()); +} + const int64_t kDaysSoFar[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}; const int64_t kLeapDaysSoFar[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}; @@ -99,22 +104,26 @@ std::string Date::toString() const { } std::string Time::toString() const { + auto microsecStr = folly::stringPrintf("%.9f", static_cast(microsec) / 1000000.0); + auto decimalPart = decimal(microsecStr); // It's in current timezone already - return folly::stringPrintf("%02d:%02d:%02d.%06d", hour, minute, sec, microsec); + return 
folly::stringPrintf("%02d:%02d:%02d%s", hour, minute, sec, decimalPart.c_str()); } std::string DateTime::toString() const { + auto microsecStr = folly::stringPrintf("%.9f", static_cast(microsec) / 1000000.0); + auto decimalPart = decimal(microsecStr); // It's in current timezone already return folly::stringPrintf( "%hd-%02hhu-%02hhu" - "T%02hhu:%02hhu:%02hhu.%u", + "T%02hhu:%02hhu:%02hhu%s", static_cast(year), static_cast(month), static_cast(day), static_cast(hour), static_cast(minute), static_cast(sec), - static_cast(microsec)); + decimalPart.c_str()); } } // namespace nebula diff --git a/src/common/function/test/FunctionManagerTest.cpp b/src/common/function/test/FunctionManagerTest.cpp index 817eb6ee15e..d94b45326cc 100644 --- a/src/common/function/test/FunctionManagerTest.cpp +++ b/src/common/function/test/FunctionManagerTest.cpp @@ -37,10 +37,12 @@ class FunctionManagerTest : public ::testing::Test { } auto res = result.value()(argsRef); if (res.type() != expect.type()) { - return ::testing::AssertionFailure() << "function return type check failed: " << expr; + return ::testing::AssertionFailure() << "function return type check failed, expect " + << expect.type() << ", got " << res.type(); } if (res != expect) { - return ::testing::AssertionFailure() << "function return value check failed: " << expr; + return ::testing::AssertionFailure() + << "function return value check failed, expect " << expect << ", got " << res; } return ::testing::AssertionSuccess(); } @@ -309,7 +311,7 @@ TEST_F(FunctionManagerTest, functionCall) { TEST_FUNCTION(toString, args_["toString_bool"], "true"); TEST_FUNCTION(toString, args_["string"], "AbcDeFG"); TEST_FUNCTION(toString, args_["date"], "1984-10-11"); - TEST_FUNCTION(toString, args_["datetime"], "1984-10-11T12:31:14.341"); + TEST_FUNCTION(toString, args_["datetime"], "1984-10-11T12:31:14.000341000"); TEST_FUNCTION(toString, args_["nullvalue"], Value::kNullValue); } { @@ -318,8 +320,9 @@ TEST_F(FunctionManagerTest, functionCall) { 
DateTime dateTime(2021, 10, 31, 8, 5, 34, 29); TEST_FUNCTION(concat, std::vector({"hello", 1, "world"}), "hello1world"); TEST_FUNCTION(concat, std::vector({true, 2, date}), "true22021-10-31"); - TEST_FUNCTION(concat, std::vector({true, dateTime}), "true2021-10-31T08:05:34.29"); - TEST_FUNCTION(concat, std::vector({2.3, time}), "2.309:39:21.000012"); + TEST_FUNCTION( + concat, std::vector({true, dateTime}), "true2021-10-31T08:05:34.000029000"); + TEST_FUNCTION(concat, std::vector({2.3, time}), "2.309:39:21.000012000"); TEST_FUNCTION(concat, args_["two"], "24"); TEST_FUNCTION(concat_ws, std::vector({",", 1}), "1"); TEST_FUNCTION(concat_ws, std::vector({"@", 1, "world"}), "1@world"); @@ -328,7 +331,7 @@ TEST_F(FunctionManagerTest, functionCall) { "1ABtrueABworld"); TEST_FUNCTION(concat_ws, std::vector({".", 1, true, Value::kNullValue, "world", time}), - "1.true.world.09:39:21.000012"); + "1.true.world.09:39:21.000012000"); } { TEST_FUNCTION(toBoolean, args_["int"], Value::kNullBadType); diff --git a/src/common/time/CMakeLists.txt b/src/common/time/CMakeLists.txt index 959efda8a8f..92e6c4e90db 100644 --- a/src/common/time/CMakeLists.txt +++ b/src/common/time/CMakeLists.txt @@ -15,6 +15,7 @@ nebula_add_library( TimeUtils.cpp TimezoneInfo.cpp TimeConversion.cpp + TimeParser.cpp ) nebula_add_subdirectory(test) diff --git a/src/common/time/TimeParser.cpp b/src/common/time/TimeParser.cpp new file mode 100644 index 00000000000..ba559a34976 --- /dev/null +++ b/src/common/time/TimeParser.cpp @@ -0,0 +1,419 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */
+
+#include "common/time/TimeParser.h"
+
+#include
+
+#include "common/time/TimeConversion.h"
+#include "common/time/TimeUtils.h"
+#include "common/time/TimezoneInfo.h"
+
+namespace nebula {
+namespace time {
+
+// Transition table of the date/time/datetime literal parser, indexed by State.
+// Each handler receives the current token `t', the read-ahead token `n' and the
+// parsing context. It stores the field carried by `t' into the context and
+// returns the state that has to consume the next token, or an error Status.
+// Separator tokens (`-', `:', the time prefix) are consumed by the state of the
+// field that follows them; that state simply returns itself.
+/*static*/ const std::vector TimeParser::dateTimeStates = {
+    // State, ShiftMap
+    // Entry point: picks the first field according to the expected literal kind.
+    {kInitial,
+     [](Token, Token, Context &ctx) -> StatusOr {
+       if (ctx.type == ExpectType::kDateTime) {
+         return kDateYear;
+       } else if (ctx.type == ExpectType::kDate) {
+         return kDateYear;
+       } else if (ctx.type == ExpectType::kTime) {
+         return kTimeHour;
+       }
+       return Status::Error("Unknown time ctx.type.");
+     }},
+    // Year; may be followed by `-', by the time prefix (datetime only) or by the end.
+    {kDateYear,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       if (t.type == TokenType::kNumber) {
+         if (t.val < std::numeric_limits::min() ||
+             t.val > std::numeric_limits::max()) {
+           return Status::Error("The year number `%d' exceed the number limit.",
+                                static_cast(t.val));
+         }
+         ctx.result.year = t.val;
+         switch (n.type) {
+           case TokenType::kMinus:
+             return kDateMonth;
+           case TokenType::kTimePrefix:
+             if (ctx.type == ExpectType::kDate) {
+               return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+             }
+             return kTimeHour;
+           case TokenType::kPlaceHolder:
+             return kEnd;
+           default:
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+         }
+       } else {
+         // The rejected token is the current one, not the read-ahead one.
+         return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Month in [1, 12]; a leading `-' is skipped.
+    {kDateMonth,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       if (t.type == TokenType::kMinus) {
+         return kDateMonth;
+       }
+       if (t.type == TokenType::kNumber) {
+         if (t.val < 1 || t.val > 12) {
+           return Status::Error("The month number `%d' exceed the number limit.",
+                                static_cast(t.val));
+         }
+         ctx.result.month = t.val;
+         switch (n.type) {
+           case TokenType::kMinus:
+             return kDateDay;
+           case TokenType::kTimePrefix:
+             if (ctx.type == ExpectType::kDate) {
+               return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+             }
+             return kTimeHour;
+           case TokenType::kPlaceHolder:
+             return kEnd;
+           default:
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+         }
+       } else {
+         // The rejected token is the current one, not the read-ahead one.
+         return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Day in [1, 31]; a leading `-' is skipped. The range is not checked against the month here.
+    {kDateDay,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       if (t.type == TokenType::kMinus) {
+         return kDateDay;
+       }
+       if (t.type == TokenType::kNumber) {
+         if (t.val < 1 || t.val > 31) {
+           return Status::Error("The day number `%d' exceed the number limit.",
+                                static_cast(t.val));
+         }
+         ctx.result.day = t.val;
+         switch (n.type) {
+           case TokenType::kTimePrefix:
+             if (ctx.type == ExpectType::kDate) {
+               return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+             }
+             return kTimeHour;
+           case TokenType::kPlaceHolder:
+             return kEnd;
+           default:
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+         }
+       } else {
+         // The rejected token is the current one, not the read-ahead one.
+         return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Hour in [0, 23]; a leading time prefix is skipped.
+    {kTimeHour,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       if (t.type == TokenType::kTimePrefix) {
+         return kTimeHour;
+       }
+       if (t.type == TokenType::kNumber) {
+         if (t.val < 0 || t.val > 23) {
+           return Status::Error("The hour number `%d' exceed the number limit.",
+                                static_cast(t.val));
+         }
+         ctx.result.hour = t.val;
+         switch (n.type) {
+           case TokenType::kTimeDelimiter:
+             return kTimeMinute;
+           case TokenType::kPlus:
+           case TokenType::kMinus:
+             return kUtcOffset;
+           case TokenType::kTimeZoneName:
+             return kTimeZone;
+           case TokenType::kPlaceHolder:
+             return kEnd;
+           default:
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+         }
+       } else {
+         // The rejected token is the current one, not the read-ahead one.
+         return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Minute in [0, 59]; a leading `:' is skipped.
+    {kTimeMinute,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       if (t.type == TokenType::kTimeDelimiter) {
+         return kTimeMinute;
+       }
+       if (t.type == TokenType::kNumber) {
+         if (t.val < 0 || t.val > 59) {
+           return Status::Error("The minute number `%d' exceed the number limit.",
+                                static_cast(t.val));
+         }
+         ctx.result.minute = t.val;
+         switch (n.type) {
+           case TokenType::kTimeDelimiter:
+             return kTimeSecond;
+           case TokenType::kPlus:
+           case TokenType::kMinus:
+             return kUtcOffset;
+           case TokenType::kTimeZoneName:
+             return kTimeZone;
+           case TokenType::kPlaceHolder:
+             return kEnd;
+           default:
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+         }
+       } else {
+         // The rejected token is the current one, not the read-ahead one.
+         return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Second in [0, 60), possibly fractional; a leading `:' is skipped.
+    // The integral part goes to `sec', the fraction to `microsec'.
+    {kTimeSecond,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       if (t.type == TokenType::kTimeDelimiter) {
+         return kTimeSecond;
+       }
+       if (t.type == TokenType::kNumber) {
+         if (t.val < 0 || t.val >= 60) {
+           return Status::Error("The second number `%f' exceed the number limit.", t.val);
+         }
+         double integer{0};
+         double fraction = std::modf(t.val, &integer);
+         ctx.result.sec = integer;
+         // Rounding a fraction such as .9999996 yields 1000000, which is outside
+         // the microsecond range; saturate at 999999 instead.
+         const double microsec = std::round(fraction * 1000000);
+         ctx.result.microsec = microsec > 999999 ? 999999 : microsec;
+         switch (n.type) {
+           case TokenType::kPlus:
+           case TokenType::kMinus:
+             return kUtcOffset;
+           case TokenType::kTimeZoneName:
+             return kTimeZone;
+           case TokenType::kPlaceHolder:
+             return kEnd;
+           default:
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+         }
+       } else {
+         // The rejected token is the current one, not the read-ahead one.
+         return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Sign of the UTC offset; only one sign is accepted per literal.
+    {kUtcOffset,
+     [](Token t, Token, Context &ctx) -> StatusOr {
+       switch (t.type) {
+         case TokenType::kPlus:
+         case TokenType::kMinus:
+           if (ctx.utcSign != TokenType::kUnknown) {
+             return Status::Error("Unexpected token `%s'.", toString(t.type));
+           }
+           ctx.utcSign = t.type;
+           return kUtcOffsetHour;
+         default:
+           return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Hours of the UTC offset, stored as signed seconds in ctx.utcOffsetSecs.
+    {kUtcOffsetHour,
+     [](Token t, Token, Context &ctx) -> StatusOr {
+       switch (t.type) {
+         case TokenType::kNumber:
+           if (t.val > 23) {
+             return Status::Error("Unexpected utc offset hours number `%d'.",
+                                  static_cast(t.val));
+           }
+           ctx.utcOffsetSecs =
+               (ctx.utcSign == TokenType::kPlus ? t.val * 60 * 60 : -t.val * 60 * 60);
+           return kUtcOffsetMinute;
+         default:
+           return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Minutes of the UTC offset. When the literal ends here the parsed value is
+    // shifted by the negated offset; otherwise a time zone name must follow.
+    {kUtcOffsetMinute,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       switch (t.type) {
+         case TokenType::kTimeDelimiter:
+           if (n.type != TokenType::kNumber) {
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+           }
+           return kUtcOffsetMinute;
+         case TokenType::kNumber:
+           if (t.val > 59) {
+             return Status::Error("Unexpected utc offset minutes number `%d'.",
+                                  static_cast(t.val));
+           }
+           ctx.utcOffsetSecs += (ctx.utcSign == TokenType::kPlus ? t.val * 60 : -t.val * 60);
+           if (n.type == TokenType::kPlaceHolder) {
+             ctx.result = TimeConversion::dateTimeShift(ctx.result, -ctx.utcOffsetSecs);
+             return kEnd;
+           } else if (n.type == TokenType::kTimeZoneName) {
+             return kTimeZone;
+           } else {
+             return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+           }
+         default:
+           return Status::Error("Unexpected token `%s'.", toString(t.type));
+       }
+     }},
+    // Bracketed time zone name; must be the last token. If an explicit UTC
+    // offset was also given it has to match the offset of the named zone.
+    {kTimeZone,
+     [](Token t, Token n, Context &ctx) -> StatusOr {
+       DCHECK(t.type == TokenType::kTimeZoneName);
+       if (n.type != TokenType::kPlaceHolder) {
+         return Status::Error("Unexpected read-ahead token `%s'.", toString(n.type));
+       }
+       int32_t utcOffsetSecs = 0;
+       Timezone tz;
+       auto result = tz.loadFromDb(t.str);
+       NG_RETURN_IF_ERROR(result);
+       if (ctx.utcSign != TokenType::kUnknown) {
+         if (tz.utcOffsetSecs() != ctx.utcOffsetSecs) {
+           return Status::Error("Mismatched time zone offset and time zone name.");
+         } else {
+           utcOffsetSecs = ctx.utcOffsetSecs;
+         }
+       } else {
+         utcOffsetSecs = tz.utcOffsetSecs();
+       }
+       ctx.result = TimeConversion::dateTimeShift(ctx.result, -utcOffsetSecs);
+       return kEnd;
+     }},
+    // Terminal state. parse() leaves its loop as soon as kEnd is reached, so
+    // this handler is not expected to be invoked.
+    {kEnd, [](Token, Token, Context &) -> StatusOr { return Status::OK(); }},
+};
+
+// Returns a printable name for a token type, used in error messages.
+/*static*/ const char *TimeParser::toString(TokenType t) {
+  switch (t) {
+    case
TokenType::kUnknown:
+      return "Unknown";
+    case TokenType::kPlaceHolder:
+      return "PlaceHolder";
+    case TokenType::kNumber:
+      return "Number";
+    case TokenType::kPlus:
+      return "+";
+    case TokenType::kMinus:
+      return "-";
+    case TokenType::kTimeDelimiter:
+      return "TimeDelimiter";
+    case TokenType::kTimePrefix:
+      return "TimePrefix";
+    case TokenType::kTimeZoneName:
+      return "TimeZoneName";
+  }
+  LOG(FATAL) << "Unknown token " << static_cast(t);
+  return "Unknown token";
+}
+
+// Renders a token as `Type(payload)': the payload is the numeric value for
+// number tokens, the name for time zone tokens and empty otherwise.
+/*static*/ std::string TimeParser::toString(const Token &t) {
+  std::stringstream ss;
+  ss << toString(t.type);
+  ss << "(";
+  if (t.type == TokenType::kNumber) {
+    ss << t.val;
+  }
+  if (t.type == TokenType::kTimeZoneName) {
+    ss << t.str;
+  }
+  ss << ")";
+  return ss.str();
+}
+
+// Returns a printable name for a parser state.
+/*static*/ const char *TimeParser::toString(State state) {
+  switch (state) {
+    case kInitial:
+      return "Initial";
+    case kDateYear:
+      return "DateYear";
+    case kDateMonth:
+      return "DateMonth";
+    case kDateDay:
+      return "DateDay";
+    case kTimeHour:
+      return "TimeHour";
+    case kTimeMinute:
+      return "TimeMinute";
+    case kTimeSecond:
+      return "TimeSecond";
+    case kUtcOffset:
+      return "UtcOffset";
+    case kUtcOffsetHour:
+      return "UtcOffsetHour";
+    case kUtcOffsetMinute:
+      return "UtcOffsetMinute";
+    case kTimeZone:
+      return "TimeZone";
+    case kEnd:
+      return "End";
+    case kSize:
+      return "Size";
+  }
+  DLOG(FATAL) << "Unknown state " << static_cast(state);
+  return "Unknown";
+}
+
+// Splits `str' into tokens_ and appends a trailing kPlaceHolder token so that
+// the last real token still has a read-ahead token in parse().
+// Returns an error Status on an illegal character, a malformed number or an
+// unterminated `[' time zone name.
+Status TimeParser::lex(folly::StringPiece str) {
+  tokens_.reserve(8);
+  std::string digits;
+  digits.reserve(8);
+  // A StringPiece is not guaranteed to be NUL-terminated, so every read is
+  // bounded by `end' instead of scanning for '\0'.
+  const auto end = str.end();
+  auto c = str.start();
+  while (c != end) {
+    // std::isdigit is only defined for values representable as unsigned char,
+    // hence the casts (input bytes >= 0x80 are negative as plain `char').
+    if (std::isdigit(static_cast<unsigned char>(*c)) || kFractionPrefix == *c) {
+      digits.push_back(*c);
+      // The number ends at the end of the input or right before the first
+      // character that is neither a digit nor the fraction prefix.
+      const auto next = c + 1;
+      if (next == end ||
+          !(std::isdigit(static_cast<unsigned char>(*next)) || kFractionPrefix == *next)) {
+        if (digits.front() == '.') {
+          return Status::Error("Unexpected character `%c'.", digits.front());
+        }
+        if (digits.back() == '.') {
+          return Status::Error("Expected character fraction.");
+        }
+        Token t;
+        try {
+          t.val = folly::to(digits);
+        } catch (const std::exception &e) {
+          return Status::Error("%s", e.what());
+        }
+        t.type = TokenType::kNumber;
+        tokens_.emplace_back(t);
+        digits.clear();
+      }
+    } else if (kTimeDelimiter == *c) {
+      tokens_.emplace_back(Token{TokenType::kTimeDelimiter, 0, ""});
+    } else if (kTimePrefix == *c || kTimeSpacePrefix == *c) {
+      tokens_.emplace_back(Token{TokenType::kTimePrefix, 0, ""});
+    } else if (kPlus == *c) {
+      tokens_.emplace_back(Token{TokenType::kPlus, 0, ""});
+    } else if (kMinus == *c) {
+      tokens_.emplace_back(Token{TokenType::kMinus, 0, ""});
+    } else if (kLeftBracket == *c) {
+      // Time zone name: everything up to the closing bracket.
+      std::string s;
+      for (++c; c != end && *c != kRightBracket; ++c) {
+        s.push_back(*c);
+      }
+      if (c == end) {
+        return Status::Error("Unterminated bracket.");
+      }
+      tokens_.emplace_back(Token{TokenType::kTimeZoneName, 0, std::move(s)});
+    } else {
+      return Status::Error("Illegal character `%c'.", *c);
+    }
+    ++c;
+  }
+  // Only for read-ahead placeholder
+  tokens_.emplace_back(Token{TokenType::kPlaceHolder, 0, ""});
+  return Status::OK();
+}
+
+// Runs the dateTimeStates state machine over tokens_, feeding every token
+// together with its read-ahead successor; the handlers store the parsed
+// fields in ctx_. Returns OK only if the machine reaches kEnd.
+Status TimeParser::parse() {
+  auto result = dateTimeStates[kInitial].next({}, {}, ctx_);
+  NG_RETURN_IF_ERROR(result);
+  auto current = result.value();
+  // `i + 1 < size()' rather than `i < size() - 1': the latter wraps around
+  // when tokens_ is empty and would index out of bounds.
+  for (std::size_t i = 0; i + 1 < tokens_.size(); ++i) {
+    result = dateTimeStates[current].next(tokens_[i], tokens_[i + 1], ctx_);
+    NG_RETURN_IF_ERROR(result);
+    current = result.value();
+    if (current == kEnd) {
+      break;
+    }
+  }
+  if (current != kEnd) {
+    return Status::Error("Not end parse.");
+  }
+  return Status::OK();
+}
+
+}  // namespace time
+}  // namespace nebula
diff --git a/src/common/time/TimeParser.h b/src/common/time/TimeParser.h
new file mode 100644
index 00000000000..0da70851240
--- /dev/null
+++ b/src/common/time/TimeParser.h
@@ -0,0 +1,137 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */ + +#ifndef COMMON_TIME_TIMEPARSER_H_ +#define COMMON_TIME_TIMEPARSER_H_ + +#include "common/base/Base.h" +#include "common/base/Status.h" +#include "common/base/StatusOr.h" +#include "common/datatypes/Date.h" + +namespace nebula { +namespace time { + +// parser the date/time/datetime literal +class TimeParser { + public: + TimeParser() = default; + + StatusOr parseDate(folly::StringPiece str) { + ctx_.type = ExpectType::kDate; + NG_RETURN_IF_ERROR(lex(str)); + NG_RETURN_IF_ERROR(parse()); + return Date{static_cast(ctx_.result.year), + static_cast(ctx_.result.month), + static_cast(ctx_.result.day)}; + } + + StatusOr