From 8223d6511d2a00293f85ebee50eab3ed012bb33b Mon Sep 17 00:00:00 2001 From: Shylock Hg <33566796+Shylock-Hg@users.noreply.github.com> Date: Thu, 24 Sep 2020 15:14:36 +0800 Subject: [PATCH 01/13] Add the parser to parse time. --- src/common/algorithm/test/CMakeLists.txt | 4 +- src/common/concurrent/test/CMakeLists.txt | 1 + src/common/datatypes/Date.h | 37 ++- src/common/thread/test/CMakeLists.txt | 1 + src/common/time/CMakeLists.txt | 1 + src/common/time/TimeParser.cpp | 302 ++++++++++++++++++++++ src/common/time/TimeParser.h | 116 +++++++++ src/common/time/test/CMakeLists.txt | 18 +- src/common/time/test/TimeParserTest.cpp | 211 +++++++++++++++ 9 files changed, 688 insertions(+), 3 deletions(-) create mode 100644 src/common/time/TimeParser.cpp create mode 100644 src/common/time/TimeParser.h create mode 100644 src/common/time/test/TimeParserTest.cpp diff --git a/src/common/algorithm/test/CMakeLists.txt b/src/common/algorithm/test/CMakeLists.txt index a50d172cb..31a606f16 100644 --- a/src/common/algorithm/test/CMakeLists.txt +++ b/src/common/algorithm/test/CMakeLists.txt @@ -6,6 +6,8 @@ nebula_add_test( NAME reservoir_sampling_test SOURCES ReservoirSamplingTest.cpp - OBJECTS $ + OBJECTS + $ + $ LIBRARIES gtest gtest_main ) diff --git a/src/common/concurrent/test/CMakeLists.txt b/src/common/concurrent/test/CMakeLists.txt index 33561278a..bb3266bbd 100644 --- a/src/common/concurrent/test/CMakeLists.txt +++ b/src/common/concurrent/test/CMakeLists.txt @@ -9,6 +9,7 @@ nebula_add_test( SOURCES BarrierTest.cpp LatchTest.cpp OBJECTS + $ $ $ $ diff --git a/src/common/datatypes/Date.h b/src/common/datatypes/Date.h index 3fc5ac677..dd88248c8 100644 --- a/src/common/datatypes/Date.h +++ b/src/common/datatypes/Date.h @@ -24,7 +24,12 @@ struct Date { int8_t day; // 1 - 31 Date() : year{0}, month{1}, day{1} {} - Date(int16_t y, int8_t m, int8_t d) : year{y}, month{m}, day{d} {} + Date(int16_t y, int8_t m, int8_t d) : year{y}, month{m}, day{d} { + DCHECK_GT(month, 0); + DCHECK_LE(month, 12); + DCHECK_GT(day, 0); + DCHECK_LE(day, 31); + } // Tak the number of days since -32768/1/1, and convert to the real date explicit Date(uint64_t days); @@ -81,6 +86,19 @@ struct Time { int8_t sec; int32_t microsec; + Time() : hour{0}, minute{0}, sec{0}, microsec{0} {} + Time(int8_t h, int8_t min, int8_t s, int32_t us) + : hour{h}, minute{min}, sec{s}, microsec{us} { + DCHECK_GE(hour, 0); + DCHECK_LT(hour, 24); + DCHECK_GE(minute, 0); + DCHECK_LT(minute, 60); + DCHECK_GE(sec, 0); + DCHECK_LT(sec, 60); + DCHECK_GE(microsec, 0); + DCHECK_LT(microsec, 1000000); + } + void clear() { hour = 0; minute = 0; @@ -112,6 +130,23 @@ struct DateTime { int8_t sec; int32_t microsec; + DateTime() : year{0}, month{1}, day{1}, hour{0}, minute{0}, sec{0}, microsec{0} {} + DateTime(int16_t y, int8_t m, int8_t d, int8_t h, int8_t min, int8_t s, int32_t us) + : year{y}, month{m}, day{d}, hour{h}, minute{min}, sec{s}, microsec{us} { + DCHECK_GT(month, 0); + DCHECK_LE(month, 12); + DCHECK_GT(day, 0); + DCHECK_LE(day, 31); + DCHECK_GE(hour, 0); + DCHECK_LT(hour, 24); + DCHECK_GE(minute, 0); + DCHECK_LT(minute, 60); + DCHECK_GE(sec, 0); + DCHECK_LT(sec, 60); + DCHECK_GE(microsec, 0); + DCHECK_LT(microsec, 1000000); + } + void clear() { year = 0; month = 0; diff --git a/src/common/thread/test/CMakeLists.txt b/src/common/thread/test/CMakeLists.txt index b6ac76cae..e4786fe41 100644 --- a/src/common/thread/test/CMakeLists.txt +++ b/src/common/thread/test/CMakeLists.txt @@ -11,6 +11,7 @@ nebula_add_test( GenericWorkerTest.cpp GenericThreadPoolTest.cpp OBJECTS + $ $ $ $ diff --git a/src/common/time/CMakeLists.txt b/src/common/time/CMakeLists.txt index 7546e3dae..c3b55f69b 100644 --- a/src/common/time/CMakeLists.txt +++ b/src/common/time/CMakeLists.txt @@ -8,6 +8,7 @@ nebula_add_library( detail/TscHelper.cpp Duration.cpp WallClock.cpp + TimeParser.cpp ) nebula_add_subdirectory(test) diff --git a/src/common/time/TimeParser.cpp b/src/common/time/TimeParser.cpp new file mode 100644 index 000000000..c4b26aed3 --- /dev/null +++ b/src/common/time/TimeParser.cpp @@ -0,0 +1,302 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "common/time/TimeParser.h" + +namespace nebula { +namespace time { + +/*static*/ const std::vector TimeParser::dateTimeStates = { + // State, ShiftMap + {kInitial, + [](Token, Token, DateTime&, ExpectType type) -> StatusOr { + if (type == ExpectType::kDateTime) { + return kDateYear; + } else if (type == ExpectType::kDate) { + return kDateYear; + } else if (type == ExpectType::kTime) { + return kTimeHour; + } + return Status::Error("Unknown time type."); + }}, + {kDateYear, + [](Token t, Token n, DateTime& val, ExpectType type) -> StatusOr { + if (t.type == TokenType::kNumber) { + if (t.val < std::numeric_limits::min() || + t.val > std::numeric_limits::max()) { + return Status::Error("The year number `%d' exceed the number limit.", t.val); + } + val.year = t.val; + switch (n.type) { + case TokenType::kDateDelimiter: + return kDateMonth; + case TokenType::kTimePrefix: + if (type == ExpectType::kDate) { + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + return kTimeHour; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + }}, + {kDateMonth, + [](Token t, Token n, DateTime& val, ExpectType type) -> StatusOr { + if (t.type == TokenType::kDateDelimiter) { + return kDateMonth; + } + if (t.type == TokenType::kNumber) { + if (t.val < 1 || t.val > 12) { + return Status::Error("The month number `%d' exceed the number limit.", t.val); + } + val.month = t.val; + switch (n.type) { + case TokenType::kDateDelimiter: + return kDateDay; + case TokenType::kTimePrefix: + if (type == ExpectType::kDate) { + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + return kTimeHour; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + }}, + {kDateDay, + [](Token t, Token n, DateTime& val, ExpectType type) -> StatusOr { + if (t.type == TokenType::kDateDelimiter) { + return kDateDay; + } + if (t.type == TokenType::kNumber) { + if (t.val < 1 || t.val > 31) { + return Status::Error("The day number `%d' exceed the number limit.", t.val); + } + val.day = t.val; + switch (n.type) { + case TokenType::kTimePrefix: + if (type == ExpectType::kDate) { + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + return kTimeHour; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + }}, + {kTimeHour, + [](Token t, Token n, DateTime& val, ExpectType) -> StatusOr { + if (t.type == TokenType::kTimePrefix) { + return kTimeHour; + } + if (t.type == TokenType::kNumber) { + if (t.val < 0 || t.val > 23) { + return Status::Error("The hour number `%d' exceed the number limit.", t.val); + } + val.hour = t.val; + switch (n.type) { + case TokenType::kTimeDelimiter: + return kTimeMinute; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + return Status::OK(); + }}, + {kTimeMinute, + [](Token t, Token n, DateTime& val, ExpectType) -> StatusOr { + if (t.type == TokenType::kTimeDelimiter) { + return kTimeMinute; + } + if (t.type == TokenType::kNumber) { + if (t.val < 0 || t.val > 59) { + return Status::Error("The minute number `%d' exceed the number limit.", t.val); + } + val.minute = t.val; + switch (n.type) { + case TokenType::kTimeDelimiter: + return kTimeSecond; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + return Status::OK(); + }}, + {kTimeSecond, + [](Token t, Token n, DateTime& val, ExpectType) -> StatusOr { + if (t.type == TokenType::kTimeDelimiter) { + return kTimeSecond; + } + if (t.type == TokenType::kNumber) { + if (t.val < 0 || t.val > 59) { + return Status::Error("The second number `%d' exceed the number limit.", t.val); + } + val.sec = t.val; + switch (n.type) { + case TokenType::kMicroSecondPrefix: + return kTimeMicroSecond; + case TokenType::kUTCoffsetPrefixPlus: + case TokenType::kUTCoffsetPrefixMinus: + return kUtcOffset; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected read-ahead token `%s'.", + toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + return Status::OK(); + }}, + {kTimeMicroSecond, + [](Token t, Token n, DateTime& val, ExpectType) -> StatusOr { + if (t.type == TokenType::kMicroSecondPrefix) { + return kTimeMicroSecond; + } + if (t.type == TokenType::kNumber) { + if (t.val < 0 || t.val > 999999) { + return Status::Error("The microsecond number `%d' exceed the number limit.", + t.val); + } + val.microsec = t.val; + switch (n.type) { + case TokenType::kMicroSecondPrefix: + return kTimeMicroSecond; + case TokenType::kUTCoffsetPrefixPlus: + case TokenType::kUTCoffsetPrefixMinus: + return kUtcOffset; + case TokenType::kPlaceHolder: + return kEnd; + default: + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + } else { + return Status::Error("Unexpected token `%s'.", toString(n.type)); + } + return Status::OK(); + }}, + {kUtcOffset, + [](Token, Token, DateTime&, ExpectType) -> StatusOr { + // TODD(shylock) support when has the timezone info + return Status::NotSupported("Utc offset not supported now."); + }}, + {kEnd, [](Token, Token, DateTime&, ExpectType) -> StatusOr { return Status::OK(); }}, +}; + +/*static*/ const char* TimeParser::toString(TokenType t) { + switch (t) { + case TokenType::kUnknown: + return "Unknown"; + case TokenType::kPlaceHolder: + return "PlaceHolder"; + case TokenType::kNumber: + return "Number"; + case TokenType::kDateDelimiter: + return "DateDelimiter"; + case TokenType::kTimeDelimiter: + return "TimeDelimiter"; + case TokenType::kTimePrefix: + return "TimePrefix"; + case TokenType::kMicroSecondPrefix: + return "MicroSecondPrefix"; + case TokenType::kUTCoffsetPrefixPlus: + return "UTCoffsetPrefixPlus"; + case TokenType::kUTCoffsetPrefixMinus: + return "UTCoffsetPrefixMinus"; + } + LOG(FATAL) << "Unknown token " << static_cast(t); +} + +Status TimeParser::lex(folly::StringPiece str) { + tokens_.reserve(8); + std::string digits; + digits.reserve(8); + auto c = str.start(); + while (*c != '\0') { + if (std::isdigit(*c)) { + digits.push_back(*c); + if (!std::isdigit(*(c + 1))) { + Token t; + try { + t.val = folly::to(digits); + } catch (std::exception& e) { + return Status::Error("%s", e.what()); + } + t.type = TokenType::kNumber; + tokens_.emplace_back(t); + digits.clear(); + } + } else if (kDateDelimiter == *c) { + tokens_.emplace_back(Token{TokenType::kDateDelimiter, 0}); + } else if (kTimeDelimiter == *c) { + tokens_.emplace_back(Token{TokenType::kTimeDelimiter, 0}); + } else if (kTimePrefix == *c) { + tokens_.emplace_back(Token{TokenType::kTimePrefix, 0}); + } else if (kMicroSecondPrefix == *c) { + tokens_.emplace_back(Token{TokenType::kMicroSecondPrefix, 0}); + } else if (kUTCoffsetPrefixPlus == *c) { + tokens_.emplace_back(Token{TokenType::kUTCoffsetPrefixPlus, 0}); + } else if (kUTCoffsetPrefixMinus == *c) { + tokens_.emplace_back(Token{TokenType::kUTCoffsetPrefixMinus, 0}); + } else { + return Status::Error("Illegal character `%c'.", *c); + } + ++c; + } + // Only for read-ahead placeholder + tokens_.emplace_back(Token{TokenType::kPlaceHolder, 0}); + return Status::OK(); +} + +Status TimeParser::parse() { + auto result = dateTimeStates[kInitial].next({}, {}, result_, type_); + NG_RETURN_IF_ERROR(result); + auto current = result.value(); + for (std::size_t i = 0; i < tokens_.size() - 1; ++i) { + result = dateTimeStates[current].next(tokens_[i], tokens_[i + 1], result_, type_); + NG_RETURN_IF_ERROR(result); + current = result.value(); + if (current == kEnd) { + break; + } + } + if (current != kEnd) { + return Status::Error("Not end parse."); + } + return Status::OK(); +} + +} // namespace time +} // namespace nebula diff --git a/src/common/time/TimeParser.h b/src/common/time/TimeParser.h new file mode 100644 index 000000000..3196ee615 --- /dev/null +++ b/src/common/time/TimeParser.h @@ -0,0 +1,116 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef COMMON_TIME_TIMEPARSER_H_ +#define COMMON_TIME_TIMEPARSER_H_ + +#include "common/base/Base.h" +#include "common/base/Status.h" +#include "common/base/StatusOr.h" +#include "common/datatypes/Date.h" + +namespace nebula { +namespace time { + +// parser the date/time/datetime literal +class TimeParser { +public: + TimeParser() = default; + + StatusOr parseDate(folly::StringPiece str) { + type_ = ExpectType::kDate; + NG_RETURN_IF_ERROR(lex(str)); + NG_RETURN_IF_ERROR(parse()); + return Date{result_.year, result_.month, result_.day}; + } + + StatusOr