From 5f36fefd0a98f4e8b626f58f58deac51b96c26c3 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 24 Jun 2021 00:32:16 +0800 Subject: [PATCH] Add function castBIGINT_timestamp (#22) * Add function castBIGINT_timestamp * fix * wip --- cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 11 +++++++++++ cpp/src/gandiva/function_registry_common.h | 5 ++++- cpp/src/gandiva/function_registry_datetime.cc | 9 +++++++++ cpp/src/gandiva/function_signature.cc | 8 ++++++++ cpp/src/gandiva/gdv_function_stubs.cc | 3 ++- cpp/src/gandiva/gdv_function_stubs.h | 1 + cpp/src/gandiva/jni/jni_common.cc | 15 +++++++++++---- cpp/src/gandiva/precompiled/arithmetic_ops.cc | 2 ++ cpp/src/gandiva/precompiled/hash.cc | 3 ++- cpp/src/gandiva/precompiled/time.cc | 15 ++++++++++++++- cpp/src/gandiva/precompiled/types.h | 3 ++- .../arrow/gandiva/expression/ArrowTypeHelper.java | 4 ++++ 12 files changed, 70 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 226452404d06b..bb6d2cc3568eb 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1101,6 +1101,17 @@ TEST(Cast, TimestampToTimestamp) { options.allow_time_truncate = true; CheckCast(will_be_truncated, coarse, options); } + + for (auto types : { + TimestampTypePair{timestamp(TimeUnit::MILLI, "UTC+8"), timestamp(TimeUnit::MILLI)} + }) { + auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200000000000, 1000000000, 2000000000]"); + auto promoted = + ArrayFromJSON(types.fine, "[0, null, 200000000000, 1000000000, 2000000000]"); + + // multiply/promote + CheckCast(coarse, promoted); + } } TEST(Cast, TimestampZeroCopy) { diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h index 5ce21125abe0f..dce052b1f9c55 100644 --- a/cpp/src/gandiva/function_registry_common.h +++ b/cpp/src/gandiva/function_registry_common.h @@ -55,6 +55,9 @@ 
inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); } inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); } inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); } + +inline DataTypePtr timestampusutc() { return arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"); } + inline DataTypePtr decimal128() { return arrow::decimal(38, 0); } struct KeyHash { @@ -268,7 +271,7 @@ typedef std::unordered_map GetDateTimeFunctionRegistry() { NativeFunction("castTIME", {}, DataTypeVector{timestamp()}, time32(), kResultNullIfNull, "castTIME_timestamp"), + NativeFunction("castBIGINT", {}, DataTypeVector{timestamp()}, int64(), + kResultNullIfNull, "castBIGINT_timestamp"), + NativeFunction("castBIGINT", {}, DataTypeVector{day_time_interval()}, int64(), kResultNullIfNull, "castBIGINT_daytimeinterval"), @@ -139,6 +142,12 @@ std::vector GetDateTimeFunctionRegistry() { NativeFunction("castDATE", {}, DataTypeVector{date64()}, date32(), - kResultNullIfNull, "castDATE_date64"), + kResultNullIfNull, "castDATE32_date64"), + + NativeFunction("castTIMESTAMP", {}, DataTypeVector{date32()}, timestamp(), + kResultNullIfNull, "castTIMESTAMP_date32"), + + NativeFunction("castDATE", {}, DataTypeVector{timestamp()}, date32(), + kResultNullIfNull, "castDATE32_timestamp"), DATE_TYPES(LAST_DAY_SAFE_NULL_IF_NULL, last_day, {}); return date_time_fn_registry_; diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc index 6dc6416178e15..665973833b6e0 100644 --- a/cpp/src/gandiva/function_signature.cc +++ b/cpp/src/gandiva/function_signature.cc @@ -45,6 +45,14 @@ bool DataTypeEquals(const DataTypePtr& left, const DataTypePtr& right) { return (dleft != NULL) && (dright != NULL) && (dleft->byte_width() == dright->byte_width()); } + case arrow::Type::TIMESTAMP: { + // Signature for timestamp treated the same if both are with zone or without zone. 
+ auto tleft = checked_cast(left.get()); + auto tright = checked_cast(right.get()); + return (tleft != NULL) && (tright != NULL) && + (tleft->unit() == tright->unit()) && + (tleft->timezone().empty() == tright->timezone().empty()); + } default: return left->Equals(right); } diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 421ede8d103e0..0886acb10e93d 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -311,7 +311,8 @@ CRC_FUNCTION(binary) INNER(date64) \ INNER(date32) \ INNER(time32) \ - INNER(timestamp) + INNER(timestamp) \ + INNER(timestampusutc) // Expand inner macro for all numeric types. #define SHA_VAR_LEN_PARAMS(INNER) \ diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index d39d2940423df..5d52fb8fe4083 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -39,6 +39,7 @@ using gdv_date64 = int64_t; using gdv_date32 = int32_t; using gdv_time32 = int32_t; using gdv_timestamp = int64_t; +using gdv_timestampusutc = int64_t; using gdv_utf8 = char*; using gdv_binary = char*; using gdv_day_time_interval = int64_t; diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc index 5a4cbb031889d..81c13f600b060 100644 --- a/cpp/src/gandiva/jni/jni_common.cc +++ b/cpp/src/gandiva/jni/jni_common.cc @@ -155,19 +155,26 @@ DataTypePtr ProtoTypeToTime64(const types::ExtGandivaType& ext_type) { } DataTypePtr ProtoTypeToTimestamp(const types::ExtGandivaType& ext_type) { + arrow::TimeUnit::type unit; switch (ext_type.timeunit()) { case types::SEC: - return arrow::timestamp(arrow::TimeUnit::SECOND); + unit = arrow::TimeUnit::SECOND; + break; case types::MILLISEC: - return arrow::timestamp(arrow::TimeUnit::MILLI); + unit = arrow::TimeUnit::MILLI; + break; case types::MICROSEC: - return arrow::timestamp(arrow::TimeUnit::MICRO); + unit = arrow::TimeUnit::MICRO; + break; case 
types::NANOSEC: - return arrow::timestamp(arrow::TimeUnit::NANO); + unit = arrow::TimeUnit::NANO; + break; default: std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for timestamp\n"; return nullptr; } + const std::string& zone_id = ext_type.timezone(); + return arrow::timestamp(unit, zone_id); } DataTypePtr ProtoTypeToInterval(const types::ExtGandivaType& ext_type) { diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/cpp/src/gandiva/precompiled/arithmetic_ops.cc index f96e6be87b3d8..6fa04da419f35 100644 --- a/cpp/src/gandiva/precompiled/arithmetic_ops.cc +++ b/cpp/src/gandiva/precompiled/arithmetic_ops.cc @@ -41,6 +41,7 @@ extern "C" { INNER(NAME, date64, OP) \ INNER(NAME, date32, OP) \ INNER(NAME, timestamp, OP) \ + INNER(NAME, timestampusutc, OP) \ INNER(NAME, time32, OP) #define NUMERIC_DATE_TYPES(INNER, NAME, OP) \ @@ -362,6 +363,7 @@ NUMERIC_TYPES(IS_TRUE_OR_FALSE_NUMERIC, isfalse, !) INNER(date32) \ INNER(date64) \ INNER(timestamp) \ + INNER(timestampusutc) \ INNER(time32) #define NUMERIC_BOOL_DATE_FUNCTION(INNER) \ diff --git a/cpp/src/gandiva/precompiled/hash.cc b/cpp/src/gandiva/precompiled/hash.cc index 030099168411f..70698c36e8ead 100644 --- a/cpp/src/gandiva/precompiled/hash.cc +++ b/cpp/src/gandiva/precompiled/hash.cc @@ -307,7 +307,8 @@ FORCE_INLINE gdv_int32 hash64_spark_int64_int32(gdv_int64 val, gdv_boolean is_va INNER(NAME, date64) \ INNER(NAME, date32) \ INNER(NAME, time32) \ - INNER(NAME, timestamp) + INNER(NAME, timestamp) \ + INNER(NAME, timestampusutc) NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash) NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 49232fcf9bf78..b9efb820836d5 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -826,10 +826,18 @@ gdv_timestamp convertTimestampUnit_us(gdv_timestamp timestamp_in_micro) { return timestamp_in_micro / 1000; } -gdv_date32 castDATE_date64(gdv_date64 
date_in_millis) { +gdv_date32 castDATE32_date64(gdv_date64 date_in_millis) { return static_cast(date_in_millis / (MILLIS_IN_DAY)); } +gdv_timestamp castTIMESTAMP_date32(gdv_date32 in_day) { + return static_cast<gdv_timestamp>(in_day) * (MILLIS_IN_DAY); +} + +gdv_date32 castDATE32_timestamp(gdv_timestamp timestamp_in_millis) { + return static_cast(timestamp_in_millis / (MILLIS_IN_DAY)); +} + const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in, gdv_int64 length, gdv_int32* out_len) { gdv_int64 year = extractYear_timestamp(in); @@ -896,6 +904,11 @@ gdv_int64 extractMillis_daytimeinterval(gdv_day_time_interval in) { return static_cast(millis); } +FORCE_INLINE +gdv_int64 castBIGINT_timestamp(gdv_timestamp in) { + return in; +} + FORCE_INLINE gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) { return extractMillis_daytimeinterval(in) + diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index 4f943bfa449ed..39c23183e932f 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -37,6 +37,7 @@ using gdv_date64 = int64_t; using gdv_date32 = int32_t; using gdv_time32 = int32_t; using gdv_timestamp = int64_t; +using gdv_timestampusutc = int64_t; using gdv_utf8 = char*; using gdv_binary = char*; using gdv_day_time_interval = int64_t; @@ -424,7 +425,7 @@ gdv_date64 castDATE_timestamp(gdv_timestamp); gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis); gdv_timestamp convertTimestampUnit_ms(gdv_timestamp); gdv_timestamp convertTimestampUnit_us(gdv_timestamp); -gdv_date32 castDATE_date64(gdv_date64 date); +gdv_date32 castDATE32_date64(gdv_date64 date); const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, gdv_int32*); gdv_date64 last_day_from_timestamp(gdv_date64 millis); diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java 
index 90f8684b455a8..3503bbbf83577 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java @@ -201,6 +201,10 @@ private static void initArrowTypeTimestamp(ArrowType.Timestamp timestampType, // not supported } } + String timezone = timestampType.getTimezone(); + if (timezone != null) { + builder.setTimeZone(timezone); + } } private static void initArrowTypeInterval(ArrowType.Interval interval,