From d0466eca208313f3a8b669794e0bdc920151e19e Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Tue, 12 Apr 2022 21:34:18 +0800 Subject: [PATCH] minor: enable Date32/64 to String/LargeString cast (#1534) --- arrow/src/compute/kernels/cast.rs | 108 ++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 0f4c1c42a537..2fdc807aa48e 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -153,6 +153,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (LargeUtf8, Date32 | Date64 | Timestamp(TimeUnit::Nanosecond, None)) => true, (LargeUtf8, _) => DataType::is_numeric(to_type), (Timestamp(_, _), Utf8) | (Timestamp(_, _), LargeUtf8) => true, + (Date32, Utf8) | (Date32, LargeUtf8) => true, + (Date64, Utf8) | (Date64, LargeUtf8) => true, (_, Utf8 | LargeUtf8) => DataType::is_numeric(from_type) || from_type == &Binary, // start numeric casts @@ -736,6 +738,8 @@ pub fn cast_with_options( cast_timestamp_to_string::(array) } }, + Date32 => cast_date32_to_string::(array), + Date64 => cast_date64_to_string::(array), Binary => { let array = array.as_any().downcast_ref::().unwrap(); Ok(Arc::new( @@ -790,6 +794,8 @@ pub fn cast_with_options( cast_timestamp_to_string::(array) } }, + Date32 => cast_date32_to_string::(array), + Date64 => cast_date64_to_string::(array), Binary => { let array = array.as_any().downcast_ref::().unwrap(); Ok(Arc::new( @@ -1365,6 +1371,44 @@ where )) } +/// Cast date32 types to Utf8/LargeUtf8 +fn cast_date32_to_string( + array: &ArrayRef, +) -> Result { + let array = array.as_any().downcast_ref::().unwrap(); + + Ok(Arc::new( + (0..array.len()) + .map(|ix| { + if array.is_null(ix) { + None + } else { + array.value_as_date(ix).map(|v| v.to_string()) + } + }) + .collect::>(), + )) +} + +/// Cast date64 types to Utf8/LargeUtf8 +fn cast_date64_to_string( + array: &ArrayRef, +) -> Result { + let array = array.as_any().downcast_ref::().unwrap(); + + Ok(Arc::new( + (0..array.len()) + .map(|ix| { + if array.is_null(ix) { + None + } else { + array.value_as_datetime(ix).map(|v| v.to_string()) + } + }) + .collect::>(), + )) +} + /// Cast numeric types to Utf8 fn cast_numeric_to_string(array: &ArrayRef) -> Result where @@ -2850,6 +2894,48 @@ mod tests { } } + #[test] + fn test_cast_string_to_date32() { + let a1 = Arc::new(StringArray::from(vec![ + Some("2018-12-25"), + Some("Not a valid date"), + None, + ])) as ArrayRef; + let a2 = Arc::new(LargeStringArray::from(vec![ + Some("2018-12-25"), + Some("Not a valid date"), + None, + ])) as ArrayRef; + for array in &[a1, a2] { + let b = cast(array, &DataType::Date32).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(17890, c.value(0)); + assert!(c.is_null(1)); + assert!(c.is_null(2)); + } + } + + #[test] + fn test_cast_string_to_date64() { + let a1 = Arc::new(StringArray::from(vec![ + Some("2020-09-08T12:00:00"), + Some("Not a valid date"), + None, + ])) as ArrayRef; + let a2 = Arc::new(LargeStringArray::from(vec![ + Some("2020-09-08T12:00:00"), + Some("Not a valid date"), + None, + ])) as ArrayRef; + for array in &[a1, a2] { + let b = cast(array, &DataType::Date64).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(1599566400000, c.value(0)); + assert!(c.is_null(1)); + assert!(c.is_null(2)); + } + } + #[test] fn test_cast_date32_to_int32() { let a = Date32Array::from(vec![10000, 17890]); @@ -2929,6 +3015,28 @@ mod tests { assert!(c.is_null(2)); } + #[test] + fn test_cast_date32_to_string() { + let a = Date32Array::from(vec![10000, 17890]); + let array = Arc::new(a) as ArrayRef; + let b = cast(&array, &DataType::Utf8).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(&DataType::Utf8, c.data_type()); + assert_eq!("1997-05-19", c.value(0)); + assert_eq!("2018-12-25", c.value(1)); + } + + #[test] + fn test_cast_date64_to_string() { + let a = Date64Array::from(vec![10000 * 86400000, 17890 * 86400000]); + let array = Arc::new(a) as ArrayRef; + let b = cast(&array, &DataType::Utf8).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(&DataType::Utf8, c.data_type()); + assert_eq!("1997-05-19 00:00:00", c.value(0)); + assert_eq!("2018-12-25 00:00:00", c.value(1)); + } + #[test] fn test_cast_between_timestamps() { let a = TimestampMillisecondArray::from_opt_vec(