Skip to content

Commit

Permalink
add LargeUtf8 handling
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove committed Apr 27, 2024
1 parent 83ad6a2 commit 6cea67f
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions core/src/execution/datafusion/expressions/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,23 @@ impl Cast {
DataType::Dictionary(key_type, value_type),
DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64,
) if key_type.as_ref() == &DataType::Int32
&& value_type.as_ref() == &DataType::Utf8 =>
&& (value_type.as_ref() == &DataType::Utf8
|| value_type.as_ref() == &DataType::LargeUtf8) =>
{
// TODO: we currently unpack the dictionary-encoded array and then perform the
// cast on the unpacked values. Casting the dictionary values directly, without
// unpacking the array first, could improve performance, although it would add
// more complexity to the code.
let unpacked_array = Self::unpack_dict_string_array::<Int32Type>(&array)?;
Self::cast_string_to_int::<i32>(to_type, &unpacked_array, self.eval_mode)?
match value_type.as_ref() {
DataType::Utf8 => {
Self::cast_string_to_int::<i32>(to_type, &unpacked_array, self.eval_mode)?
}
DataType::LargeUtf8 => {
Self::cast_string_to_int::<i64>(to_type, &unpacked_array, self.eval_mode)?
}
_ => unreachable!("invalid value type for dictionary-encoded string array"),
}
}
_ => {
// when we have no Spark-specific casting we delegate to DataFusion
Expand Down

0 comments on commit 6cea67f

Please sign in to comment.