Skip to content

Commit

Permalink
Support casting from binary to dictionary of binary (#3482)
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya authored Jan 16, 2023
1 parent 95cf030 commit 07fd434
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
4 changes: 2 additions & 2 deletions arrow-array/src/builder/generic_bytes_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ where
let keys = self.keys_builder.finish();

let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8));
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));

let builder = keys
.into_data()
Expand All @@ -285,7 +285,7 @@ where
let keys = self.keys_builder.finish_cloned();

let data_type =
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8));
DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));

let builder = keys
.into_data()
Expand Down
26 changes: 26 additions & 0 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3303,6 +3303,8 @@ fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
),
Utf8 => pack_string_to_dictionary::<K>(array, cast_options),
LargeUtf8 => pack_string_to_dictionary::<K>(array, cast_options),
Binary => pack_binary_to_dictionary::<K>(array, cast_options),
LargeBinary => pack_binary_to_dictionary::<K>(array, cast_options),
_ => Err(ArrowError::CastError(format!(
"Unsupported output type for dictionary packing: {:?}",
dict_value_type
Expand Down Expand Up @@ -3366,6 +3368,30 @@ where
Ok(Arc::new(b.finish()))
}

// Dictionary-encodes `array` as binary values keyed by `K`.
//
// The input is first cast to `DataType::Binary` through `cast_with_options`;
// each slot of the result is then pushed into a `BinaryDictionaryBuilder`,
// with nulls carried over as null entries. Errors from the cast or from
// appending a value are returned to the caller.
//
// NOTE(review): the values are always built as `Binary`, even though the
// `LargeBinary` arm of `cast_to_dictionary` also dispatches here — confirm
// the resulting dictionary value type is what callers expect in that case.
fn pack_binary_to_dictionary<K>(
    array: &ArrayRef,
    cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError>
where
    K: ArrowDictionaryKeyType,
{
    let as_binary = cast_with_options(array, &DataType::Binary, cast_options)?;
    // The cast above was asked for `DataType::Binary`, so this downcast is
    // expected to succeed.
    let binary_values = as_binary
        .as_any()
        .downcast_ref::<BinaryArray>()
        .unwrap();

    let mut builder =
        BinaryDictionaryBuilder::<K>::with_capacity(binary_values.len(), 1024, 1024);

    // Walk the slots in order; `None` marks a null entry.
    for slot in binary_values.iter() {
        match slot {
            Some(bytes) => {
                builder.append(bytes)?;
            }
            None => builder.append_null(),
        }
    }

    Ok(Arc::new(builder.finish()))
}

/// Helper function that takes a primitive array and casts to a (generic) list array.
fn cast_primitive_to_list<OffsetSize: OffsetSizeTrait + NumCast>(
array: &ArrayRef,
Expand Down
2 changes: 2 additions & 0 deletions arrow/tests/array_cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,9 @@ fn get_all_types() -> Vec<DataType> {
),
Dictionary(Box::new(DataType::Int8), Box::new(DataType::Int32)),
Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
Dictionary(Box::new(DataType::Int16), Box::new(DataType::Binary)),
Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Binary)),
Decimal128(38, 0),
Dictionary(Box::new(DataType::Int8), Box::new(Decimal128(38, 0))),
Dictionary(Box::new(DataType::Int16), Box::new(Decimal128(38, 0))),
Expand Down

0 comments on commit 07fd434

Please sign in to comment.