Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify dictionary sort #4605

Merged
merged 1 commit into from
Aug 1, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 16 additions & 57 deletions arrow-ord/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,28 +390,15 @@ pub fn sort_to_indices(
descending: false,
nulls_first: value_null_first,
});
downcast_dictionary_array!(
values => match values.values().data_type() {
dt if DataType::is_primitive(dt) => {
let dict_values = values.values();
let sorted_value_indices = sort_to_indices(dict_values, value_options, None)?;
let value_indices_map = sorted_rank(&sorted_value_indices);
sort_primitive_dictionary::<_, _>(values, &value_indices_map, v, n, options, limit, cmp)
},
DataType::Utf8 => {
let dict_values = values.values();
let sorted_value_indices = sort_to_indices(dict_values, value_options, None)?;
let value_indices_map = sorted_rank(&sorted_value_indices);
sort_string_dictionary::<_>(values, &value_indices_map, v, n, &options, limit)
},
t => return Err(ArrowError::ComputeError(format!(
"Unsupported dictionary value type {t}"
))),
},
t => return Err(ArrowError::ComputeError(format!(
"Unsupported datatype {t}"
))),
)
downcast_dictionary_array! {
values => {
let dict_values = values.values();
let sorted_value_indices = sort_to_indices(dict_values, value_options, None)?;
let rank = sorted_rank(&sorted_value_indices);
sort_dictionary(values, &rank, v, n, options, limit)
}
_ => unreachable!(),
}
}
DataType::Binary | DataType::FixedSizeBinary(_) => {
sort_binary::<i32>(values, v, n, &options, limit)
Expand Down Expand Up @@ -563,28 +550,23 @@ fn sorted_rank(sorted_value_indices: &UInt32Array) -> Vec<u32> {
out
}

/// Sort dictionary encoded primitive values
fn sort_primitive_dictionary<K, F>(
values: &DictionaryArray<K>,
value_indices_map: &[u32],
/// Sort dictionary given the sorted rank of each key
fn sort_dictionary<K: ArrowDictionaryKeyType>(
dict: &DictionaryArray<K>,
rank: &[u32],
value_indices: Vec<u32>,
null_indices: Vec<u32>,
options: SortOptions,
limit: Option<usize>,
cmp: F,
Copy link
Contributor Author

@tustvold tustvold Jul 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This argument was always just passed a plain `Ord::cmp` comparison, so there was no reason to make it generic.

) -> UInt32Array
where
K: ArrowDictionaryKeyType,
F: Fn(u32, u32) -> Ordering,
{
let keys: &PrimitiveArray<K> = values.keys();
) -> UInt32Array {
let keys: &PrimitiveArray<K> = dict.keys();

// create tuples that are used for sorting
let valids = value_indices
.into_iter()
.map(|index| {
let key: K::Native = keys.value(index as usize);
(index, value_indices_map[key.as_usize()])
(index, rank[key.as_usize()])
})
.collect::<Vec<(u32, u32)>>();

Expand Down Expand Up @@ -877,29 +859,6 @@ fn sort_string<Offset: OffsetSizeTrait>(
)
}

/// Sort dictionary encoded strings
///
/// No string values are compared here. Each entry of `value_indices` is paired with
/// `value_indices_map[key]`, where `key` is the dictionary key stored at that position,
/// and the resulting `(index, mapped value)` tuples are handed to `sort_primitive_inner`.
///
/// * `values` - dictionary array; only its keys are read in this function
/// * `value_indices_map` - lookup table indexed by dictionary key (the caller shown in
///   `sort_to_indices` builds it with `sorted_rank`)
/// * `value_indices` - positions to sort; each one is used to index into the keys
///   (presumably the non-null positions - confirm against the caller)
/// * `null_indices`, `options`, `limit` - forwarded unchanged to `sort_primitive_inner`
///
/// # Panics
///
/// The slice index `value_indices_map[key.as_usize()]` panics if a key is not a valid
/// index into `value_indices_map`.
fn sort_string_dictionary<T: ArrowDictionaryKeyType>(
values: &DictionaryArray<T>,
value_indices_map: &[u32],
value_indices: Vec<u32>,
null_indices: Vec<u32>,
options: &SortOptions,
limit: Option<usize>,
) -> UInt32Array {
let keys: &PrimitiveArray<T> = values.keys();

// create tuples that are used for sorting
// each tuple is (original position, value looked up from `value_indices_map` by key)
let valids = value_indices
.into_iter()
.map(|index| {
let key: T::Native = keys.value(index as usize);
(index, value_indices_map[key.as_usize()])
})
.collect::<Vec<(u32, u32)>>();

// NOTE(review): `cmp` is neither a parameter nor a local of this function; it must
// resolve to an item defined elsewhere in the file - confirm.
sort_primitive_inner::<_, _>(keys.len(), null_indices, cmp, options, limit, valids)
}

/// shared implementation between dictionary encoded and plain string arrays
#[inline]
fn sort_string_helper<'a, A: Array, F>(
Expand Down
Loading