Skip to content

Commit

Permalink
fix: Fix incorrectly added sorted flag after append for lexically ordered categorical series (#20414)
Browse files Browse the repository at this point in the history
  • Loading branch information
siddharth-vi authored Dec 31, 2024
1 parent 68100f2 commit c5cf3f9
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ pub fn call_categorical_merge_operation<I: CategoricalMergeOperation>(
) -> PolarsResult<CategoricalChunked> {
let rev_map_left = cat_left.get_rev_map();
let rev_map_right = cat_right.get_rev_map();
let (new_physical, new_rev_map) = match (&**rev_map_left, &**rev_map_right) {
let (mut new_physical, new_rev_map) = match (&**rev_map_left, &**rev_map_right) {
(RevMapping::Global(_, _, idl), RevMapping::Global(_, _, idr)) if idl == idr => {
let mut rev_map_merger = GlobalRevMapMerger::new(rev_map_left.clone());
rev_map_merger.merge_map(rev_map_right)?;
Expand Down Expand Up @@ -176,6 +176,12 @@ pub fn call_categorical_merge_operation<I: CategoricalMergeOperation>(
},
_ => polars_bail!(string_cache_mismatch),
};
// During the merge operation, the sorted flag might get set on the underlying physical.
// Ensure that the sorted flag is not set if we use lexical ordering.
if cat_left.uses_lexical_ordering() {
new_physical.set_sorted_flag(IsSorted::Not)
}

// SAFETY: physical and rev map are correctly constructed above
unsafe {
Ok(CategoricalChunked::from_cats_and_rev_map_unchecked(
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/unit/datatypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,15 @@ def test_cat_append_lexical_sorted_flag() -> None:

assert not (df2["y"].is_sorted())

s = pl.Series("a", ["z", "k", "a"], pl.Categorical("lexical"))
s1 = s[[0]]
s2 = s[[1]]
s3 = s[[2]]
s1.append(s2)
s1.append(s3)

assert not (s1.is_sorted())


def test_get_cat_categories_multiple_chunks() -> None:
df = pl.DataFrame(
Expand Down

0 comments on commit c5cf3f9

Please sign in to comment.