Skip to content

Commit

Permalink
fix: TopK operator should return correct results on dictionary column…
Browse files Browse the repository at this point in the history
… with nulls
  • Loading branch information
viirya committed Oct 23, 2024
1 parent cb3e977 commit 297c242
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
4 changes: 3 additions & 1 deletion native/core/src/execution/operators/copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,9 @@ fn copy_or_unpack_array(array: &Arc<dyn Array>, mode: &CopyMode) -> Result<Array
match array.data_type() {
DataType::Dictionary(_, value_type) => {
let options = CastOptions::default();
cast_with_options(array, value_type.as_ref(), &options)
// We need to copy the array after `cast` because the arrow-rs `take` kernel, which is used
// to unpack the dictionary array, might reuse the input array's null buffer.
Ok(copy_array(&cast_with_options(array, value_type.as_ref(), &options)?))
}
_ => {
if mode == &CopyMode::UnpackOrDeepCopy {
Expand Down
19 changes: 19 additions & 0 deletions spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ class CometExecSuite extends CometTestBase {
}
}

// Regression test: a TopK query (ORDER BY ... LIMIT) over a parquet table whose string
// column `c2` holds a null followed by repeated values must match Spark's own answer.
test("TopK operator should return correct results on dictionary column with nulls") {
  // NOTE(review): an empty V1 source list presumably routes parquet through the V2 path — confirm.
  withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") {
    withTable("test_data") {
      // One row with a null `c2`, then three rows repeating the same `c2`/`c3` values.
      // NOTE(review): the repeats are presumably what makes parquet dictionary-encode the
      // column — confirm.
      val seedRows = Seq((1, null, "A"), (2, "BBB", "B"), (3, "BBB", "B"), (4, "BBB", "B"))

      // Build the DataFrame from 3 partitions, then collapse to a single partition sorted
      // by `c1` before writing it out as a parquet table.
      spark.sparkContext
        .parallelize(seedRows, 3)
        .toDF("c1", "c2", "c3")
        .coalesce(1)
        .sortWithinPartitions("c1")
        .writeTo("test_data")
        .using("parquet")
        .create()

      // The LIMIT 3 result must include the row whose `c2` is null.
      checkSparkAnswer(sql("SELECT * FROM test_data ORDER BY c1 LIMIT 3"))
    }
  }
}

test("DPP fallback") {
withTempDir { path =>
// create test data
Expand Down

0 comments on commit 297c242

Please sign in to comment.