Skip to content

Commit

Permalink
Do not rename struct fields when coercing types in CASE (#14384)
Browse files Browse the repository at this point in the history
* Fix field name during struct equality coercion

* fix bug

* Add more tests

* Update tests per #14396
  • Loading branch information
alamb authored Feb 1, 2025
1 parent 9d1bfc1 commit c33cb85
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 7 deletions.
22 changes: 15 additions & 7 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -961,23 +961,31 @@ fn struct_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
return None;
}

let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
let coerced_types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
.map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type()))
.collect::<Option<Vec<DataType>>>()?;

let fields = types
// preserve the field name and nullability
let orig_fields = std::iter::zip(lhs_fields.iter(), rhs_fields.iter());

let fields: Vec<FieldRef> = coerced_types
.into_iter()
.enumerate()
.map(|(i, datatype)| {
Arc::new(Field::new(format!("c{i}"), datatype, true))
})
.collect::<Vec<FieldRef>>();
.zip(orig_fields)
.map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, rhs))
.collect();
Some(Struct(fields.into()))
}
_ => None,
}
}

/// returns the result of coercing two fields to a common type
fn coerce_fields(common_type: DataType, lhs: &FieldRef, rhs: &FieldRef) -> FieldRef {
let is_nullable = lhs.is_nullable() || rhs.is_nullable();
let name = lhs.name(); // pick the name from the left field
Arc::new(Field::new(name, common_type, is_nullable))
}

/// Returns the output type of applying mathematics operations such as
/// `+` to arguments of `lhs_type` and `rhs_type`.
fn mathematics_numerical_coercion(
Expand Down
110 changes: 110 additions & 0 deletions datafusion/sqllogictest/test_files/case.slt
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,113 @@ NULL NULL false

statement ok
drop table foo


# Test coercion of inner struct field names
# Reproducer for https://github.com/apache/datafusion/issues/14383
statement ok
create table t as values
(
100, -- column1 int (so the case isn't constant folded)
{ 'foo': 'bar' }, -- column2 has List of Struct w/ Utf8
{ 'foo': arrow_cast('baz', 'Utf8View') }, -- column3 has List of Struct w/ Utf8View
{ 'foo': arrow_cast('blarg', 'Utf8View') } -- column4 has List of Struct w/ Utf8View
);


# Note field name is foo
query ???
SELECT column2, column3, column4 FROM t;
----
{foo: bar} {foo: baz} {foo: blarg}

# Coerce fields, expect the field name to be the name of the first arg to case
# the field should not be named 'c0'
query ?
SELECT
case
when column1 > 0 then column2
when column1 < 0 then column3
else column4
end
FROM t;
----
{foo: bar}

query ?
SELECT
case
when column1 > 0 then column3 -- different arg order shouldn't affect name
when column1 < 0 then column4
else column2
end
FROM t;
----
{foo: baz}

query ?
SELECT
case
when column1 > 0 then column4 -- different arg order shouldn't affect name
when column1 < 0 then column2
else column3
end
FROM t;
----
{foo: blarg}

statement ok
drop table t


# Test coercion of inner struct field names with different orders / missing fields
statement ok
create table t as values
(
100, -- column1 int (so the case isn't constant folded)
{ 'foo': 'a', 'xxx': 'b' }, -- column2: Struct with fields foo, xxx
{ 'xxx': 'c', 'foo': 'd' }, -- column3: Struct with fields xxx, foo
{ 'xxx': 'e' } -- column4: Struct with field xxx (no second field)
);

# Note field names are in different orders
query ???
SELECT column2, column3, column4 FROM t;
----
{foo: a, xxx: b} {xxx: c, foo: d} {xxx: e}

# coerce structs with different field orders,
# (note the *value*s are from column2 but the field name is 'xxx', as the coerced
# type takes the field name from the last argument (column3)
query ?
SELECT
case
when column1 > 0 then column2 -- always true
else column3
end
FROM t;
----
{xxx: a, foo: b}

# coerce structs with different field orders
query ?
SELECT
case
when column1 < 0 then column2 -- always false
else column3
end
FROM t;
----
{xxx: c, foo: d}

# coerce structs with subset of fields
query error Failed to coerce then
SELECT
case
when column1 > 0 then column3
else column4
end
FROM t;

statement ok
drop table t

0 comments on commit c33cb85

Please sign in to comment.