Skip to content

Commit

Permalink
Write FixedLenByteArray stats for FixedLenByteArray columns (not ByteArray stats) (#662)
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb authored Aug 10, 2021
1 parent fc04931 commit fa5acd9
Showing 1 changed file with 24 additions and 7 deletions.
31 changes: 24 additions & 7 deletions parquet/src/column/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -924,11 +924,28 @@ impl<T: DataType> ColumnWriterImpl<T> {
  Type::INT96 => gen_stats_section!(Int96, int96, min, max, distinct, nulls),
  Type::FLOAT => gen_stats_section!(f32, float, min, max, distinct, nulls),
  Type::DOUBLE => gen_stats_section!(f64, double, min, max, distinct, nulls),
- Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => {
+ Type::BYTE_ARRAY => {
  let min = min.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
  let max = max.as_ref().map(|v| ByteArray::from(v.as_bytes().to_vec()));
  Statistics::byte_array(min, max, distinct, nulls, false)
  }
+ Type::FIXED_LEN_BYTE_ARRAY => {
+ let min = min
+ .as_ref()
+ .map(|v| ByteArray::from(v.as_bytes().to_vec()))
+ .map(|ba| {
+ let ba: FixedLenByteArray = ba.into();
+ ba
+ });
+ let max = max
+ .as_ref()
+ .map(|v| ByteArray::from(v.as_bytes().to_vec()))
+ .map(|ba| {
+ let ba: FixedLenByteArray = ba.into();
+ ba
+ });
+ Statistics::fixed_len_byte_array(min, max, distinct, nulls, false)
+ }
  }
  }

Expand Down Expand Up @@ -1797,13 +1814,13 @@ mod tests {

  let stats = statistics_roundtrip::<FixedLenByteArrayType>(&input);
  assert!(stats.has_min_max_set());
- // should it be FixedLenByteArray?
- // https://github.com/apache/arrow-rs/issues/660
- if let Statistics::ByteArray(stats) = stats {
- assert_eq!(stats.min(), &ByteArray::from("aaw "));
- assert_eq!(stats.max(), &ByteArray::from("zz "));
+ if let Statistics::FixedLenByteArray(stats) = stats {
+ let expected_min: FixedLenByteArray = ByteArray::from("aaw ").into();
+ assert_eq!(stats.min(), &expected_min);
+ let expected_max: FixedLenByteArray = ByteArray::from("zz ").into();
+ assert_eq!(stats.max(), &expected_max);
  } else {
- panic!("expecting Statistics::ByteArray, got {:?}", stats);
+ panic!("expecting Statistics::FixedLenByteArray, got {:?}", stats);
  }
  }

Expand Down

0 comments on commit fa5acd9

Please sign in to comment.