diff --git a/parquet/src/file/metadata.rs b/parquet/src/file/metadata.rs index 4cb2e9ab2a6a..b1a812c0ecd5 100644 --- a/parquet/src/file/metadata.rs +++ b/parquet/src/file/metadata.rs @@ -616,7 +616,7 @@ impl ColumnChunkMetaData { let data_page_offset = col_metadata.data_page_offset; let index_page_offset = col_metadata.index_page_offset; let dictionary_page_offset = col_metadata.dictionary_page_offset; - let statistics = statistics::from_thrift(column_type, col_metadata.statistics); + let statistics = statistics::from_thrift(column_type, col_metadata.statistics)?; let encoding_stats = col_metadata .encoding_stats .as_ref() diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index f685f14bd92f..629606e587d4 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -461,7 +461,7 @@ pub(crate) fn decode_page( encoding: Encoding::try_from(header.encoding)?, def_level_encoding: Encoding::try_from(header.definition_level_encoding)?, rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?, - statistics: statistics::from_thrift(physical_type, header.statistics), + statistics: statistics::from_thrift(physical_type, header.statistics)?, } } PageType::DATA_PAGE_V2 => { @@ -477,7 +477,7 @@ pub(crate) fn decode_page( def_levels_byte_len: header.definition_levels_byte_length as u32, rep_levels_byte_len: header.repetition_levels_byte_length as u32, is_compressed, - statistics: statistics::from_thrift(physical_type, header.statistics), + statistics: statistics::from_thrift(physical_type, header.statistics)?, } } _ => { diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 939ce037f968..b36e37a80c97 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -44,6 +44,7 @@ use crate::format::Statistics as TStatistics; use crate::basic::Type; use crate::data_type::private::ParquetValueType; use crate::data_type::*; +use crate::errors::{ParquetError, Result}; use crate::util::bit_util::from_le_slice; pub(crate) mod private { @@ -119,15 +120,18 @@ macro_rules! statistics_enum_func { pub fn from_thrift( physical_type: Type, thrift_stats: Option, -) -> Option { - match thrift_stats { +) -> Result> { + Ok(match thrift_stats { Some(stats) => { // Number of nulls recorded, when it is not available, we just mark it as 0. let null_count = stats.null_count.unwrap_or(0); - assert!( - null_count >= 0, - "Statistics null count is negative ({null_count})" - ); + + if null_count < 0 { + return Err(ParquetError::General(format!( + "Statistics null count is negative {}", + null_count + ))); + } // Generic null count. let null_count = null_count as u64; @@ -221,7 +225,7 @@ pub fn from_thrift( Some(res) } None => None, - } + }) } // Convert Statistics into Thrift definition. @@ -594,7 +598,7 @@ mod tests { } #[test] - #[should_panic(expected = "Statistics null count is negative (-10)")] + #[should_panic(expected = "General(\"Statistics null count is negative -10\")")] fn test_statistics_negative_null_count() { let thrift_stats = TStatistics { max: None, @@ -605,13 +609,13 @@ mod tests { min_value: None, }; - from_thrift(Type::INT32, Some(thrift_stats)); + from_thrift(Type::INT32, Some(thrift_stats)).unwrap(); } #[test] fn test_statistics_thrift_none() { - assert_eq!(from_thrift(Type::INT32, None), None); - assert_eq!(from_thrift(Type::BYTE_ARRAY, None), None); + assert_eq!(from_thrift(Type::INT32, None).unwrap(), None); + assert_eq!(from_thrift(Type::BYTE_ARRAY, None).unwrap(), None); } #[test] @@ -715,7 +719,7 @@ mod tests { fn check_stats(stats: Statistics) { let tpe = stats.physical_type(); let thrift_stats = to_thrift(Some(&stats)); - assert_eq!(from_thrift(tpe, thrift_stats), Some(stats)); + assert_eq!(from_thrift(tpe, thrift_stats).unwrap(), Some(stats)); } check_stats(Statistics::boolean(Some(false), Some(true), None, 7, true)); diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 3b2dd8289455..7e1034ae7b63 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -1135,7 +1135,8 @@ mod tests { statistics: from_thrift( physical_type, to_thrift(statistics.as_ref()), - ), + ) + .unwrap(), } } Page::DataPageV2 { @@ -1168,7 +1169,8 @@ mod tests { statistics: from_thrift( physical_type, to_thrift(statistics.as_ref()), - ), + ) + .unwrap(), } } Page::DictionaryPage {