Skip to content

Commit

Permalink
refactor: from_thrift avoid panic (#4642)
Browse files Browse the repository at this point in the history
  • Loading branch information
jackwener authored Aug 4, 2023
1 parent b15838c commit 273dcc1
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 17 deletions.
2 changes: 1 addition & 1 deletion parquet/src/file/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ impl ColumnChunkMetaData {
let data_page_offset = col_metadata.data_page_offset;
let index_page_offset = col_metadata.index_page_offset;
let dictionary_page_offset = col_metadata.dictionary_page_offset;
let statistics = statistics::from_thrift(column_type, col_metadata.statistics);
let statistics = statistics::from_thrift(column_type, col_metadata.statistics)?;
let encoding_stats = col_metadata
.encoding_stats
.as_ref()
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/file/serialized_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ pub(crate) fn decode_page(
encoding: Encoding::try_from(header.encoding)?,
def_level_encoding: Encoding::try_from(header.definition_level_encoding)?,
rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?,
statistics: statistics::from_thrift(physical_type, header.statistics),
statistics: statistics::from_thrift(physical_type, header.statistics)?,
}
}
PageType::DATA_PAGE_V2 => {
Expand All @@ -477,7 +477,7 @@ pub(crate) fn decode_page(
def_levels_byte_len: header.definition_levels_byte_length as u32,
rep_levels_byte_len: header.repetition_levels_byte_length as u32,
is_compressed,
statistics: statistics::from_thrift(physical_type, header.statistics),
statistics: statistics::from_thrift(physical_type, header.statistics)?,
}
}
_ => {
Expand Down
28 changes: 16 additions & 12 deletions parquet/src/file/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use crate::format::Statistics as TStatistics;
use crate::basic::Type;
use crate::data_type::private::ParquetValueType;
use crate::data_type::*;
use crate::errors::{ParquetError, Result};
use crate::util::bit_util::from_le_slice;

pub(crate) mod private {
Expand Down Expand Up @@ -119,15 +120,18 @@ macro_rules! statistics_enum_func {
pub fn from_thrift(
physical_type: Type,
thrift_stats: Option<TStatistics>,
) -> Option<Statistics> {
match thrift_stats {
) -> Result<Option<Statistics>> {
Ok(match thrift_stats {
Some(stats) => {
// Number of nulls recorded, when it is not available, we just mark it as 0.
let null_count = stats.null_count.unwrap_or(0);
assert!(
null_count >= 0,
"Statistics null count is negative ({null_count})"
);

if null_count < 0 {
return Err(ParquetError::General(format!(
"Statistics null count is negative {}",
null_count
)));
}

// Generic null count.
let null_count = null_count as u64;
Expand Down Expand Up @@ -221,7 +225,7 @@ pub fn from_thrift(
Some(res)
}
None => None,
}
})
}

// Convert Statistics into Thrift definition.
Expand Down Expand Up @@ -594,7 +598,7 @@ mod tests {
}

#[test]
#[should_panic(expected = "Statistics null count is negative (-10)")]
#[should_panic(expected = "General(\"Statistics null count is negative -10\")")]
fn test_statistics_negative_null_count() {
let thrift_stats = TStatistics {
max: None,
Expand All @@ -605,13 +609,13 @@ mod tests {
min_value: None,
};

from_thrift(Type::INT32, Some(thrift_stats));
from_thrift(Type::INT32, Some(thrift_stats)).unwrap();
}

#[test]
fn test_statistics_thrift_none() {
assert_eq!(from_thrift(Type::INT32, None), None);
assert_eq!(from_thrift(Type::BYTE_ARRAY, None), None);
assert_eq!(from_thrift(Type::INT32, None).unwrap(), None);
assert_eq!(from_thrift(Type::BYTE_ARRAY, None).unwrap(), None);
}

#[test]
Expand Down Expand Up @@ -715,7 +719,7 @@ mod tests {
fn check_stats(stats: Statistics) {
let tpe = stats.physical_type();
let thrift_stats = to_thrift(Some(&stats));
assert_eq!(from_thrift(tpe, thrift_stats), Some(stats));
assert_eq!(from_thrift(tpe, thrift_stats).unwrap(), Some(stats));
}

check_stats(Statistics::boolean(Some(false), Some(true), None, 7, true));
Expand Down
6 changes: 4 additions & 2 deletions parquet/src/file/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,8 @@ mod tests {
statistics: from_thrift(
physical_type,
to_thrift(statistics.as_ref()),
),
)
.unwrap(),
}
}
Page::DataPageV2 {
Expand Down Expand Up @@ -1168,7 +1169,8 @@ mod tests {
statistics: from_thrift(
physical_type,
to_thrift(statistics.as_ref()),
),
)
.unwrap(),
}
}
Page::DictionaryPage {
Expand Down

0 comments on commit 273dcc1

Please sign in to comment.