Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parquet clippy lints (#1254) #2377

Merged
merged 3 commits into the base branch from the contributor's branch (branch names lost in extraction)
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .github/workflows/parquet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,4 @@ jobs:
rustup component add clippy
- name: Run clippy
run: |
# Only run clippy for the library at this time,
# as there are clippy errors for other targets
cargo clippy -p parquet --all-features --lib -- -D warnings
# https://github.com/apache/arrow-rs/issues/1254
#cargo clippy -p parquet --all-targets --all-features -- -D warnings
cargo clippy -p parquet --all-targets --all-features -- -D warnings
5 changes: 1 addition & 4 deletions parquet/src/arrow/array_reader/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,18 @@ fn build_list_reader(

let data_type = field.arrow_type.clone();
let item_reader = build_reader(&children[0], row_groups)?;
let item_type = item_reader.get_data_type().clone();

match is_large {
false => Ok(Box::new(ListArrayReader::<i32>::new(
item_reader,
data_type,
item_type,
field.def_level,
field.rep_level,
field.nullable,
)) as _),
true => Ok(Box::new(ListArrayReader::<i64>::new(
item_reader,
data_type,
item_type,
field.def_level,
field.rep_level,
field.nullable,
Expand Down Expand Up @@ -318,7 +315,7 @@ mod tests {
use super::*;
use crate::arrow::parquet_to_arrow_schema;
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::util::test_common::get_test_file;
use crate::util::test_common::file_util::get_test_file;
use arrow::datatypes::Field;
use std::sync::Arc;

Expand Down
14 changes: 3 additions & 11 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ use std::sync::Arc;
pub struct ListArrayReader<OffsetSize: OffsetSizeTrait> {
item_reader: Box<dyn ArrayReader>,
data_type: ArrowType,
item_type: ArrowType,
/// The definition level at which this list is not null
def_level: i16,
/// The repetition level that corresponds to a new value in this array
Expand All @@ -49,15 +48,13 @@ impl<OffsetSize: OffsetSizeTrait> ListArrayReader<OffsetSize> {
pub fn new(
item_reader: Box<dyn ArrayReader>,
data_type: ArrowType,
item_type: ArrowType,
def_level: i16,
rep_level: i16,
nullable: bool,
) -> Self {
Self {
item_reader,
data_type,
item_type,
def_level,
rep_level,
nullable,
Expand Down Expand Up @@ -304,13 +301,13 @@ mod tests {
// ]

let l3_item_type = ArrowType::Int32;
let l3_type = list_type::<OffsetSize>(l3_item_type.clone(), true);
let l3_type = list_type::<OffsetSize>(l3_item_type, true);

let l2_item_type = l3_type.clone();
let l2_type = list_type::<OffsetSize>(l2_item_type.clone(), true);
let l2_type = list_type::<OffsetSize>(l2_item_type, true);

let l1_item_type = l2_type.clone();
let l1_type = list_type::<OffsetSize>(l1_item_type.clone(), false);
let l1_type = list_type::<OffsetSize>(l1_item_type, false);

let leaf = PrimitiveArray::<Int32Type>::from_iter(vec![
Some(1),
Expand Down Expand Up @@ -387,7 +384,6 @@ mod tests {
let l3 = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
l3_type,
l3_item_type,
5,
3,
true,
Expand All @@ -396,7 +392,6 @@ mod tests {
let l2 = ListArrayReader::<OffsetSize>::new(
Box::new(l3),
l2_type,
l2_item_type,
3,
2,
false,
Expand All @@ -405,7 +400,6 @@ mod tests {
let mut l1 = ListArrayReader::<OffsetSize>::new(
Box::new(l2),
l1_type,
l1_item_type,
2,
1,
true,
Expand Down Expand Up @@ -456,7 +450,6 @@ mod tests {
let mut list_array_reader = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
list_type::<OffsetSize>(ArrowType::Int32, true),
ArrowType::Int32,
1,
1,
false,
Expand Down Expand Up @@ -509,7 +502,6 @@ mod tests {
let mut list_array_reader = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
list_type::<OffsetSize>(ArrowType::Int32, true),
ArrowType::Int32,
2,
1,
true,
Expand Down
2 changes: 2 additions & 0 deletions parquet/src/arrow/array_reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct MapArrayReader {
value_reader: Box<dyn ArrayReader>,
data_type: ArrowType,
map_def_level: i16,
#[allow(unused)]
map_rep_level: i16,
}

Expand All @@ -47,6 +48,7 @@ impl MapArrayReader {
key_reader,
value_reader,
data_type,
// These are the wrong way round https://github.com/apache/arrow-rs/issues/1699
map_def_level: rep_level,
map_rep_level: def_level,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is correct, but changing it caused the tests to fail...

}
Expand Down
4 changes: 1 addition & 3 deletions parquet/src/arrow/array_reader/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ where
pages: Box<dyn PageIterator>,
def_levels_buffer: Option<Buffer>,
rep_levels_buffer: Option<Buffer>,
column_desc: ColumnDescPtr,
record_reader: RecordReader<T>,
}

Expand All @@ -50,14 +49,13 @@ where
{
/// Construct null array reader.
pub fn new(pages: Box<dyn PageIterator>, column_desc: ColumnDescPtr) -> Result<Self> {
let record_reader = RecordReader::<T>::new(column_desc.clone());
let record_reader = RecordReader::<T>::new(column_desc);

Ok(Self {
data_type: ArrowType::Null,
pages,
def_levels_buffer: None,
rep_levels_buffer: None,
column_desc,
record_reader,
})
}
Expand Down
7 changes: 3 additions & 4 deletions parquet/src/arrow/array_reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ where
pages: Box<dyn PageIterator>,
def_levels_buffer: Option<Buffer>,
rep_levels_buffer: Option<Buffer>,
column_desc: ColumnDescPtr,
record_reader: RecordReader<T>,
}

Expand All @@ -67,14 +66,13 @@ where
.clone(),
};

let record_reader = RecordReader::<T>::new(column_desc.clone());
let record_reader = RecordReader::<T>::new(column_desc);

Ok(Self {
data_type,
pages,
def_levels_buffer: None,
rep_levels_buffer: None,
column_desc,
record_reader,
})
}
Expand Down Expand Up @@ -244,14 +242,15 @@ mod tests {
use crate::data_type::Int32Type;
use crate::schema::parser::parse_message_type;
use crate::schema::types::SchemaDescriptor;
use crate::util::test_common::make_pages;
use crate::util::test_common::rand_gen::make_pages;
use crate::util::InMemoryPageIterator;
use arrow::array::PrimitiveArray;
use arrow::datatypes::ArrowPrimitiveType;

use rand::distributions::uniform::SampleUniform;
use std::collections::VecDeque;

#[allow(clippy::too_many_arguments)]
fn make_column_chunks<T: DataType>(
column_desc: ColumnDescPtr,
encoding: Encoding,
Expand Down
1 change: 0 additions & 1 deletion parquet/src/arrow/array_reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ mod tests {
let list_reader = ListArrayReader::<i32>::new(
Box::new(reader),
expected_l.data_type().clone(),
ArrowType::Int32,
3,
1,
true,
Expand Down
3 changes: 1 addition & 2 deletions parquet/src/arrow/array_reader/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ pub fn utf8_column() -> ColumnDescPtr {

/// Encode `data` with the provided `encoding`
pub fn encode_byte_array(encoding: Encoding, data: &[ByteArray]) -> ByteBufferPtr {
let descriptor = utf8_column();
let mut encoder = get_encoder::<ByteArrayType>(descriptor, encoding).unwrap();
let mut encoder = get_encoder::<ByteArrayType>(encoding).unwrap();

encoder.put(data).unwrap();
encoder.flush_buffer().unwrap()
Expand Down
7 changes: 5 additions & 2 deletions parquet/src/arrow/arrow_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ pub(crate) struct RowSelection {

impl RowSelection {
/// Select `row_count` rows
#[allow(unused)]
pub fn select(row_count: usize) -> Self {
Self {
row_count,
Expand All @@ -93,6 +94,7 @@ impl RowSelection {
}

/// Skip `row_count` rows
#[allow(unused)]
pub fn skip(row_count: usize) -> Self {
Self {
row_count,
Expand All @@ -109,7 +111,7 @@ pub struct ArrowReaderOptions {

impl ArrowReaderOptions {
/// Create a new [`ArrowReaderOptions`] with the default settings
fn new() -> Self {
pub fn new() -> Self {
Self::default()
}

Expand All @@ -129,6 +131,7 @@ impl ArrowReaderOptions {
/// Scan rows from the parquet file according to the provided `selection`
///
/// TODO: Make public once row selection fully implemented (#1792)
#[allow(unused)]
pub(crate) fn with_row_selection(
self,
selection: impl Into<Vec<RowSelection>>,
Expand Down Expand Up @@ -433,7 +436,7 @@ mod tests {
use crate::file::writer::SerializedFileWriter;
use crate::schema::parser::parse_message_type;
use crate::schema::types::{Type, TypePtr};
use crate::util::test_common::RandGen;
use crate::util::test_common::rand_gen::RandGen;

#[test]
fn test_arrow_reader_all_columns() {
Expand Down
50 changes: 19 additions & 31 deletions parquet/src/arrow/buffer/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,21 @@

use crate::data_type::{ByteArray, FixedLenByteArray, Int96};
use arrow::array::{
Array, ArrayRef, BasicDecimalArray, BinaryArray, BinaryBuilder, Decimal128Array,
FixedSizeBinaryArray, FixedSizeBinaryBuilder, IntervalDayTimeArray,
IntervalDayTimeBuilder, IntervalYearMonthArray, IntervalYearMonthBuilder,
LargeBinaryArray, LargeBinaryBuilder, LargeStringArray, LargeStringBuilder,
StringArray, StringBuilder, TimestampNanosecondArray,
Array, ArrayRef, BasicDecimalArray, Decimal128Array, FixedSizeBinaryArray,
FixedSizeBinaryBuilder, IntervalDayTimeArray, IntervalDayTimeBuilder,
IntervalYearMonthArray, IntervalYearMonthBuilder, TimestampNanosecondArray,
};
use std::convert::{From, TryInto};
use std::sync::Arc;

use crate::errors::Result;
use std::marker::PhantomData;

#[cfg(test)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of gross, but I hope to remove this module eventually #1661

use arrow::array::{
BinaryArray, BinaryBuilder, LargeStringArray, LargeStringBuilder, StringArray,
StringBuilder,
};

/// A converter is used to consume record reader's content and convert it to arrow
/// primitive array.
pub trait Converter<S, T> {
Expand Down Expand Up @@ -185,8 +188,10 @@ impl Converter<Vec<Option<Int96>>, TimestampNanosecondArray> for Int96ArrayConve
}
}

#[cfg(test)]
pub struct Utf8ArrayConverter {}

#[cfg(test)]
impl Converter<Vec<Option<ByteArray>>, StringArray> for Utf8ArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<StringArray> {
let data_size = source
Expand All @@ -206,8 +211,10 @@ impl Converter<Vec<Option<ByteArray>>, StringArray> for Utf8ArrayConverter {
}
}

#[cfg(test)]
pub struct LargeUtf8ArrayConverter {}

#[cfg(test)]
impl Converter<Vec<Option<ByteArray>>, LargeStringArray> for LargeUtf8ArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<LargeStringArray> {
let data_size = source
Expand All @@ -227,8 +234,10 @@ impl Converter<Vec<Option<ByteArray>>, LargeStringArray> for LargeUtf8ArrayConve
}
}

#[cfg(test)]
pub struct BinaryArrayConverter {}

#[cfg(test)]
impl Converter<Vec<Option<ByteArray>>, BinaryArray> for BinaryArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<BinaryArray> {
let mut builder = BinaryBuilder::new(source.len());
Expand All @@ -243,33 +252,9 @@ impl Converter<Vec<Option<ByteArray>>, BinaryArray> for BinaryArrayConverter {
}
}

pub struct LargeBinaryArrayConverter {}

impl Converter<Vec<Option<ByteArray>>, LargeBinaryArray> for LargeBinaryArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<LargeBinaryArray> {
let mut builder = LargeBinaryBuilder::new(source.len());
for v in source {
match v {
Some(array) => builder.append_value(array.data()),
None => builder.append_null(),
}
}

Ok(builder.finish())
}
}

#[cfg(test)]
pub type Utf8Converter =
ArrayRefConverter<Vec<Option<ByteArray>>, StringArray, Utf8ArrayConverter>;
pub type LargeUtf8Converter =
ArrayRefConverter<Vec<Option<ByteArray>>, LargeStringArray, LargeUtf8ArrayConverter>;
pub type BinaryConverter =
ArrayRefConverter<Vec<Option<ByteArray>>, BinaryArray, BinaryArrayConverter>;
pub type LargeBinaryConverter = ArrayRefConverter<
Vec<Option<ByteArray>>,
LargeBinaryArray,
LargeBinaryArrayConverter,
>;

pub type Int96Converter =
ArrayRefConverter<Vec<Option<Int96>>, TimestampNanosecondArray, Int96ArrayConverter>;
Expand Down Expand Up @@ -299,11 +284,13 @@ pub type DecimalFixedLengthByteArrayConverter = ArrayRefConverter<
pub type DecimalByteArrayConvert =
ArrayRefConverter<Vec<Option<ByteArray>>, Decimal128Array, DecimalArrayConverter>;

#[cfg(test)]
pub struct FromConverter<S, T> {
_source: PhantomData<S>,
_dest: PhantomData<T>,
}

#[cfg(test)]
impl<S, T> FromConverter<S, T>
where
T: From<S>,
Expand All @@ -316,6 +303,7 @@ where
}
}

#[cfg(test)]
impl<S, T> Converter<S, T> for FromConverter<S, T>
where
T: From<S>,
Expand Down
1 change: 1 addition & 0 deletions parquet/src/arrow/buffer/dictionary_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ impl<K: ScalarValue, V: ScalarValue> Default for DictionaryBuffer<K, V> {
impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
DictionaryBuffer<K, V>
{
#[allow(unused)]
pub fn len(&self) -> usize {
match self {
Self::Dict { keys, .. } => keys.len(),
Expand Down
Loading