From 720d858217f28485324a567e0946bcecdc90a583 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 8 Jun 2022 08:43:03 +0200 Subject: [PATCH 1/3] prevent unneeded offset check --- src/array/list/mutable.rs | 48 ++++++++++++++++++++++++-------------- src/io/avro/read/nested.rs | 32 +++++++++++++++---------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/src/array/list/mutable.rs b/src/array/list/mutable.rs index 6e5ffa2eb19..26bd29476a0 100644 --- a/src/array/list/mutable.rs +++ b/src/array/list/mutable.rs @@ -50,12 +50,16 @@ impl Default for MutableListArray { impl From> for ListArray { fn from(mut other: MutableListArray) -> Self { - ListArray::new( - other.data_type, - other.offsets.into(), - other.values.as_arc(), - other.validity.map(|x| x.into()), - ) + // Safety: + // MutableListArray has monotonically increasing offsets + unsafe { + ListArray::new_unchecked( + other.data_type, + other.offsets.into(), + other.values.as_arc(), + other.validity.map(|x| x.into()), + ) + } } } @@ -209,21 +213,29 @@ impl MutableArray for MutableLis } fn as_box(&mut self) -> Box { - Box::new(ListArray::new( - self.data_type.clone(), - std::mem::take(&mut self.offsets).into(), - self.values.as_arc(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + // Safety: + // MutableListArray has monotonically increasing offsets + unsafe { + Box::new(ListArray::new_unchecked( + self.data_type.clone(), + std::mem::take(&mut self.offsets).into(), + self.values.as_arc(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) + } } fn as_arc(&mut self) -> Arc { - Arc::new(ListArray::new( - self.data_type.clone(), - std::mem::take(&mut self.offsets).into(), - self.values.as_arc(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + // Safety: + // MutableListArray has monotonically increasing offsets + unsafe { + Arc::new(ListArray::new_unchecked( + self.data_type.clone(), + std::mem::take(&mut self.offsets).into(), + self.values.as_arc(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) + } } fn data_type(&self) -> &DataType { diff --git a/src/io/avro/read/nested.rs b/src/io/avro/read/nested.rs index daa07fd911c..935e9ee2008 100644 --- a/src/io/avro/read/nested.rs +++ b/src/io/avro/read/nested.rs @@ -80,21 +80,29 @@ impl MutableArray for DynMutableListArray { } fn as_box(&mut self) -> Box { - Box::new(ListArray::new( - self.data_type.clone(), - std::mem::take(&mut self.offsets).into(), - self.values.as_arc(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + // Safety: + // MutableListArray has monotonically increasing offsets + unsafe { + Box::new(ListArray::new_unchecked( + self.data_type.clone(), + std::mem::take(&mut self.offsets).into(), + self.values.as_arc(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) + } } fn as_arc(&mut self) -> Arc { - Arc::new(ListArray::new( - self.data_type.clone(), - std::mem::take(&mut self.offsets).into(), - self.values.as_arc(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + // Safety: + // MutableListArray has monotonically increasing offsets + unsafe { + Arc::new(ListArray::new_unchecked( + self.data_type.clone(), + std::mem::take(&mut self.offsets).into(), + self.values.as_arc(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) + } } fn data_type(&self) -> &DataType { From 6aca7c6c9046e9267308bde563ba10207d758303 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 8 Jun 2022 08:47:12 +0200 Subject: [PATCH 2/3] utf8 unneeded check --- src/io/json_integration/read/array.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/io/json_integration/read/array.rs b/src/io/json_integration/read/array.rs index 6210a02ca91..2f2c9cac307 100644 --- a/src/io/json_integration/read/array.rs +++ b/src/io/json_integration/read/array.rs @@ -185,7 +185,14 @@ fn to_utf8(json_col: &ArrowJsonColumn, data_type: DataType) -> Arc( From 49e5c82417002f0593d16a5e995ee5b590579925 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 8 Jun 2022 13:20:37 +0200 Subject: [PATCH 3/3] revert io to pass ci --- src/io/avro/read/nested.rs | 32 ++++++++++----------------- src/io/json_integration/read/array.rs | 9 +------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/src/io/avro/read/nested.rs b/src/io/avro/read/nested.rs index 935e9ee2008..daa07fd911c 100644 --- a/src/io/avro/read/nested.rs +++ b/src/io/avro/read/nested.rs @@ -80,29 +80,21 @@ impl MutableArray for DynMutableListArray { } fn as_box(&mut self) -> Box { - // Safety: - // MutableListArray has monotonically increasing offsets - unsafe { - Box::new(ListArray::new_unchecked( - self.data_type.clone(), - std::mem::take(&mut self.offsets).into(), - self.values.as_arc(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) - } + Box::new(ListArray::new( + self.data_type.clone(), + std::mem::take(&mut self.offsets).into(), + self.values.as_arc(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) } fn as_arc(&mut self) -> Arc { - // Safety: - // MutableListArray has monotonically increasing offsets - unsafe { - Arc::new(ListArray::new_unchecked( - self.data_type.clone(), - std::mem::take(&mut self.offsets).into(), - self.values.as_arc(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) - } + Arc::new(ListArray::new( + self.data_type.clone(), + std::mem::take(&mut self.offsets).into(), + self.values.as_arc(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) } fn data_type(&self) -> &DataType { diff --git a/src/io/json_integration/read/array.rs b/src/io/json_integration/read/array.rs index 2f2c9cac307..6210a02ca91 100644 --- a/src/io/json_integration/read/array.rs +++ b/src/io/json_integration/read/array.rs @@ -185,14 +185,7 @@ fn to_utf8(json_col: &ArrowJsonColumn, data_type: DataType) -> Arc(