From f4830fdad6b5265067d2e6531b3c110f5532e372 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 27 Nov 2023 00:35:00 -0500 Subject: [PATCH 1/5] Public from methods on mutable arrays --- src/array/coord/mod.rs | 3 +- src/array/linestring/mutable.rs | 94 ++++++++++++++++++++++++++------- src/array/point/mutable.rs | 89 +++++++++++++++++++------------ src/io/geos/array/point.rs | 6 +-- 4 files changed, 135 insertions(+), 57 deletions(-) diff --git a/src/array/coord/mod.rs b/src/array/coord/mod.rs index f946b391..22cb44f7 100644 --- a/src/array/coord/mod.rs +++ b/src/array/coord/mod.rs @@ -12,8 +12,9 @@ pub use combined::{CoordBuffer, MutableCoordBuffer}; pub use interleaved::{InterleavedCoordBuffer, MutableInterleavedCoordBuffer}; pub use separated::{MutableSeparatedCoordBuffer, SeparatedCoordBuffer}; -#[derive(Debug, Clone, PartialEq)] +#[derive(Default, Debug, Clone, PartialEq)] pub enum CoordType { + #[default] Interleaved, Separated, } diff --git a/src/array/linestring/mutable.rs b/src/array/linestring/mutable.rs index 6a8e2936..74d29e1b 100644 --- a/src/array/linestring/mutable.rs +++ b/src/array/linestring/mutable.rs @@ -1,8 +1,8 @@ // use super::array::check; use crate::array::mutable_offset::OffsetsBuilder; use crate::array::{ - LineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableMultiPointArray, - WKBArray, + CoordType, LineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, + MutableMultiPointArray, MutableSeparatedCoordBuffer, WKBArray, }; use crate::error::{GeoArrowError, Result}; use crate::geo_traits::LineStringTrait; @@ -30,14 +30,33 @@ pub struct MutableLineStringArray { impl MutableLineStringArray { /// Creates a new empty [`MutableLineStringArray`]. pub fn new() -> Self { - Self::with_capacities(0, 0) + Self::new_with_options(Default::default()) + } + + pub fn new_with_options(coord_type: CoordType) -> Self { + Self::with_capacities_and_options(0, 0, coord_type) } /// Creates a new [`MutableLineStringArray`] with a capacity. pub fn with_capacities(coord_capacity: usize, geom_capacity: usize) -> Self { - let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity); + Self::with_capacities_and_options(coord_capacity, geom_capacity, Default::default()) + } + + pub fn with_capacities_and_options( + coord_capacity: usize, + geom_capacity: usize, + coord_type: CoordType, + ) -> Self { + let coords = match coord_type { + CoordType::Interleaved => MutableCoordBuffer::Interleaved( + MutableInterleavedCoordBuffer::with_capacity(coord_capacity), + ), + CoordType::Separated => MutableCoordBuffer::Separated( + MutableSeparatedCoordBuffer::with_capacity(coord_capacity), + ), + }; Self { - coords: MutableCoordBuffer::Interleaved(coords), + coords, geom_offsets: OffsetsBuilder::with_capacity(geom_capacity), validity: NullBufferBuilder::new(geom_capacity), } @@ -155,6 +174,32 @@ impl MutableLineStringArray { pub fn into_array_ref(self) -> Arc { Arc::new(self.into_arrow()) } + + pub fn from_line_strings( + geoms: &[impl LineStringTrait], + coord_type: CoordType, + ) -> Self { + let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); + second_pass( + geoms.iter().map(Some), + coord_capacity, + geom_capacity, + coord_type, + ) + } + + pub fn from_nullable_line_strings( + geoms: &[Option>], + coord_type: CoordType, + ) -> Self { + let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); + second_pass( + geoms.iter().map(|x| x.as_ref()), + coord_capacity, + geom_capacity, + coord_type, + ) + } } impl IntoArrow for MutableLineStringArray { @@ -186,11 +231,10 @@ impl From> for GenericListArray } pub(crate) fn first_pass<'a>( - geoms: impl Iterator>, - geoms_length: usize, + geoms: impl ExactSizeIterator + Iterator>, ) -> (usize, usize) { let mut coord_capacity = 0; - let geom_capacity = geoms_length; + let geom_capacity = geoms.len(); for line_string in geoms.into_iter().flatten() { coord_capacity += line_string.num_coords(); @@ -203,8 +247,13 @@ pub(crate) fn second_pass<'a, O: OffsetSizeTrait>( geoms: impl Iterator + 'a)>>, coord_capacity: usize, geom_capacity: usize, + coord_type: CoordType, ) -> MutableLineStringArray { - let mut array = MutableLineStringArray::with_capacities(coord_capacity, geom_capacity); + let mut array = MutableLineStringArray::with_capacities_and_options( + coord_capacity, + geom_capacity, + coord_type, + ); geoms .into_iter() @@ -216,8 +265,13 @@ pub(crate) fn second_pass<'a, O: OffsetSizeTrait>( impl> From> for MutableLineStringArray { fn from(geoms: Vec) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len()); - second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity) + let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); + second_pass( + geoms.iter().map(Some), + coord_capacity, + geom_capacity, + Default::default(), + ) } } @@ -225,13 +279,12 @@ impl> From>> for MutableLineStringArray { fn from(geoms: Vec>) -> Self { - let geoms_len = geoms.len(); - let (coord_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms_len); + let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); second_pass( geoms.iter().map(|x| x.as_ref()), coord_capacity, geom_capacity, + Default::default(), ) } } @@ -240,8 +293,13 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len()); - second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity) + let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); + second_pass( + geoms.iter().map(Some), + coord_capacity, + geom_capacity, + Default::default(), + ) } } @@ -249,12 +307,12 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let (coord_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); + let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); second_pass( geoms.iter().map(|x| x.as_ref()), coord_capacity, geom_capacity, + Default::default(), ) } } diff --git a/src/array/point/mutable.rs b/src/array/point/mutable.rs index 824b00a4..de3846b0 100644 --- a/src/array/point/mutable.rs +++ b/src/array/point/mutable.rs @@ -1,7 +1,10 @@ use std::sync::Arc; // use super::array::check; -use crate::array::{MutableCoordBuffer, MutableInterleavedCoordBuffer, PointArray, WKBArray}; +use crate::array::{ + CoordType, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableSeparatedCoordBuffer, + PointArray, WKBArray, +}; use crate::error::GeoArrowError; use crate::geo_traits::PointTrait; use crate::io::wkb::reader::point::WKBPoint; @@ -21,14 +24,30 @@ pub struct MutablePointArray { impl MutablePointArray { /// Creates a new empty [`MutablePointArray`]. pub fn new() -> Self { - Self::with_capacity(0) + Self::new_with_options(Default::default()) + } + + pub fn new_with_options(coord_type: CoordType) -> Self { + Self::with_capacity_and_options(0, coord_type) } /// Creates a new [`MutablePointArray`] with a capacity. pub fn with_capacity(capacity: usize) -> Self { - let coords = MutableInterleavedCoordBuffer::with_capacity(capacity); + Self::with_capacity_and_options(capacity, Default::default()) + } + + /// Creates a new [`MutablePointArray`] with a capacity. + pub fn with_capacity_and_options(capacity: usize, coord_type: CoordType) -> Self { + let coords = match coord_type { + CoordType::Interleaved => MutableCoordBuffer::Interleaved( + MutableInterleavedCoordBuffer::with_capacity(capacity), + ), + CoordType::Separated => { + MutableCoordBuffer::Separated(MutableSeparatedCoordBuffer::with_capacity(capacity)) + } + }; Self { - coords: MutableCoordBuffer::Interleaved(coords), + coords, validity: NullBufferBuilder::new(capacity), } } @@ -107,6 +126,28 @@ impl MutablePointArray { self.coords.push_xy(0., 0.); self.validity.append(false); } + + pub fn from_points<'a>( + geoms: impl ExactSizeIterator + Iterator + 'a)>, + coord_type: CoordType, + ) -> Self { + let mut mutable_array = Self::with_capacity_and_options(geoms.len(), coord_type); + geoms + .into_iter() + .for_each(|maybe_point| mutable_array.push_point(Some(maybe_point))); + mutable_array + } + + pub fn from_nullable_points<'a>( + geoms: impl ExactSizeIterator + Iterator + 'a)>>, + coord_type: CoordType, + ) -> MutablePointArray { + let mut mutable_array = Self::with_capacity_and_options(geoms.len(), coord_type); + geoms + .into_iter() + .for_each(|maybe_point| mutable_array.push_point(maybe_point)); + mutable_array + } } impl MutableGeometryArray for MutablePointArray { @@ -159,53 +200,33 @@ impl From for Arc { } } -fn from_coords<'a>( - geoms: impl Iterator + 'a)>, - geoms_length: usize, -) -> MutablePointArray { - let mut mutable_array = MutablePointArray::with_capacity(geoms_length); - geoms - .into_iter() - .for_each(|maybe_point| mutable_array.push_point(Some(maybe_point))); - mutable_array -} - -pub(crate) fn from_nullable_coords<'a>( - geoms: impl Iterator + 'a)>>, - geoms_length: usize, -) -> MutablePointArray { - let mut mutable_array = MutablePointArray::with_capacity(geoms_length); - geoms - .into_iter() - .for_each(|maybe_point| mutable_array.push_point(maybe_point)); - mutable_array -} - impl> From> for MutablePointArray { fn from(value: Vec) -> Self { - let geoms_length = value.len(); - from_coords(value.iter(), geoms_length) + MutablePointArray::from_points(value.iter(), Default::default()) } } impl> From>> for MutablePointArray { fn from(geoms: Vec>) -> Self { - let geoms_length = geoms.len(); - from_nullable_coords(geoms.iter().map(|x| x.as_ref()), geoms_length) + MutablePointArray::from_nullable_points( + geoms.iter().map(|x| x.as_ref()), + Default::default(), + ) } } impl> From> for MutablePointArray { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let geoms_length = geoms.len(); - from_coords(geoms.iter(), geoms_length) + MutablePointArray::from_points(geoms.iter(), Default::default()) } } impl> From>> for MutablePointArray { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let geoms_length = geoms.len(); - from_nullable_coords(geoms.iter().map(|x| x.as_ref()), geoms_length) + MutablePointArray::from_nullable_points( + geoms.iter().map(|x| x.as_ref()), + Default::default(), + ) } } diff --git a/src/io/geos/array/point.rs b/src/io/geos/array/point.rs index 718fda72..be1e201a 100644 --- a/src/io/geos/array/point.rs +++ b/src/io/geos/array/point.rs @@ -1,4 +1,3 @@ -use crate::array::point::mutable::from_nullable_coords; use crate::array::{MutablePointArray, PointArray}; use crate::error::GeoArrowError; use crate::io::geos::scalar::GEOSPoint; @@ -7,15 +6,14 @@ impl<'a> TryFrom>>> for MutablePointArray { type Error = GeoArrowError; fn try_from(value: Vec>>) -> std::result::Result { - let length = value.len(); // TODO: don't use new_unchecked let geos_linestring_objects: Vec> = value .into_iter() .map(|geom| geom.map(GEOSPoint::new_unchecked)) .collect(); - Ok(from_nullable_coords( + Ok(MutablePointArray::from_nullable_points( geos_linestring_objects.iter().map(|item| item.as_ref()), - length, + Default::default(), )) } } From 99cd0cfeabf38f203b146f1ca5551ec11dea4848 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 27 Nov 2023 00:39:58 -0500 Subject: [PATCH 2/5] optional coord type --- src/array/linestring/mutable.rs | 8 ++++---- src/array/point/mutable.rs | 10 ++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/array/linestring/mutable.rs b/src/array/linestring/mutable.rs index 74d29e1b..98956760 100644 --- a/src/array/linestring/mutable.rs +++ b/src/array/linestring/mutable.rs @@ -177,27 +177,27 @@ impl MutableLineStringArray { pub fn from_line_strings( geoms: &[impl LineStringTrait], - coord_type: CoordType, + coord_type: Option, ) -> Self { let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); second_pass( geoms.iter().map(Some), coord_capacity, geom_capacity, - coord_type, + coord_type.unwrap_or_default(), ) } pub fn from_nullable_line_strings( geoms: &[Option>], - coord_type: CoordType, + coord_type: Option, ) -> Self { let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); second_pass( geoms.iter().map(|x| x.as_ref()), coord_capacity, geom_capacity, - coord_type, + coord_type.unwrap_or_default(), ) } } diff --git a/src/array/point/mutable.rs b/src/array/point/mutable.rs index de3846b0..0219be51 100644 --- a/src/array/point/mutable.rs +++ b/src/array/point/mutable.rs @@ -129,9 +129,10 @@ impl MutablePointArray { pub fn from_points<'a>( geoms: impl ExactSizeIterator + Iterator + 'a)>, - coord_type: CoordType, + coord_type: Option, ) -> Self { - let mut mutable_array = Self::with_capacity_and_options(geoms.len(), coord_type); + let mut mutable_array = + Self::with_capacity_and_options(geoms.len(), coord_type.unwrap_or_default()); geoms .into_iter() .for_each(|maybe_point| mutable_array.push_point(Some(maybe_point))); @@ -140,9 +141,10 @@ impl MutablePointArray { pub fn from_nullable_points<'a>( geoms: impl ExactSizeIterator + Iterator + 'a)>>, - coord_type: CoordType, + coord_type: Option, ) -> MutablePointArray { - let mut mutable_array = Self::with_capacity_and_options(geoms.len(), coord_type); + let mut mutable_array = + Self::with_capacity_and_options(geoms.len(), coord_type.unwrap_or_default()); geoms .into_iter() .for_each(|maybe_point| mutable_array.push_point(maybe_point)); From 6aa3eca652dde5deb143f8f28731b906672ae9ae Mon Sep 17 00:00:00 2001 From: Josiah Parry Date: Mon, 27 Nov 2023 11:11:54 -0500 Subject: [PATCH 3/5] Adds `new_with_capacity()` for all other geometry types (#279) * add multable multilinepoint array with options * new_with_options() for MultiLineString * new_with_options() for Polygon * new_with_options() for MultiPolygon * run cargo fmt * address PR comments * cargo fmt --- src/array/multilinestring/mutable.rs | 33 ++++++++++++++++++++++--- src/array/multipoint/mutable.rs | 30 ++++++++++++++++++---- src/array/multipolygon/mutable.rs | 37 +++++++++++++++++++++++++--- src/array/polygon/mutable.rs | 33 ++++++++++++++++++++++--- 4 files changed, 116 insertions(+), 17 deletions(-) diff --git a/src/array/multilinestring/mutable.rs b/src/array/multilinestring/mutable.rs index 899cd2c4..c23823b7 100644 --- a/src/array/multilinestring/mutable.rs +++ b/src/array/multilinestring/mutable.rs @@ -3,8 +3,8 @@ use std::sync::Arc; // use super::array::check; use crate::array::mutable_offset::OffsetsBuilder; use crate::array::{ - MultiLineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutablePolygonArray, - WKBArray, + CoordType, MultiLineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, + MutablePolygonArray, MutableSeparatedCoordBuffer, WKBArray, }; use crate::error::{GeoArrowError, Result}; use crate::geo_traits::{LineStringTrait, MultiLineStringTrait}; @@ -41,15 +41,40 @@ impl MutableMultiLineStringArray { MutablePolygonArray::new().into() } + pub fn new_with_options(coord_type: CoordType) -> Self { + Self::with_capacities_and_options(0, 0, 0, coord_type) + } + /// Creates a new [`MutableMultiLineStringArray`] with a capacity. pub fn with_capacities( coord_capacity: usize, ring_capacity: usize, geom_capacity: usize, ) -> Self { - let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity); + Self::with_capacities_and_options( + coord_capacity, + ring_capacity, + geom_capacity, + Default::default(), + ) + } + + pub fn with_capacities_and_options( + coord_capacity: usize, + ring_capacity: usize, + geom_capacity: usize, + coord_type: CoordType, + ) -> Self { + let coords = match coord_type { + CoordType::Interleaved => MutableCoordBuffer::Interleaved( + MutableInterleavedCoordBuffer::with_capacity(coord_capacity), + ), + CoordType::Separated => MutableCoordBuffer::Separated( + MutableSeparatedCoordBuffer::with_capacity(coord_capacity), + ), + }; Self { - coords: MutableCoordBuffer::Interleaved(coords), + coords, geom_offsets: OffsetsBuilder::with_capacity(geom_capacity), ring_offsets: OffsetsBuilder::with_capacity(ring_capacity), validity: NullBufferBuilder::new(geom_capacity), diff --git a/src/array/multipoint/mutable.rs b/src/array/multipoint/mutable.rs index e48017c5..26ed09fd 100644 --- a/src/array/multipoint/mutable.rs +++ b/src/array/multipoint/mutable.rs @@ -3,8 +3,8 @@ use std::sync::Arc; // use super::array::check; use crate::array::mutable_offset::OffsetsBuilder; use crate::array::{ - MultiPointArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableLineStringArray, - WKBArray, + CoordType, MultiPointArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, + MutableLineStringArray, MutableSeparatedCoordBuffer, WKBArray, }; use crate::error::{GeoArrowError, Result}; use crate::geo_traits::{MultiPointTrait, PointTrait}; @@ -29,14 +29,34 @@ pub struct MutableMultiPointArray { impl MutableMultiPointArray { /// Creates a new empty [`MutableMultiPointArray`]. pub fn new() -> Self { - Self::with_capacities(0, 0) + Self::new_with_options(Default::default()) } + /// Creates a new [`MutableMultiPointArray`] with a specified [`CoordType`] + pub fn new_with_options(coord_type: CoordType) -> Self { + Self::with_capacities_and_options(0, 0, coord_type) + } /// Creates a new [`MutableMultiPointArray`] with a capacity. pub fn with_capacities(coord_capacity: usize, geom_capacity: usize) -> Self { - let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity); + Self::with_capacities_and_options(coord_capacity, geom_capacity, Default::default()) + } + + // with capacities and options enables us to write with_capacities based on this method + pub fn with_capacities_and_options( + coord_capacity: usize, + geom_capacity: usize, + coord_type: CoordType, + ) -> Self { + let coords = match coord_type { + CoordType::Interleaved => MutableCoordBuffer::Interleaved( + MutableInterleavedCoordBuffer::with_capacity(coord_capacity), + ), + CoordType::Separated => MutableCoordBuffer::Separated( + MutableSeparatedCoordBuffer::with_capacity(coord_capacity), + ), + }; Self { - coords: MutableCoordBuffer::Interleaved(coords), + coords, geom_offsets: OffsetsBuilder::with_capacity(geom_capacity), validity: NullBufferBuilder::new(geom_capacity), } diff --git a/src/array/multipolygon/mutable.rs b/src/array/multipolygon/mutable.rs index d3cf69b7..bdf3da02 100644 --- a/src/array/multipolygon/mutable.rs +++ b/src/array/multipolygon/mutable.rs @@ -3,7 +3,8 @@ use std::sync::Arc; // use super::array::check; use crate::array::mutable_offset::OffsetsBuilder; use crate::array::{ - MultiPolygonArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, WKBArray, + CoordType, MultiPolygonArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, + MutableSeparatedCoordBuffer, WKBArray, }; use crate::error::{GeoArrowError, Result}; use crate::geo_traits::{LineStringTrait, MultiPolygonTrait, PolygonTrait}; @@ -44,7 +45,11 @@ pub struct MutableMultiPolygonArray { impl MutableMultiPolygonArray { /// Creates a new empty [`MutableMultiPolygonArray`]. pub fn new() -> Self { - Self::with_capacities(0, 0, 0, 0) + Self::new_with_options(Default::default()) + } + + pub fn new_with_options(coord_type: CoordType) -> Self { + Self::with_capacities_and_options(0, 0, 0, 0, coord_type) } /// Creates a new [`MutableMultiPolygonArray`] with a capacity. @@ -54,9 +59,33 @@ impl MutableMultiPolygonArray { polygon_capacity: usize, geom_capacity: usize, ) -> Self { - let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity); + Self::with_capacities_and_options( + coord_capacity, + ring_capacity, + polygon_capacity, + geom_capacity, + Default::default(), + ) + } + + pub fn with_capacities_and_options( + coord_capacity: usize, + ring_capacity: usize, + polygon_capacity: usize, + geom_capacity: usize, + coord_type: CoordType, + ) -> Self { + let coords = match coord_type { + CoordType::Interleaved => MutableCoordBuffer::Interleaved( + MutableInterleavedCoordBuffer::with_capacity(coord_capacity), + ), + CoordType::Separated => MutableCoordBuffer::Separated( + MutableSeparatedCoordBuffer::with_capacity(coord_capacity), + ), + }; + Self { - coords: MutableCoordBuffer::Interleaved(coords), + coords, geom_offsets: OffsetsBuilder::with_capacity(geom_capacity), polygon_offsets: OffsetsBuilder::with_capacity(polygon_capacity), ring_offsets: OffsetsBuilder::with_capacity(ring_capacity), diff --git a/src/array/polygon/mutable.rs b/src/array/polygon/mutable.rs index 4cee3f0c..d8af0c59 100644 --- a/src/array/polygon/mutable.rs +++ b/src/array/polygon/mutable.rs @@ -3,8 +3,8 @@ use std::sync::Arc; // use super::array::check; use crate::array::mutable_offset::OffsetsBuilder; use crate::array::{ - MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableMultiLineStringArray, PolygonArray, - WKBArray, + CoordType, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableMultiLineStringArray, + MutableSeparatedCoordBuffer, PolygonArray, WKBArray, }; use crate::error::{GeoArrowError, Result}; use crate::geo_traits::{LineStringTrait, PolygonTrait}; @@ -43,15 +43,40 @@ impl MutablePolygonArray { Self::with_capacities(0, 0, 0) } + pub fn new_with_options(coord_type: CoordType) -> Self { + Self::with_capacities_and_options(0, 0, 0, coord_type) + } + /// Creates a new [`MutablePolygonArray`] with given capacities and no validity. pub fn with_capacities( coord_capacity: usize, ring_capacity: usize, geom_capacity: usize, ) -> Self { - let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity); + Self::with_capacities_and_options( + coord_capacity, + ring_capacity, + geom_capacity, + Default::default(), + ) + } + + pub fn with_capacities_and_options( + coord_capacity: usize, + ring_capacity: usize, + geom_capacity: usize, + coord_type: CoordType, + ) -> Self { + let coords = match coord_type { + CoordType::Interleaved => MutableCoordBuffer::Interleaved( + MutableInterleavedCoordBuffer::with_capacity(coord_capacity), + ), + CoordType::Separated => MutableCoordBuffer::Separated( + MutableSeparatedCoordBuffer::with_capacity(coord_capacity), + ), + }; Self { - coords: MutableCoordBuffer::Interleaved(coords), + coords, geom_offsets: OffsetsBuilder::with_capacity(geom_capacity), ring_offsets: OffsetsBuilder::with_capacity(ring_capacity), validity: NullBufferBuilder::new(geom_capacity), From e990075982861fcc67cf6506cbcdb2c484205fc6 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 27 Nov 2023 13:52:25 -0500 Subject: [PATCH 4/5] wip public first_pass and second_pass api --- src/array/linestring/mutable.rs | 127 ++++++++++++++++---------------- 1 file changed, 62 insertions(+), 65 deletions(-) diff --git a/src/array/linestring/mutable.rs b/src/array/linestring/mutable.rs index 98956760..3d7219e8 100644 --- a/src/array/linestring/mutable.rs +++ b/src/array/linestring/mutable.rs @@ -62,6 +62,21 @@ impl MutableLineStringArray { } } + pub fn with_capacities_from_iter<'a>( + geoms: impl Iterator>, + ) -> Self { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + Self::with_capacities(coord_capacity, geom_capacity) + } + + pub fn with_capacities_and_options_from_iter<'a>( + geoms: impl Iterator>, + coord_type: CoordType, + ) -> Self { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + Self::with_capacities_and_options(coord_capacity, geom_capacity, coord_type) + } + /// Reserves capacity for at least `additional` more LineStrings to be inserted /// in the given `Vec`. The collection may reserve more space to /// speculatively avoid frequent reallocations. After calling `reserve`, @@ -72,6 +87,22 @@ impl MutableLineStringArray { self.geom_offsets.reserve(geom_additional); } + pub fn reserve_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve(coord_capacity, geom_capacity) + } + + pub fn reserve_exact_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve_exact(coord_capacity, geom_capacity) + } + /// Reserves the minimum capacity for at least `additional` more LineStrings to /// be inserted in the given `Vec`. Unlike [`reserve`], this will not /// deliberately over-allocate to speculatively avoid frequent allocations. @@ -86,6 +117,7 @@ impl MutableLineStringArray { /// [`reserve`]: Vec::reserve pub fn reserve_exact(&mut self, coord_additional: usize, geom_additional: usize) { self.coords.reserve_exact(coord_additional); + // TODO: implement reserve_exact on OffsetsBuilder self.geom_offsets.reserve(geom_additional); } @@ -145,6 +177,16 @@ impl MutableLineStringArray { Ok(()) } + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_point| self.push_line_string(maybe_multi_point)) + .unwrap(); + } + /// Push a raw coordinate to the underlying coordinate array. /// /// # Safety @@ -179,26 +221,22 @@ impl MutableLineStringArray { geoms: &[impl LineStringTrait], coord_type: Option, ) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); - second_pass( - geoms.iter().map(Some), - coord_capacity, - geom_capacity, - coord_type.unwrap_or_default(), - ) + let mut array = + Self::with_capacities_and_options_from_iter(geoms.iter().map(Some), coord_type.unwrap_or_default()); + array.extend_from_iter(geoms.iter().map(Some)); + array } pub fn from_nullable_line_strings( geoms: &[Option>], coord_type: Option, ) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); - second_pass( + let mut array = Self::with_capacities_and_options_from_iter( geoms.iter().map(|x| x.as_ref()), - coord_capacity, - geom_capacity, coord_type.unwrap_or_default(), - ) + ); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array } } @@ -230,48 +268,25 @@ impl From> for GenericListArray } } -pub(crate) fn first_pass<'a>( - geoms: impl ExactSizeIterator + Iterator>, +pub(crate) fn count_from_iter<'a>( + geoms: impl Iterator>, ) -> (usize, usize) { let mut coord_capacity = 0; - let geom_capacity = geoms.len(); + let mut geom_capacity = 0; - for line_string in geoms.into_iter().flatten() { - coord_capacity += line_string.num_coords(); + for maybe_line_string in geoms.into_iter() { + geom_capacity += 1; + if let Some(line_string) = maybe_line_string { + coord_capacity += line_string.num_coords(); + } } (coord_capacity, geom_capacity) } -pub(crate) fn second_pass<'a, O: OffsetSizeTrait>( - geoms: impl Iterator + 'a)>>, - coord_capacity: usize, - geom_capacity: usize, - coord_type: CoordType, -) -> MutableLineStringArray { - let mut array = MutableLineStringArray::with_capacities_and_options( - coord_capacity, - geom_capacity, - coord_type, - ); - - geoms - .into_iter() - .try_for_each(|maybe_multi_point| array.push_line_string(maybe_multi_point)) - .unwrap(); - - array -} - impl> From> for MutableLineStringArray { fn from(geoms: Vec) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); - second_pass( - geoms.iter().map(Some), - coord_capacity, - geom_capacity, - Default::default(), - ) + Self::from_line_strings(&geoms, Default::default()) } } @@ -279,13 +294,7 @@ impl> From>> for MutableLineStringArray { fn from(geoms: Vec>) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - geom_capacity, - Default::default(), - ) + Self::from_nullable_line_strings(&geoms, Default::default()) } } @@ -293,13 +302,7 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some)); - second_pass( - geoms.iter().map(Some), - coord_capacity, - geom_capacity, - Default::default(), - ) + Self::from_line_strings(&geoms, Default::default()) } } @@ -307,13 +310,7 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(|x| x.as_ref())); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - geom_capacity, - Default::default(), - ) + Self::from_nullable_line_strings(&geoms, Default::default()) } } From a7c821a037c9bd7c8d3a9c5393382ebd7b846292 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 27 Nov 2023 22:31:45 -0500 Subject: [PATCH 5/5] Flesh out public apis --- src/array/linestring/mutable.rs | 9 +- src/array/multilinestring/mutable.rs | 141 ++++++++++--------- src/array/multipoint/mutable.rs | 114 ++++++++++------ src/array/multipolygon/mutable.rs | 196 +++++++++++++++------------ src/array/polygon/mutable.rs | 147 +++++++++++--------- 5 files changed, 355 insertions(+), 252 deletions(-) diff --git a/src/array/linestring/mutable.rs b/src/array/linestring/mutable.rs index 3d7219e8..5ae4ed25 100644 --- a/src/array/linestring/mutable.rs +++ b/src/array/linestring/mutable.rs @@ -65,8 +65,7 @@ impl MutableLineStringArray { pub fn with_capacities_from_iter<'a>( geoms: impl Iterator>, ) -> Self { - let (coord_capacity, geom_capacity) = count_from_iter(geoms); - Self::with_capacities(coord_capacity, geom_capacity) + Self::with_capacities_and_options_from_iter(geoms, Default::default()) } pub fn with_capacities_and_options_from_iter<'a>( @@ -221,8 +220,10 @@ impl MutableLineStringArray { geoms: &[impl LineStringTrait], coord_type: Option, ) -> Self { - let mut array = - Self::with_capacities_and_options_from_iter(geoms.iter().map(Some), coord_type.unwrap_or_default()); + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(Some), + coord_type.unwrap_or_default(), + ); array.extend_from_iter(geoms.iter().map(Some)); array } diff --git a/src/array/multilinestring/mutable.rs b/src/array/multilinestring/mutable.rs index c23823b7..5e7645d5 100644 --- a/src/array/multilinestring/mutable.rs +++ b/src/array/multilinestring/mutable.rs @@ -81,6 +81,20 @@ impl MutableMultiLineStringArray { } } + pub fn with_capacities_from_iter<'a>( + geoms: impl Iterator>, + ) -> Self { + Self::with_capacities_and_options_from_iter(geoms, Default::default()) + } + + pub fn with_capacities_and_options_from_iter<'a>( + geoms: impl Iterator>, + coord_type: CoordType, + ) -> Self { + let (coord_capacity, ring_capacity, geom_capacity) = count_from_iter(geoms); + Self::with_capacities_and_options(coord_capacity, ring_capacity, geom_capacity, coord_type) + } + /// Reserves capacity for at least `additional` more LineStrings to be inserted /// in the given `Vec`. The collection may reserve more space to /// speculatively avoid frequent reallocations. After calling `reserve`, @@ -120,6 +134,22 @@ impl MutableMultiLineStringArray { self.geom_offsets.reserve(geom_additional); } + pub fn reserve_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, ring_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve(coord_capacity, ring_capacity, geom_capacity) + } + + pub fn reserve_exact_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, ring_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve_exact(coord_capacity, ring_capacity, geom_capacity) + } + /// The canonical method to create a [`MutableMultiLineStringArray`] out of its internal /// components. /// @@ -240,6 +270,16 @@ impl MutableMultiLineStringArray { Ok(()) } + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_point| self.push_multi_line_string(maybe_multi_point)) + .unwrap(); + } + /// Push a raw offset to the underlying geometry offsets buffer. /// /// # Safety @@ -281,6 +321,30 @@ impl MutableMultiLineStringArray { self.geom_offsets.extend_constant(1); self.validity.append(false); } + + pub fn from_multi_line_strings( + geoms: &[impl MultiLineStringTrait], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(Some), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + pub fn from_nullable_multi_line_strings( + geoms: &[Option>], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(|x| x.as_ref()), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } } impl IntoArrow for MutableMultiLineStringArray { @@ -309,23 +373,25 @@ impl From> for MultiLineStrin } } -fn first_pass<'a>( +fn count_from_iter<'a>( geoms: impl Iterator>, - geoms_length: usize, ) -> (usize, usize, usize) { // Total number of coordinates let mut coord_capacity = 0; let mut ring_capacity = 0; - let geom_capacity = geoms_length; + let mut geom_capacity = 0; - for multi_line_string in geoms.into_iter().flatten() { - // Total number of rings in this polygon - let num_line_strings = multi_line_string.num_lines(); - ring_capacity += num_line_strings; + for maybe_multi_line_string in geoms.into_iter() { + geom_capacity += 1; + if let Some(multi_line_string) = maybe_multi_line_string { + // Total number of rings in this polygon + let num_line_strings = multi_line_string.num_lines(); + ring_capacity += num_line_strings; - for line_string_idx in 0..num_line_strings { - let line_string = multi_line_string.line(line_string_idx).unwrap(); - coord_capacity += line_string.num_coords(); + for line_string_idx in 0..num_line_strings { + let line_string = multi_line_string.line(line_string_idx).unwrap(); + coord_capacity += line_string.num_coords(); + } } } @@ -333,37 +399,11 @@ fn first_pass<'a>( (coord_capacity, ring_capacity, geom_capacity) } -fn second_pass<'a, O: OffsetSizeTrait>( - geoms: impl Iterator + 'a)>>, - coord_capacity: usize, - ring_capacity: usize, - geom_capacity: usize, -) -> MutableMultiLineStringArray { - let mut array = - MutableMultiLineStringArray::with_capacities(coord_capacity, ring_capacity, geom_capacity); - - geoms - .into_iter() - .try_for_each(|maybe_multi_line_string| { - array.push_multi_line_string(maybe_multi_line_string) - }) - .unwrap(); - - array -} - impl> From> for MutableMultiLineStringArray { fn from(geoms: Vec) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(Some), geoms.len()); - second_pass( - geoms.iter().map(Some), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_multi_line_strings(&geoms, Default::default()) } } @@ -371,14 +411,7 @@ impl> From>> for MutableMultiLineStringArray { fn from(geoms: Vec>) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_nullable_multi_line_strings(&geoms, Default::default()) } } @@ -386,14 +419,7 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(Some), geoms.len()); - second_pass( - geoms.iter().map(Some), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_multi_line_strings(&geoms, Default::default()) } } @@ -401,14 +427,7 @@ impl> From>> for MutableMultiLineStringArray { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_nullable_multi_line_strings(&geoms, Default::default()) } } diff --git a/src/array/multipoint/mutable.rs b/src/array/multipoint/mutable.rs index 26ed09fd..f5c01e9c 100644 --- a/src/array/multipoint/mutable.rs +++ b/src/array/multipoint/mutable.rs @@ -62,6 +62,20 @@ impl MutableMultiPointArray { } } + pub fn with_capacities_from_iter<'a>( + geoms: impl Iterator>, + ) -> Self { + Self::with_capacities_and_options_from_iter(geoms, Default::default()) + } + + pub fn with_capacities_and_options_from_iter<'a>( + geoms: impl Iterator>, + coord_type: CoordType, + ) -> Self { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + Self::with_capacities_and_options(coord_capacity, geom_capacity, coord_type) + } + /// Reserves capacity for at least `additional` more MultiPoints to be inserted /// in the given `Vec`. The collection may reserve more space to /// speculatively avoid frequent reallocations. After calling `reserve`, @@ -89,6 +103,22 @@ impl MutableMultiPointArray { self.geom_offsets.reserve(geom_additional); } + pub fn reserve_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve(coord_capacity, geom_capacity) + } + + pub fn reserve_exact_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve_exact(coord_capacity, geom_capacity) + } + /// The canonical method to create a [`MutableMultiPointArray`] out of its internal components. /// /// # Implementation @@ -127,6 +157,16 @@ impl MutableMultiPointArray { Arc::new(self.into_arrow()) } + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_point| self.push_multi_point(maybe_multi_point)) + .unwrap(); + } + /// Add a new Point to the end of this array. /// /// # Errors @@ -206,6 +246,30 @@ impl MutableMultiPointArray { self.geom_offsets.extend_constant(1); self.validity.append(false); } + + pub fn from_multi_points( + geoms: &[impl MultiPointTrait], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(Some), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + pub fn from_nullable_multi_points( + geoms: &[Option>], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(|x| x.as_ref()), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } } impl Default for MutableMultiPointArray { @@ -263,39 +327,26 @@ impl From> for GenericListArray } } -fn first_pass<'a>( +fn count_from_iter<'a>( geoms: impl Iterator>, - geoms_length: usize, ) -> (usize, usize) { let mut coord_capacity = 0; - let geom_capacity = geoms_length; + let mut geom_capacity = 0; + + for maybe_multi_point in geoms.into_iter() { + geom_capacity += 1; - for multi_point in geoms.into_iter().flatten() { - coord_capacity += multi_point.num_points(); + if let Some(multi_point) = maybe_multi_point { + coord_capacity += multi_point.num_points(); + } } (coord_capacity, geom_capacity) } -fn second_pass<'a, O: OffsetSizeTrait>( - geoms: impl Iterator + 'a)>>, - coord_capacity: usize, - geom_capacity: usize, -) -> MutableMultiPointArray { - let mut array = MutableMultiPointArray::with_capacities(coord_capacity, geom_capacity); - - geoms - .into_iter() - .try_for_each(|maybe_multi_point| array.push_multi_point(maybe_multi_point)) - .unwrap(); - - array -} - impl> From> for MutableMultiPointArray { fn from(geoms: Vec) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len()); - second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity) + Self::from_multi_points(&geoms, Default::default()) } } @@ -303,13 +354,7 @@ impl> From>> for MutableMultiPointArray { fn from(geoms: Vec>) -> Self { - let (coord_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - geom_capacity, - ) + Self::from_nullable_multi_points(&geoms, Default::default()) } } @@ -317,8 +362,7 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len()); - second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity) + Self::from_multi_points(&geoms, Default::default()) } } @@ -326,13 +370,7 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let (coord_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - geom_capacity, - ) + Self::from_nullable_multi_points(&geoms, Default::default()) } } diff --git a/src/array/multipolygon/mutable.rs b/src/array/multipolygon/mutable.rs index bdf3da02..1dfe5bad 100644 --- a/src/array/multipolygon/mutable.rs +++ b/src/array/multipolygon/mutable.rs @@ -11,7 +11,6 @@ use crate::geo_traits::{LineStringTrait, MultiPolygonTrait, PolygonTrait}; use crate::io::wkb::reader::maybe_multipolygon::WKBMaybeMultiPolygon; use crate::scalar::WKB; use crate::trait_::IntoArrow; -use crate::GeometryArrayTrait; use arrow_array::{Array, GenericListArray, OffsetSizeTrait}; use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; @@ -93,6 +92,28 @@ impl MutableMultiPolygonArray { } } + pub fn with_capacities_from_iter<'a>( + geoms: impl Iterator>, + ) -> Self { + Self::with_capacities_and_options_from_iter(geoms, Default::default()) + } + + pub fn with_capacities_and_options_from_iter<'a>( + geoms: impl Iterator>, + coord_type: CoordType, + ) -> Self { + let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = + count_from_iter(geoms); + + Self::with_capacities_and_options( + coord_capacity, + ring_capacity, + polygon_capacity, + geom_capacity, + coord_type, + ) + } + /// Reserves capacity for at least `additional` more LineStrings to be inserted /// in the given `Vec`. The collection may reserve more space to /// speculatively avoid frequent reallocations. After calling `reserve`, @@ -136,6 +157,34 @@ impl MutableMultiPolygonArray { self.geom_offsets.reserve(geom_additional); } + pub fn reserve_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = + count_from_iter(geoms); + self.reserve( + coord_capacity, + ring_capacity, + polygon_capacity, + geom_capacity, + ) + } + + pub fn reserve_exact_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = + count_from_iter(geoms); + self.reserve_exact( + coord_capacity, + ring_capacity, + polygon_capacity, + geom_capacity, + ) + } + /// The canonical method to create a [`MutableMultiPolygonArray`] out of its internal /// components. /// @@ -291,6 +340,16 @@ impl MutableMultiPolygonArray { Ok(()) } + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_multi_polygon| self.push_multi_polygon(maybe_multi_polygon)) + .unwrap(); + } + /// Push a raw offset to the underlying geometry offsets buffer. /// /// # Safety @@ -350,6 +409,30 @@ impl MutableMultiPolygonArray { self.geom_offsets.extend_constant(1); self.validity.append(false); } + + pub fn from_multi_polygons( + geoms: &[impl MultiPolygonTrait], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(Some), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + pub fn from_nullable_multi_polygons( + geoms: &[Option>], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(|x| x.as_ref()), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } } impl Default for MutableMultiPolygonArray { @@ -385,34 +468,37 @@ impl From> for MultiPolygonArray } } -fn first_pass<'a>( +fn count_from_iter<'a>( geoms: impl Iterator>, - geoms_length: usize, ) -> (usize, usize, usize, usize) { let mut coord_capacity = 0; let mut ring_capacity = 0; let mut polygon_capacity = 0; - let geom_capacity = geoms_length; + let mut geom_capacity = 0; + + for maybe_multi_polygon in geoms.into_iter() { + geom_capacity += 1; - for multi_polygon in geoms.into_iter().flatten() { - // Total number of polygons in this MultiPolygon - let num_polygons = multi_polygon.num_polygons(); - polygon_capacity += num_polygons; + if let Some(multi_polygon) = maybe_multi_polygon { + // Total number of polygons in this MultiPolygon + let num_polygons = multi_polygon.num_polygons(); + polygon_capacity += num_polygons; - for polygon_idx in 0..num_polygons { - let polygon = multi_polygon.polygon(polygon_idx).unwrap(); + for polygon_idx in 0..num_polygons { + let polygon = multi_polygon.polygon(polygon_idx).unwrap(); - // Total number of rings in this MultiPolygon - ring_capacity += polygon.num_interiors() + 1; + // Total number of rings in this MultiPolygon + ring_capacity += polygon.num_interiors() + 1; - // Number of coords for each ring - if let Some(exterior) = polygon.exterior() { - coord_capacity += exterior.num_coords(); - } + // Number of coords for each ring + if let Some(exterior) = polygon.exterior() { + coord_capacity += exterior.num_coords(); + } - for int_ring_idx in 0..polygon.num_interiors() { - let int_ring = polygon.interior(int_ring_idx).unwrap(); - coord_capacity += int_ring.num_coords(); + for int_ring_idx in 0..polygon.num_interiors() { + let int_ring = polygon.interior(int_ring_idx).unwrap(); + coord_capacity += int_ring.num_coords(); + } } } } @@ -425,41 +511,11 @@ fn first_pass<'a>( ) } -fn second_pass<'a, O: OffsetSizeTrait>( - geoms: impl Iterator + 'a)>>, - coord_capacity: usize, - ring_capacity: usize, - polygon_capacity: usize, - geom_capacity: usize, -) -> MutableMultiPolygonArray { - let mut array = MutableMultiPolygonArray::with_capacities( - coord_capacity, - ring_capacity, - polygon_capacity, - geom_capacity, - ); - - geoms - .into_iter() - .try_for_each(|maybe_multi_polygon| array.push_multi_polygon(maybe_multi_polygon)) - .unwrap(); - - array -} - impl> From> for MutableMultiPolygonArray { fn from(geoms: Vec) -> Self { - let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = - first_pass(geoms.iter().map(Some), geoms.len()); - second_pass( - geoms.iter().map(Some), - coord_capacity, - ring_capacity, - polygon_capacity, - geom_capacity, - ) + Self::from_multi_polygons(&geoms, Default::default()) } } @@ -467,15 +523,7 @@ impl> From>> for MutableMultiPolygonArray { fn from(geoms: Vec>) -> Self { - let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - ring_capacity, - polygon_capacity, - geom_capacity, - ) + Self::from_nullable_multi_polygons(&geoms, Default::default()) } } @@ -483,15 +531,7 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = - first_pass(geoms.iter().map(Some), geoms.len()); - second_pass( - geoms.iter().map(Some), - coord_capacity, - ring_capacity, - polygon_capacity, - geom_capacity, - ) + Self::from_multi_polygons(&geoms, Default::default()) } } @@ -499,15 +539,7 @@ impl> From>> for MutableMultiPolygonArray { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - ring_capacity, - polygon_capacity, - geom_capacity, - ) + Self::from_nullable_multi_polygons(&geoms, Default::default()) } } @@ -524,14 +556,6 @@ impl TryFrom> for MutableMultiPolygonArray { .map(|wkb| wkb.to_wkb_object().into_maybe_multi_polygon()) }) .collect(); - let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) = - first_pass(wkb_objects2.iter().map(|item| item.as_ref()), value.len()); - Ok(second_pass( - wkb_objects2.iter().map(|item| item.as_ref()), - coord_capacity, - ring_capacity, - polygon_capacity, - geom_capacity, - )) + Ok(wkb_objects2.into()) } } diff --git a/src/array/polygon/mutable.rs b/src/array/polygon/mutable.rs index d8af0c59..7333a6f8 100644 --- a/src/array/polygon/mutable.rs +++ b/src/array/polygon/mutable.rs @@ -83,6 +83,20 @@ impl MutablePolygonArray { } } + pub fn with_capacities_from_iter<'a>( + geoms: impl Iterator>, + ) -> Self { + Self::with_capacities_and_options_from_iter(geoms, Default::default()) + } + + pub fn with_capacities_and_options_from_iter<'a>( + geoms: impl Iterator>, + coord_type: CoordType, + ) -> Self { + let (coord_capacity, ring_capacity, geom_capacity) = count_from_iter(geoms); + Self::with_capacities_and_options(coord_capacity, ring_capacity, geom_capacity, coord_type) + } + /// Reserves capacity for at least `additional` more LineStrings to be inserted /// in the given `Vec`. The collection may reserve more space to /// speculatively avoid frequent reallocations. After calling `reserve`, @@ -122,6 +136,22 @@ impl MutablePolygonArray { self.geom_offsets.reserve(geom_additional); } + pub fn reserve_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, ring_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve(coord_capacity, ring_capacity, geom_capacity) + } + + pub fn reserve_exact_from_iter<'a>( + &mut self, + geoms: impl Iterator>, + ) { + let (coord_capacity, ring_capacity, geom_capacity) = count_from_iter(geoms); + self.reserve_exact(coord_capacity, ring_capacity, geom_capacity) + } + /// The canonical method to create a [`MutablePolygonArray`] out of its internal components. /// /// # Implementation @@ -216,6 +246,16 @@ impl MutablePolygonArray { Ok(()) } + pub fn extend_from_iter<'a>( + &mut self, + geoms: impl Iterator + 'a)>>, + ) { + geoms + .into_iter() + .try_for_each(|maybe_polygon| self.push_polygon(maybe_polygon)) + .unwrap(); + } + /// Push a raw offset to the underlying geometry offsets buffer. /// /// # Safety @@ -263,6 +303,30 @@ impl MutablePolygonArray { self.geom_offsets.extend_constant(1); self.validity.append(false); } + + pub fn from_polygons( + geoms: &[impl PolygonTrait], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(Some), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(Some)); + array + } + + pub fn from_nullable_polygons( + geoms: &[Option>], + coord_type: Option, + ) -> Self { + let mut array = Self::with_capacities_and_options_from_iter( + geoms.iter().map(|x| x.as_ref()), + coord_type.unwrap_or_default(), + ); + array.extend_from_iter(geoms.iter().map(|x| x.as_ref())); + array + } } impl Default for MutablePolygonArray { @@ -291,28 +355,30 @@ impl From> for PolygonArray { } } -fn first_pass<'a>( +fn count_from_iter<'a>( geoms: impl Iterator>, - geoms_length: usize, ) -> (usize, usize, usize) { // Total number of coordinates let mut coord_capacity = 0; let mut ring_capacity = 0; - let geom_capacity = geoms_length; + let mut geom_capacity = 0; - for polygon in geoms.into_iter().flatten() { - // Total number of rings in this polygon - let num_interiors = polygon.num_interiors(); - ring_capacity += num_interiors + 1; + for maybe_polygon in geoms.into_iter() { + geom_capacity += 1; + if let Some(polygon) = maybe_polygon { + // Total number of rings in this polygon + let num_interiors = polygon.num_interiors(); + ring_capacity += num_interiors + 1; - // Number of coords for each ring - if let Some(exterior) = polygon.exterior() { - coord_capacity += exterior.num_coords(); - } + // Number of coords for each ring + if let Some(exterior) = polygon.exterior() { + coord_capacity += exterior.num_coords(); + } - for int_ring_idx in 0..polygon.num_interiors() { - let int_ring = polygon.interior(int_ring_idx).unwrap(); - coord_capacity += int_ring.num_coords(); + for int_ring_idx in 0..polygon.num_interiors() { + let int_ring = polygon.interior(int_ring_idx).unwrap(); + coord_capacity += int_ring.num_coords(); + } } } @@ -320,46 +386,15 @@ fn first_pass<'a>( (coord_capacity, ring_capacity, geom_capacity) } -fn second_pass<'a, O: OffsetSizeTrait>( - geoms: impl Iterator + 'a)>>, - coord_capacity: usize, - ring_capacity: usize, - geom_capacity: usize, -) -> MutablePolygonArray { - let mut array = - MutablePolygonArray::with_capacities(coord_capacity, ring_capacity, geom_capacity); - - geoms - .into_iter() - .try_for_each(|maybe_polygon| array.push_polygon(maybe_polygon)) - .unwrap(); - - array -} - impl> From> for MutablePolygonArray { fn from(geoms: Vec) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(Some), geoms.len()); - second_pass( - geoms.iter().map(Some), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_polygons(&geoms, Default::default()) } } impl> From>> for MutablePolygonArray { fn from(geoms: Vec>) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_nullable_polygons(&geoms, Default::default()) } } @@ -367,28 +402,14 @@ impl> From { fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(Some), geoms.len()); - second_pass( - geoms.iter().map(Some), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_polygons(&geoms, Default::default()) } } impl> From>> for MutablePolygonArray { fn from(geoms: bumpalo::collections::Vec<'_, Option>) -> Self { - let (coord_capacity, ring_capacity, geom_capacity) = - first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len()); - second_pass( - geoms.iter().map(|x| x.as_ref()), - coord_capacity, - ring_capacity, - geom_capacity, - ) + Self::from_nullable_polygons(&geoms, Default::default()) } }