Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable setting coord type when creating mutable arrays #277

Merged
merged 5 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/array/coord/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ pub use combined::{CoordBuffer, MutableCoordBuffer};
pub use interleaved::{InterleavedCoordBuffer, MutableInterleavedCoordBuffer};
pub use separated::{MutableSeparatedCoordBuffer, SeparatedCoordBuffer};

#[derive(Debug, Clone, PartialEq)]
#[derive(Default, Debug, Clone, PartialEq)]
pub enum CoordType {
#[default]
Interleaved,
Separated,
}
144 changes: 100 additions & 44 deletions src/array/linestring/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// use super::array::check;
use crate::array::mutable_offset::OffsetsBuilder;
use crate::array::{
LineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer, MutableMultiPointArray,
WKBArray,
CoordType, LineStringArray, MutableCoordBuffer, MutableInterleavedCoordBuffer,
MutableMultiPointArray, MutableSeparatedCoordBuffer, WKBArray,
};
use crate::error::{GeoArrowError, Result};
use crate::geo_traits::LineStringTrait;
Expand Down Expand Up @@ -30,19 +30,52 @@ pub struct MutableLineStringArray<O: OffsetSizeTrait> {
impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
/// Creates a new empty [`MutableLineStringArray`].
pub fn new() -> Self {
Self::with_capacities(0, 0)
Self::new_with_options(Default::default())
}

pub fn new_with_options(coord_type: CoordType) -> Self {
Self::with_capacities_and_options(0, 0, coord_type)
}

/// Creates a new [`MutableLineStringArray`] with a capacity.
pub fn with_capacities(coord_capacity: usize, geom_capacity: usize) -> Self {
let coords = MutableInterleavedCoordBuffer::with_capacity(coord_capacity);
Self::with_capacities_and_options(coord_capacity, geom_capacity, Default::default())
}

pub fn with_capacities_and_options(
coord_capacity: usize,
geom_capacity: usize,
coord_type: CoordType,
) -> Self {
let coords = match coord_type {
CoordType::Interleaved => MutableCoordBuffer::Interleaved(
MutableInterleavedCoordBuffer::with_capacity(coord_capacity),
),
CoordType::Separated => MutableCoordBuffer::Separated(
MutableSeparatedCoordBuffer::with_capacity(coord_capacity),
),
};
Self {
coords: MutableCoordBuffer::Interleaved(coords),
coords,
geom_offsets: OffsetsBuilder::with_capacity(geom_capacity),
validity: NullBufferBuilder::new(geom_capacity),
}
}

pub fn with_capacities_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
) -> Self {
Self::with_capacities_and_options_from_iter(geoms, Default::default())
}

pub fn with_capacities_and_options_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
coord_type: CoordType,
) -> Self {
let (coord_capacity, geom_capacity) = count_from_iter(geoms);
Self::with_capacities_and_options(coord_capacity, geom_capacity, coord_type)
}

/// Reserves capacity for at least `additional` more LineStrings to be inserted
/// in the given `Vec<T>`. The collection may reserve more space to
/// speculatively avoid frequent reallocations. After calling `reserve`,
Expand All @@ -53,6 +86,22 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
self.geom_offsets.reserve(geom_additional);
}

pub fn reserve_from_iter<'a>(
&mut self,
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
) {
let (coord_capacity, geom_capacity) = count_from_iter(geoms);
self.reserve(coord_capacity, geom_capacity)
}

pub fn reserve_exact_from_iter<'a>(
&mut self,
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
) {
let (coord_capacity, geom_capacity) = count_from_iter(geoms);
self.reserve_exact(coord_capacity, geom_capacity)
}

/// Reserves the minimum capacity for at least `additional` more LineStrings to
/// be inserted in the given `Vec<T>`. Unlike [`reserve`], this will not
/// deliberately over-allocate to speculatively avoid frequent allocations.
Expand All @@ -67,6 +116,7 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
/// [`reserve`]: Vec::reserve
pub fn reserve_exact(&mut self, coord_additional: usize, geom_additional: usize) {
self.coords.reserve_exact(coord_additional);
// TODO: implement reserve_exact on OffsetsBuilder
self.geom_offsets.reserve(geom_additional);
}

Expand Down Expand Up @@ -126,6 +176,16 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
Ok(())
}

pub fn extend_from_iter<'a>(
&mut self,
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait<T = f64> + 'a)>>,
) {
geoms
.into_iter()
.try_for_each(|maybe_multi_point| self.push_line_string(maybe_multi_point))
.unwrap();
}

/// Push a raw coordinate to the underlying coordinate array.
///
/// # Safety
Expand Down Expand Up @@ -155,6 +215,30 @@ impl<O: OffsetSizeTrait> MutableLineStringArray<O> {
pub fn into_array_ref(self) -> Arc<dyn Array> {
Arc::new(self.into_arrow())
}

pub fn from_line_strings(
geoms: &[impl LineStringTrait<T = f64>],
coord_type: Option<CoordType>,
) -> Self {
let mut array = Self::with_capacities_and_options_from_iter(
geoms.iter().map(Some),
coord_type.unwrap_or_default(),
);
array.extend_from_iter(geoms.iter().map(Some));
array
}

pub fn from_nullable_line_strings(
geoms: &[Option<impl LineStringTrait<T = f64>>],
coord_type: Option<CoordType>,
) -> Self {
let mut array = Self::with_capacities_and_options_from_iter(
geoms.iter().map(|x| x.as_ref()),
coord_type.unwrap_or_default(),
);
array.extend_from_iter(geoms.iter().map(|x| x.as_ref()));
array
}
}

impl<O: OffsetSizeTrait> IntoArrow for MutableLineStringArray<O> {
Expand Down Expand Up @@ -185,77 +269,49 @@ impl<O: OffsetSizeTrait> From<MutableLineStringArray<O>> for GenericListArray<O>
}
}

pub(crate) fn first_pass<'a>(
pub(crate) fn count_from_iter<'a>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait + 'a)>>,
geoms_length: usize,
) -> (usize, usize) {
let mut coord_capacity = 0;
let geom_capacity = geoms_length;
let mut geom_capacity = 0;

for line_string in geoms.into_iter().flatten() {
coord_capacity += line_string.num_coords();
for maybe_line_string in geoms.into_iter() {
geom_capacity += 1;
if let Some(line_string) = maybe_line_string {
coord_capacity += line_string.num_coords();
}
}

(coord_capacity, geom_capacity)
}

pub(crate) fn second_pass<'a, O: OffsetSizeTrait>(
geoms: impl Iterator<Item = Option<&'a (impl LineStringTrait<T = f64> + 'a)>>,
coord_capacity: usize,
geom_capacity: usize,
) -> MutableLineStringArray<O> {
let mut array = MutableLineStringArray::with_capacities(coord_capacity, geom_capacity);

geoms
.into_iter()
.try_for_each(|maybe_multi_point| array.push_line_string(maybe_multi_point))
.unwrap();

array
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<Vec<G>> for MutableLineStringArray<O> {
fn from(geoms: Vec<G>) -> Self {
let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len());
second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity)
Self::from_line_strings(&geoms, Default::default())
}
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<Vec<Option<G>>>
for MutableLineStringArray<O>
{
fn from(geoms: Vec<Option<G>>) -> Self {
let geoms_len = geoms.len();
let (coord_capacity, geom_capacity) =
first_pass(geoms.iter().map(|x| x.as_ref()), geoms_len);
second_pass(
geoms.iter().map(|x| x.as_ref()),
coord_capacity,
geom_capacity,
)
Self::from_nullable_line_strings(&geoms, Default::default())
}
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<bumpalo::collections::Vec<'_, G>>
for MutableLineStringArray<O>
{
fn from(geoms: bumpalo::collections::Vec<'_, G>) -> Self {
let (coord_capacity, geom_capacity) = first_pass(geoms.iter().map(Some), geoms.len());
second_pass(geoms.iter().map(Some), coord_capacity, geom_capacity)
Self::from_line_strings(&geoms, Default::default())
}
}

impl<O: OffsetSizeTrait, G: LineStringTrait<T = f64>> From<bumpalo::collections::Vec<'_, Option<G>>>
for MutableLineStringArray<O>
{
fn from(geoms: bumpalo::collections::Vec<'_, Option<G>>) -> Self {
let (coord_capacity, geom_capacity) =
first_pass(geoms.iter().map(|x| x.as_ref()), geoms.len());
second_pass(
geoms.iter().map(|x| x.as_ref()),
coord_capacity,
geom_capacity,
)
Self::from_nullable_line_strings(&geoms, Default::default())
}
}

Expand Down
Loading
Loading