diff --git a/python/src/lib.rs b/python/src/lib.rs index 72be5f63c3..2dc1ab5dd7 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -193,18 +193,17 @@ impl RawDeltaTable { &self, partitions_filters: Vec<(&str, &str, PartitionFilterValue)>, ) -> PyResult> { - let partition_filters: Result>, DeltaTableError> = - partitions_filters - .into_iter() - .map(|filter| match filter { - (key, op, PartitionFilterValue::Single(v)) => { - PartitionFilter::try_from((key, op, v)) - } - (key, op, PartitionFilterValue::Multiple(v)) => { - PartitionFilter::try_from((key, op, v)) - } - }) - .collect(); + let partition_filters: Result, DeltaTableError> = partitions_filters + .into_iter() + .map(|filter| match filter { + (key, op, PartitionFilterValue::Single(v)) => { + PartitionFilter::try_from((key, op, v)) + } + (key, op, PartitionFilterValue::Multiple(v)) => { + PartitionFilter::try_from((key, op, v.as_slice())) + } + }) + .collect(); match partition_filters { Ok(filters) => Ok(self ._table @@ -673,13 +672,15 @@ impl RawDeltaTable { } fn convert_partition_filters<'a>( - partitions_filters: Vec<(&'a str, &'a str, PartitionFilterValue<'a>)>, -) -> Result>, DeltaTableError> { + partitions_filters: Vec<(&'a str, &'a str, PartitionFilterValue)>, +) -> Result, DeltaTableError> { partitions_filters .into_iter() .map(|filter| match filter { (key, op, PartitionFilterValue::Single(v)) => PartitionFilter::try_from((key, op, v)), - (key, op, PartitionFilterValue::Multiple(v)) => PartitionFilter::try_from((key, op, v)), + (key, op, PartitionFilterValue::Multiple(v)) => { + PartitionFilter::try_from((key, op, v.as_slice())) + } }) .collect() } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index af692fd5c9..b990214c5c 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -17,8 +17,8 @@ //! async { //! let table = deltalake::open_table_with_version("./tests/data/simple_table", 0).await.unwrap(); //! let files = table.get_files_by_partitions(&[deltalake::PartitionFilter { -//! key: "month", -//! value: deltalake::PartitionValue::Equal("12"), +//! key: "month".to_string(), +//! value: deltalake::PartitionValue::Equal("12".to_string()), //! }]); //! }; //! ``` @@ -348,12 +348,12 @@ mod tests { let filters = vec![ crate::PartitionFilter { - key: "month", - value: crate::PartitionValue::Equal("2"), + key: "month".to_string(), + value: crate::PartitionValue::Equal("2".to_string()), }, crate::PartitionFilter { - key: "year", - value: crate::PartitionValue::Equal("2020"), + key: "year".to_string(), + value: crate::PartitionValue::Equal("2020".to_string()), }, ]; @@ -383,8 +383,8 @@ mod tests { ); let filters = vec![crate::PartitionFilter { - key: "month", - value: crate::PartitionValue::NotEqual("2"), + key: "month".to_string(), + value: crate::PartitionValue::NotEqual("2".to_string()), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -397,8 +397,8 @@ mod tests { ); let filters = vec![crate::PartitionFilter { - key: "month", - value: crate::PartitionValue::In(vec!["2", "12"]), + key: "month".to_string(), + value: crate::PartitionValue::In(vec!["2".to_string(), "12".to_string()]), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -411,8 +411,8 @@ mod tests { ); let filters = vec![crate::PartitionFilter { - key: "month", - value: crate::PartitionValue::NotIn(vec!["2", "12"]), + key: "month".to_string(), + value: crate::PartitionValue::NotIn(vec!["2".to_string(), "12".to_string()]), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -430,8 +430,8 @@ mod tests { .unwrap(); let filters = vec![crate::PartitionFilter { - key: "k", - value: crate::PartitionValue::Equal("A"), + key: "k".to_string(), + value: crate::PartitionValue::Equal("A".to_string()), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -441,8 +441,8 @@ mod tests { ); let filters = vec![crate::PartitionFilter { - key: "k", - value: crate::PartitionValue::Equal(""), + key: "k".to_string(), + value: crate::PartitionValue::Equal("".to_string()), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -473,8 +473,8 @@ mod tests { ); let filters = vec![crate::PartitionFilter { - key: "x", - value: crate::PartitionValue::Equal("A/A"), + key: "x".to_string(), + value: crate::PartitionValue::Equal("A/A".to_string()), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -492,8 +492,8 @@ mod tests { .unwrap(); let filters = vec![crate::PartitionFilter { - key: "x", - value: crate::PartitionValue::LessThanOrEqual("9"), + key: "x".to_string(), + value: crate::PartitionValue::LessThanOrEqual("9".to_string()), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), @@ -503,8 +503,8 @@ mod tests { ); let filters = vec![crate::PartitionFilter { - key: "y", - value: crate::PartitionValue::LessThan("10.0"), + key: "y".to_string(), + value: crate::PartitionValue::LessThan("10.0".to_string()), }]; assert_eq!( table.get_files_by_partitions(&filters).unwrap(), diff --git a/rust/src/operations/optimize.rs b/rust/src/operations/optimize.rs index d4d9080614..2042b3af4e 100644 --- a/rust/src/operations/optimize.rs +++ b/rust/src/operations/optimize.rs @@ -157,7 +157,7 @@ pub struct OptimizeBuilder<'a> { /// Delta object store for handling data files store: ObjectStoreRef, /// Filters to select specific table partitions to be optimized - filters: &'a [PartitionFilter<'a, &'a str>], + filters: &'a [PartitionFilter], /// Desired file size after bin-packing files target_size: Option, /// Properties passed to underlying parquet writer @@ -200,7 +200,7 @@ impl<'a> OptimizeBuilder<'a> { } /// Only optimize files that return true for the specified partition filter - pub fn with_filters(mut self, filters: &'a [PartitionFilter<'a, &'a str>]) -> Self { + pub fn with_filters(mut self, filters: &'a [PartitionFilter]) -> Self { self.filters = filters; self } @@ -769,7 +769,7 @@ impl PartitionTuples { pub fn create_merge_plan( optimize_type: OptimizeType, snapshot: &DeltaTableState, - filters: &[PartitionFilter<'_, &str>], + filters: &[PartitionFilter], target_size: Option, writer_properties: WriterProperties, ) -> Result { @@ -860,7 +860,7 @@ impl IntoIterator for MergeBin { fn build_compaction_plan( snapshot: &DeltaTableState, partition_keys: &[String], - filters: &[PartitionFilter<'_, &str>], + filters: &[PartitionFilter], target_size: i64, ) -> Result<(OptimizeOperations, Metrics), DeltaTableError> { let mut metrics = Metrics::default(); @@ -923,7 +923,7 @@ fn build_zorder_plan( zorder_columns: Vec, snapshot: &DeltaTableState, partition_keys: &[String], - filters: &[PartitionFilter<'_, &str>], + filters: &[PartitionFilter], ) -> Result<(OptimizeOperations, Metrics), DeltaTableError> { if zorder_columns.is_empty() { return Err(DeltaTableError::Generic( diff --git a/rust/src/schema/partitions.rs b/rust/src/schema/partitions.rs index 0c1b0f6404..f2f0d0456a 100644 --- a/rust/src/schema/partitions.rs +++ b/rust/src/schema/partitions.rs @@ -9,32 +9,32 @@ use std::collections::HashMap; /// A Enum used for selecting the partition value operation when filtering a DeltaTable partition. #[derive(Clone, Debug, PartialEq, Eq)] -pub enum PartitionValue { +pub enum PartitionValue { /// The partition value with the equal operator - Equal(T), + Equal(String), /// The partition value with the not equal operator - NotEqual(T), + NotEqual(String), /// The partition value with the greater than operator - GreaterThan(T), + GreaterThan(String), /// The partition value with the greater than or equal operator - GreaterThanOrEqual(T), + GreaterThanOrEqual(String), /// The partition value with the less than operator - LessThan(T), + LessThan(String), /// The partition value with the less than or equal operator - LessThanOrEqual(T), + LessThanOrEqual(String), /// The partition values with the in operator - In(Vec), + In(Vec), /// The partition values with the not in operator - NotIn(Vec), + NotIn(Vec), } /// A Struct used for filtering a DeltaTable partition by key and value. #[derive(Clone, Debug, PartialEq, Eq)] -pub struct PartitionFilter<'a, T> { +pub struct PartitionFilter { /// The key of the PartitionFilter - pub key: &'a str, + pub key: String, /// The value of the PartitionFilter - pub value: PartitionValue, + pub value: PartitionValue, } fn compare_typed_value( @@ -65,11 +65,11 @@ fn compare_typed_value( } /// Partition filters methods for filtering the DeltaTable partitions. -impl<'a> PartitionFilter<'a, &str> { +impl PartitionFilter { /// Indicates if a DeltaTable partition matches with the partition filter by key and value. pub fn match_partition( &self, - partition: &DeltaTablePartition<'a>, + partition: &DeltaTablePartition, data_type: &SchemaDataType, ) -> bool { if self.key != partition.key { @@ -80,22 +80,22 @@ impl<'a> PartitionFilter<'a, &str> { PartitionValue::Equal(value) => value == &partition.value, PartitionValue::NotEqual(value) => value != &partition.value, PartitionValue::GreaterThan(value) => { - compare_typed_value(partition.value, value.to_owned(), data_type) + compare_typed_value(&partition.value, value, data_type) .map(|x| x.is_gt()) .unwrap_or(false) } PartitionValue::GreaterThanOrEqual(value) => { - compare_typed_value(partition.value, value.to_owned(), data_type) + compare_typed_value(&partition.value, value, data_type) .map(|x| x.is_ge()) .unwrap_or(false) } PartitionValue::LessThan(value) => { - compare_typed_value(partition.value, value.to_owned(), data_type) + compare_typed_value(&partition.value, value, data_type) .map(|x| x.is_lt()) .unwrap_or(false) } PartitionValue::LessThanOrEqual(value) => { - compare_typed_value(partition.value, value.to_owned(), data_type) + compare_typed_value(&partition.value, value, data_type) .map(|x| x.is_le()) .unwrap_or(false) } @@ -108,10 +108,13 @@ impl<'a> PartitionFilter<'a, &str> { /// matches with the partition filter. pub fn match_partitions( &self, - partitions: &[DeltaTablePartition<'a>], + partitions: &[DeltaTablePartition], partition_col_data_types: &HashMap<&str, &SchemaDataType>, ) -> bool { - let data_type = partition_col_data_types.get(self.key).unwrap().to_owned(); + let data_type = partition_col_data_types + .get(self.key.as_str()) + .unwrap() + .to_owned(); partitions .iter() .any(|partition| self.match_partition(partition, data_type)) @@ -119,36 +122,36 @@ impl<'a> PartitionFilter<'a, &str> { } /// Create a PartitionFilter from a filter Tuple with the structure (key, operation, value). -impl<'a, T: std::fmt::Debug> TryFrom<(&'a str, &str, T)> for PartitionFilter<'a, T> { +impl TryFrom<(&str, &str, &str)> for PartitionFilter { type Error = DeltaTableError; /// Try to create a PartitionFilter from a Tuple of (key, operation, value). /// Returns a DeltaTableError in case of a malformed filter. - fn try_from(filter: (&'a str, &str, T)) -> Result { + fn try_from(filter: (&str, &str, &str)) -> Result { match filter { (key, "=", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::Equal(value), + key: key.to_owned(), + value: PartitionValue::Equal(value.to_owned()), }), (key, "!=", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::NotEqual(value), + key: key.to_owned(), + value: PartitionValue::NotEqual(value.to_owned()), }), (key, ">", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::GreaterThan(value), + key: key.to_owned(), + value: PartitionValue::GreaterThan(value.to_owned()), }), (key, ">=", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::GreaterThanOrEqual(value), + key: key.to_owned(), + value: PartitionValue::GreaterThanOrEqual(value.to_owned()), }), (key, "<", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::LessThan(value), + key: key.to_owned(), + value: PartitionValue::LessThan(value.to_owned()), }), (key, "<=", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::LessThanOrEqual(value), + key: key.to_owned(), + value: PartitionValue::LessThanOrEqual(value.to_owned()), }), (_, _, _) => Err(DeltaTableError::InvalidPartitionFilter { partition_filter: format!("{filter:?}"), @@ -158,20 +161,20 @@ impl<'a, T: std::fmt::Debug> TryFrom<(&'a str, &str, T)> for PartitionFilter<'a, } /// Create a PartitionFilter from a filter Tuple with the structure (key, operation, list(value)). -impl<'a, T: std::fmt::Debug> TryFrom<(&'a str, &str, Vec)> for PartitionFilter<'a, T> { +impl TryFrom<(&str, &str, &[&str])> for PartitionFilter { type Error = DeltaTableError; /// Try to create a PartitionFilter from a Tuple of (key, operation, list(value)). /// Returns a DeltaTableError in case of a malformed filter. - fn try_from(filter: (&'a str, &str, Vec)) -> Result { + fn try_from(filter: (&str, &str, &[&str])) -> Result { match filter { (key, "in", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::In(value), + key: key.to_owned(), + value: PartitionValue::In(value.iter().map(|x| x.to_string()).collect()), }), (key, "not in", value) if !key.is_empty() => Ok(PartitionFilter { - key, - value: PartitionValue::NotIn(value), + key: key.to_owned(), + value: PartitionValue::NotIn(value.iter().map(|x| x.to_string()).collect()), }), (_, _, _) => Err(DeltaTableError::InvalidPartitionFilter { partition_filter: format!("{filter:?}"), @@ -182,11 +185,11 @@ impl<'a, T: std::fmt::Debug> TryFrom<(&'a str, &str, Vec)> for PartitionFilte /// A Struct DeltaTablePartition used to represent a partition of a DeltaTable. #[derive(Clone, Debug, PartialEq, Eq)] -pub struct DeltaTablePartition<'a> { +pub struct DeltaTablePartition { /// The key of the DeltaTable partition. - pub key: &'a str, + pub key: String, /// The value of the DeltaTable partition. - pub value: &'a str, + pub value: String, } /// Create a DeltaTable partition from a HivePartition string. @@ -201,17 +204,17 @@ pub struct DeltaTablePartition<'a> { /// assert_eq!("ds", partition.key); /// assert_eq!("2023-01-01", partition.value); /// ``` -impl<'a> TryFrom<&'a str> for DeltaTablePartition<'a> { +impl TryFrom<&str> for DeltaTablePartition { type Error = DeltaTableError; /// Try to create a DeltaTable partition from a HivePartition string. /// Returns a DeltaTableError if the string is not in the form of a HivePartition. - fn try_from(partition: &'a str) -> Result { + fn try_from(partition: &str) -> Result { let partition_splitted: Vec<&str> = partition.split('=').collect(); match partition_splitted { partition_splitted if partition_splitted.len() == 2 => Ok(DeltaTablePartition { - key: partition_splitted[0], - value: partition_splitted[1], + key: partition_splitted[0].to_owned(), + value: partition_splitted[1].to_owned(), }), _ => Err(DeltaTableError::PartitionError { partition: partition.to_string(), @@ -220,13 +223,13 @@ impl<'a> TryFrom<&'a str> for DeltaTablePartition<'a> { } } -impl<'a> DeltaTablePartition<'a> { +impl DeltaTablePartition { /// Try to create a DeltaTable partition from a partition value kv pair. /// /// ```rust /// use deltalake::DeltaTablePartition; /// - /// let value = (&"ds".to_string(), &Some("2023-01-01".to_string())); + /// let value = ("ds", &Some("2023-01-01".to_string())); /// let null_default = "1979-01-01"; /// let partition = DeltaTablePartition::from_partition_value(value, null_default); /// @@ -234,15 +237,18 @@ impl<'a> DeltaTablePartition<'a> { /// assert_eq!("2023-01-01", partition.value); /// ``` pub fn from_partition_value( - partition_value: (&'a String, &'a Option), - default_for_null: &'a str, + partition_value: (&str, &Option), + default_for_null: &str, ) -> Self { let (k, v) = partition_value; let v = match v { Some(s) => s, None => default_for_null, }; - DeltaTablePartition { key: k, value: v } + DeltaTablePartition { + key: k.to_owned(), + value: v.to_owned(), + } } } diff --git a/rust/src/table/mod.rs b/rust/src/table/mod.rs index a9e3899abf..4883134fcd 100644 --- a/rust/src/table/mod.rs +++ b/rust/src/table/mod.rs @@ -710,8 +710,8 @@ impl DeltaTable { /// Obtain Add actions for files that match the filter pub fn get_active_add_actions_by_partitions<'a>( &'a self, - filters: &'a [PartitionFilter<'a, &'a str>], - ) -> Result + '_, DeltaTableError> { + filters: &'a [PartitionFilter], + ) -> Result + '_, DeltaTableError> { self.state.get_active_add_actions_by_partitions(filters) } @@ -719,7 +719,7 @@ impl DeltaTable { /// `PartitionFilter`s. pub fn get_files_by_partitions( &self, - filters: &[PartitionFilter<&str>], + filters: &[PartitionFilter], ) -> Result, DeltaTableError> { Ok(self .get_active_add_actions_by_partitions(filters)? @@ -736,7 +736,7 @@ impl DeltaTable { /// Return the file uris as strings for the partition(s) pub fn get_file_uris_by_partitions( &self, - filters: &[PartitionFilter<&str>], + filters: &[PartitionFilter], ) -> Result, DeltaTableError> { let files = self.get_files_by_partitions(filters)?; Ok(files diff --git a/rust/src/table/state.rs b/rust/src/table/state.rs index 71aa6bddc9..e72f726ba8 100644 --- a/rust/src/table/state.rs +++ b/rust/src/table/state.rs @@ -370,13 +370,13 @@ impl DeltaTableState { /// Obtain Add actions for files that match the filter pub fn get_active_add_actions_by_partitions<'a>( &'a self, - filters: &'a [PartitionFilter<'a, &'a str>], - ) -> Result + '_, DeltaTableError> { + filters: &'a [PartitionFilter], + ) -> Result + '_, DeltaTableError> { let current_metadata = self.current_metadata().ok_or(DeltaTableError::NoMetadata)?; let nonpartitioned_columns: Vec = filters .iter() - .filter(|f| !current_metadata.partition_columns.contains(&f.key.into())) + .filter(|f| !current_metadata.partition_columns.contains(&f.key)) .map(|f| f.key.to_string()) .collect(); @@ -395,7 +395,7 @@ impl DeltaTableState { let partitions = add .partition_values .iter() - .map(|p| DeltaTablePartition::from_partition_value(p, "")) + .map(|p| DeltaTablePartition::from_partition_value((p.0, p.1), "")) .collect::>(); filters .iter() diff --git a/rust/tests/read_delta_partitions_test.rs b/rust/tests/read_delta_partitions_test.rs index 8ee68777cc..7b05e2f93c 100644 --- a/rust/tests/read_delta_partitions_test.rs +++ b/rust/tests/read_delta_partitions_test.rs @@ -8,12 +8,12 @@ mod fs_common; #[test] fn test_create_delta_table_partition() { - let year = "2021"; + let year = "2021".to_string(); let path = format!("year={year}"); assert_eq!( deltalake::DeltaTablePartition::try_from(path.as_ref()).unwrap(), deltalake::DeltaTablePartition { - key: "year", + key: "year".to_string(), value: year } ); @@ -30,25 +30,25 @@ fn test_create_delta_table_partition() { #[test] fn test_match_partition() { let partition_2021 = deltalake::DeltaTablePartition { - key: "year", - value: "2021", + key: "year".to_string(), + value: "2021".to_string(), }; let partition_2020 = deltalake::DeltaTablePartition { - key: "year", - value: "2020", + key: "year".to_string(), + value: "2020".to_string(), }; let partition_2019 = deltalake::DeltaTablePartition { - key: "year", - value: "2019", + key: "year".to_string(), + value: "2019".to_string(), }; let partition_year_2020_filter = deltalake::PartitionFilter { - key: "year", - value: deltalake::PartitionValue::Equal("2020"), + key: "year".to_string(), + value: deltalake::PartitionValue::Equal("2020".to_string()), }; let partition_month_12_filter = deltalake::PartitionFilter { - key: "month", - value: deltalake::PartitionValue::Equal("12"), + key: "month".to_string(), + value: deltalake::PartitionValue::Equal("12".to_string()), }; let string_type = SchemaDataType::primitive(String::from("string")); @@ -62,12 +62,12 @@ fn test_match_partition() { fn test_match_filters() { let partitions = vec![ deltalake::DeltaTablePartition { - key: "year", - value: "2021", + key: "year".to_string(), + value: "2021".to_string(), }, deltalake::DeltaTablePartition { - key: "month", - value: "12", + key: "month".to_string(), + value: "12".to_string(), }, ]; @@ -78,18 +78,18 @@ fn test_match_filters() { .collect(); let valid_filters = deltalake::PartitionFilter { - key: "year", - value: deltalake::PartitionValue::Equal("2021"), + key: "year".to_string(), + value: deltalake::PartitionValue::Equal("2021".to_string()), }; let valid_filter_month = deltalake::PartitionFilter { - key: "month", - value: deltalake::PartitionValue::Equal("12"), + key: "month".to_string(), + value: deltalake::PartitionValue::Equal("12".to_string()), }; let invalid_filter = deltalake::PartitionFilter { - key: "year", - value: deltalake::PartitionValue::Equal("2020"), + key: "year".to_string(), + value: deltalake::PartitionValue::Equal("2020".to_string()), }; assert!(valid_filters.match_partitions(&partitions, &partition_data_types),);