diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 2de4db64276f..fc0a28deb5fc 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -44,7 +44,6 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } [dependencies] -serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } indexmap = { version = "1.9", default-features = false, features = ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } @@ -75,7 +74,7 @@ default = ["csv", "ipc", "json"] ipc_compression = ["ipc", "zstd", "lz4"] csv = ["csv_crate"] ipc = ["flatbuffers"] -json = ["serde", "serde_json"] +json = ["serde_json"] simd = ["packed_simd"] prettyprint = ["comfy-table"] # The test utils feature enables code used in benchmarks and tests but diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index b65bfd7725ac..ab60644f389d 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -40,7 +40,6 @@ use super::Field; /// For more information on these types please see /// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout). #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum DataType { /// Null type Null, @@ -235,7 +234,6 @@ pub enum TimeUnit { /// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum IntervalUnit { /// Indicates the number of elapsed whole months, stored as 4-byte integers. YearMonth, @@ -254,7 +252,6 @@ pub enum IntervalUnit { // Sparse or Dense union layouts #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum UnionMode { Sparse, Dense, diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs index ac966cafe34f..a7a05c8da582 100644 --- a/arrow/src/datatypes/field.rs +++ b/arrow/src/datatypes/field.rs @@ -27,7 +27,6 @@ use super::DataType; /// A [`Schema`](super::Schema) is an ordered collection of /// [`Field`] objects. #[derive(Debug, Clone)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Field { name: String, data_type: DataType, @@ -35,7 +34,6 @@ pub struct Field { dict_id: i64, dict_is_ordered: bool, /// A map of key-value pairs containing additional custom meta data. - #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))] metadata: Option>, } diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs index 38b6c7bf9744..4077fa172b62 100644 --- a/arrow/src/datatypes/mod.rs +++ b/arrow/src/datatypes/mod.rs @@ -153,7 +153,7 @@ mod tests { ), ]); - let serialized = serde_json::to_string(&person).unwrap(); + let serialized = person.to_json(); // NOTE that this is testing the default (derived) serialization format, not the // JSON format specified in metadata.md @@ -169,7 +169,7 @@ mod tests { serialized ); - let deserialized = serde_json::from_str(&serialized).unwrap(); + let deserialized = DataType::from(&serialized).unwrap(); assert_eq!(person, deserialized); } diff --git a/arrow/src/datatypes/schema.rs b/arrow/src/datatypes/schema.rs index efde4edefa66..3a21fb4afb08 100644 --- a/arrow/src/datatypes/schema.rs +++ b/arrow/src/datatypes/schema.rs @@ -28,14 +28,9 @@ use super::Field; /// Note that this information is only part of the meta-data and not part of the physical /// memory layout. #[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Schema { pub fields: Vec, /// A map of key-value pairs containing additional meta data. - #[cfg_attr( - feature = "serde", - serde(skip_serializing_if = "HashMap::is_empty", default) - )] pub metadata: HashMap, } @@ -275,17 +270,28 @@ impl Schema { #[cfg(feature = "json")] fn from_metadata(json: &serde_json::Value) -> Result> { use serde_json::Value; + use ArrowError::ParseError; match json { - Value::Array(_) => { - let mut hashmap = HashMap::new(); - let values: Vec = serde_json::from_value(json.clone()) - .map_err(|_| { - ArrowError::JsonError( - "Unable to parse object into key-value pair".to_string(), - ) + Value::Array(values) => { + let mut hashmap = HashMap::with_capacity(values.len()); + for value in values { + let object = value.as_object().ok_or_else(|| { + ParseError("expected list of objects".to_string()) })?; - for meta in values { - hashmap.insert(meta.key.clone(), meta.value); + + let key = object + .get("key") + .ok_or_else(|| ParseError("key not found".to_string()))? + .as_str() + .ok_or_else(|| ParseError("expected string key".to_string()))?; + + let value = object + .get("value") + .ok_or_else(|| ParseError("value not found".to_string()))? + .as_str() + .ok_or_else(|| ParseError("expected string value".to_string()))?; + + hashmap.insert(key.to_string(), value.to_string()); } Ok(hashmap) } @@ -295,15 +301,13 @@ impl Schema { if let Value::String(v) = v { Ok((k.to_string(), v.to_string())) } else { - Err(ArrowError::ParseError( + Err(ParseError( "metadata `value` field must be a string".to_string(), )) } }) .collect::>(), - _ => Err(ArrowError::ParseError( - "`metadata` field must be an object".to_string(), - )), + _ => Err(ParseError("`metadata` field must be an object".to_string())), } } @@ -355,13 +359,6 @@ impl Hash for Schema { } } -#[cfg(feature = "json")] -#[derive(serde::Deserialize)] -struct MetadataKeyValue { - key: String, - value: String, -} - #[cfg(test)] mod tests { use crate::datatypes::DataType; @@ -378,16 +375,16 @@ mod tests { Field::new("priority", DataType::UInt8, false), ]); - let json = serde_json::to_string(&schema).unwrap(); - let de_schema = serde_json::from_str(&json).unwrap(); + let json = schema.to_json(); + let de_schema = Schema::from(&json).unwrap(); assert_eq!(schema, de_schema); // ser/de with non-empty metadata let schema = schema .with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect()); - let json = serde_json::to_string(&schema).unwrap(); - let de_schema = serde_json::from_str(&json).unwrap(); + let json = schema.to_json(); + let de_schema = Schema::from(&json).unwrap(); assert_eq!(schema, de_schema); }