Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't Derive Serialize/Deserialize Serde Implementations for Schema Types #2723

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }

[dependencies]
serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
indexmap = { version = "1.9", default-features = false, features = ["std"] }
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
Expand Down Expand Up @@ -75,7 +74,7 @@ default = ["csv", "ipc", "json"]
ipc_compression = ["ipc", "zstd", "lz4"]
csv = ["csv_crate"]
ipc = ["flatbuffers"]
json = ["serde", "serde_json"]
json = ["serde_json"]
simd = ["packed_simd"]
prettyprint = ["comfy-table"]
# The test utils feature enables code used in benchmarks and tests but
Expand Down
3 changes: 0 additions & 3 deletions arrow/src/datatypes/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ use super::Field;
/// For more information on these types please see
/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DataType {
/// Null type
Null,
Expand Down Expand Up @@ -235,7 +234,6 @@ pub enum TimeUnit {

/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum IntervalUnit {
/// Indicates the number of elapsed whole months, stored as 4-byte integers.
YearMonth,
Expand All @@ -254,7 +252,6 @@ pub enum IntervalUnit {

// Sparse or Dense union layouts
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum UnionMode {
Sparse,
Dense,
Expand Down
2 changes: 0 additions & 2 deletions arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,13 @@ use super::DataType;
/// A [`Schema`](super::Schema) is an ordered collection of
/// [`Field`] objects.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
name: String,
data_type: DataType,
nullable: bool,
dict_id: i64,
dict_is_ordered: bool,
/// A map of key-value pairs containing additional custom meta data.
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
metadata: Option<BTreeMap<String, String>>,
}

Expand Down
4 changes: 2 additions & 2 deletions arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ mod tests {
),
]);

let serialized = serde_json::to_string(&person).unwrap();
let serialized = person.to_json();

// NOTE that this is testing the default (derived) serialization format, not the
// JSON format specified in metadata.md
Expand All @@ -169,7 +169,7 @@ mod tests {
serialized
);

let deserialized = serde_json::from_str(&serialized).unwrap();
let deserialized = DataType::from(&serialized).unwrap();

assert_eq!(person, deserialized);
}
Expand Down
55 changes: 26 additions & 29 deletions arrow/src/datatypes/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,9 @@ use super::Field;
/// Note that this information is only part of the meta-data and not part of the physical
/// memory layout.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Schema {
pub fields: Vec<Field>,
/// A map of key-value pairs containing additional meta data.
#[cfg_attr(
feature = "serde",
serde(skip_serializing_if = "HashMap::is_empty", default)
)]
pub metadata: HashMap<String, String>,
}

Expand Down Expand Up @@ -275,17 +270,28 @@ impl Schema {
#[cfg(feature = "json")]
fn from_metadata(json: &serde_json::Value) -> Result<HashMap<String, String>> {
use serde_json::Value;
use ArrowError::ParseError;
match json {
Value::Array(_) => {
let mut hashmap = HashMap::new();
let values: Vec<MetadataKeyValue> = serde_json::from_value(json.clone())
.map_err(|_| {
ArrowError::JsonError(
"Unable to parse object into key-value pair".to_string(),
)
Value::Array(values) => {
let mut hashmap = HashMap::with_capacity(values.len());
for value in values {
let object = value.as_object().ok_or_else(|| {
ParseError("expected list of objects".to_string())
})?;
for meta in values {
hashmap.insert(meta.key.clone(), meta.value);

let key = object
.get("key")
.ok_or_else(|| ParseError("key not found".to_string()))?
.as_str()
.ok_or_else(|| ParseError("expected string key".to_string()))?;

let value = object
.get("value")
.ok_or_else(|| ParseError("value not found".to_string()))?
.as_str()
.ok_or_else(|| ParseError("expected string value".to_string()))?;

hashmap.insert(key.to_string(), value.to_string());
}
Ok(hashmap)
}
Expand All @@ -295,15 +301,13 @@ impl Schema {
if let Value::String(v) = v {
Ok((k.to_string(), v.to_string()))
} else {
Err(ArrowError::ParseError(
Err(ParseError(
"metadata `value` field must be a string".to_string(),
))
}
})
.collect::<Result<_>>(),
_ => Err(ArrowError::ParseError(
"`metadata` field must be an object".to_string(),
)),
_ => Err(ParseError("`metadata` field must be an object".to_string())),
}
}

Expand Down Expand Up @@ -355,13 +359,6 @@ impl Hash for Schema {
}
}

#[cfg(feature = "json")]
#[derive(serde::Deserialize)]
struct MetadataKeyValue {
key: String,
value: String,
}

#[cfg(test)]
mod tests {
use crate::datatypes::DataType;
Expand All @@ -378,16 +375,16 @@ mod tests {
Field::new("priority", DataType::UInt8, false),
]);

let json = serde_json::to_string(&schema).unwrap();
let de_schema = serde_json::from_str(&json).unwrap();
let json = schema.to_json();
let de_schema = Schema::from(&json).unwrap();

assert_eq!(schema, de_schema);

// ser/de with non-empty metadata
let schema = schema
.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
let json = serde_json::to_string(&schema).unwrap();
let de_schema = serde_json::from_str(&json).unwrap();
let json = schema.to_json();
let de_schema = Schema::from(&json).unwrap();

assert_eq!(schema, de_schema);
}
Expand Down