Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Schema::with_metadata and Field::with_metadata #1092

Merged
merged 1 commit into from
Jan 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ impl Field {
}
}

/// Builder-style variant of `set_metadata`.
///
/// Takes ownership of this `Field`, forwards `metadata` unchanged to
/// `set_metadata`, and hands the updated `Field` back so the call can be
/// chained directly onto a constructor such as `Field::new`.
pub fn with_metadata(self, metadata: Option<BTreeMap<String, String>>) -> Self {
    let mut field = self;
    field.set_metadata(metadata);
    field
}

/// Returns the immutable reference to the `Field`'s optional custom metadata.
#[inline]
pub const fn metadata(&self) -> &Option<BTreeMap<String, String>> {
Expand Down
36 changes: 16 additions & 20 deletions arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ mod tests {
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();

// Non-empty map: should be converted as JSON obj { ... }
let mut first_name = Field::new("first_name", DataType::Utf8, false);
first_name.set_metadata(Some(field_metadata));
let first_name = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(field_metadata));

// Empty map: should be omitted.
let mut last_name = Field::new("last_name", DataType::Utf8, false);
last_name.set_metadata(Some(BTreeMap::default()));
let last_name = Field::new("last_name", DataType::Utf8, false)
.with_metadata(Some(BTreeMap::default()));

let person = DataType::Struct(vec![
first_name,
Expand Down Expand Up @@ -1154,8 +1154,7 @@ mod tests {
assert!(schema2 != schema4);
assert!(schema3 != schema4);

let mut f = Field::new("c1", DataType::Utf8, false);
f.set_metadata(Some(
let f = Field::new("c1", DataType::Utf8, false).with_metadata(Some(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
Expand Down Expand Up @@ -1195,8 +1194,8 @@ mod tests {
fn person_schema() -> Schema {
let kv_array = [("k".to_string(), "v".to_string())];
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
let mut first_name = Field::new("first_name", DataType::Utf8, false);
first_name.set_metadata(Some(field_metadata));
let first_name = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(field_metadata));

Schema::new(vec![
first_name,
Expand Down Expand Up @@ -1227,16 +1226,16 @@ mod tests {
.iter()
.cloned()
.collect();
let mut f1 = Field::new("first_name", DataType::Utf8, false);
f1.set_metadata(Some(metadata1));
let f1 = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(metadata1));

let metadata2: BTreeMap<String, String> =
[("foo".to_string(), "baz".to_string())]
.iter()
.cloned()
.collect();
let mut f2 = Field::new("first_name", DataType::Utf8, false);
f2.set_metadata(Some(metadata2));
let f2 = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(metadata2));

assert!(
Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])])
Expand All @@ -1250,8 +1249,8 @@ mod tests {
.iter()
.cloned()
.collect();
let mut f2 = Field::new("first_name", DataType::Utf8, false);
f2.set_metadata(Some(metadata2));
let f2 = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(metadata2));

assert!(f1.try_merge(&f2).is_ok());
assert!(f1.metadata().is_some());
Expand All @@ -1261,15 +1260,13 @@ mod tests {
);

// 3. Some + Some
let mut f1 = Field::new("first_name", DataType::Utf8, false);
f1.set_metadata(Some(
let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
.collect(),
));
let mut f2 = Field::new("first_name", DataType::Utf8, false);
f2.set_metadata(Some(
let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
[("foo2".to_string(), "bar2".to_string())]
.iter()
.cloned()
Expand All @@ -1290,8 +1287,7 @@ mod tests {
);

// 4. Some + None.
let mut f1 = Field::new("first_name", DataType::Utf8, false);
f1.set_metadata(Some(
let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
Expand Down
42 changes: 21 additions & 21 deletions arrow/src/datatypes/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ impl Schema {
Self { fields, metadata }
}

/// Sets the metadata of this `Schema` to be `metadata` and returns self
pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
self.metadata = metadata;
self
}

/// Returns a new schema containing only the columns at the specified indices.
/// The metadata of the parent schema is carried over as well.
pub fn project(&self, indices: &[usize]) -> Result<Schema> {
Expand Down Expand Up @@ -366,7 +372,7 @@ mod tests {
#[test]
fn test_ser_de_metadata() {
// ser/de with empty metadata
let mut schema = Schema::new(vec![
let schema = Schema::new(vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
Expand All @@ -378,10 +384,8 @@ mod tests {
assert_eq!(schema, de_schema);

// ser/de with non-empty metadata
schema.metadata = [("key".to_owned(), "val".to_owned())]
.iter()
.cloned()
.collect();
let schema = schema
.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
let json = serde_json::to_string(&schema).unwrap();
let de_schema = serde_json::from_str(&json).unwrap();

Expand All @@ -393,14 +397,12 @@ mod tests {
let mut metadata = HashMap::new();
metadata.insert("meta".to_string(), "data".to_string());

let schema = Schema::new_with_metadata(
vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
],
metadata,
);
let schema = Schema::new(vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
])
.with_metadata(metadata);

let projected: Schema = schema.project(&[0, 2]).unwrap();

Expand All @@ -415,14 +417,12 @@ mod tests {
let mut metadata = HashMap::new();
metadata.insert("meta".to_string(), "data".to_string());

let schema = Schema::new_with_metadata(
vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
],
metadata,
);
let schema = Schema::new(vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
])
.with_metadata(metadata);

let projected: Result<Schema> = schema.project(&[0, 3]);

Expand Down
11 changes: 3 additions & 8 deletions arrow/src/ipc/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub fn schema_to_fb_offset<'a>(
/// Convert an IPC Field to Arrow Field
impl<'a> From<ipc::Field<'a>> for Field {
fn from(field: ipc::Field) -> Field {
let mut arrow_field = if let Some(dictionary) = field.dictionary() {
let arrow_field = if let Some(dictionary) = field.dictionary() {
Field::new_dict(
field.name().unwrap(),
get_data_type(field, true),
Expand All @@ -99,8 +99,7 @@ impl<'a> From<ipc::Field<'a>> for Field {
metadata = Some(metadata_map);
}

arrow_field.set_metadata(metadata);
arrow_field
arrow_field.with_metadata(metadata)
}
}

Expand Down Expand Up @@ -705,11 +704,7 @@ mod tests {
.collect();
let schema = Schema::new_with_metadata(
vec![
{
let mut f = Field::new("uint8", DataType::UInt8, false);
f.set_metadata(Some(field_md));
f
},
Field::new("uint8", DataType::UInt8, false).with_metadata(Some(field_md)),
Field::new("uint16", DataType::UInt16, true),
Field::new("uint32", DataType::UInt32, false),
Field::new("uint64", DataType::UInt64, true),
Expand Down
Loading