Skip to content

Commit

Permalink
Add Schema::with_metadata and Field::with_metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Dec 22, 2021
1 parent e8cc39e commit dfbeb22
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 140 deletions.
6 changes: 6 additions & 0 deletions arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ impl Field {
}
}

/// Builder-style counterpart to `set_metadata`: consumes this `Field`,
/// replaces its custom metadata with `metadata`, and hands the `Field` back
/// so the call can be chained directly onto a constructor.
pub fn with_metadata(self, metadata: Option<BTreeMap<String, String>>) -> Self {
    // Rebind the owned value mutably so the existing setter can be reused.
    let mut field = self;
    field.set_metadata(metadata);
    field
}

/// Returns the immutable reference to the `Field`'s optional custom metadata.
#[inline]
pub const fn metadata(&self) -> &Option<BTreeMap<String, String>> {
Expand Down
36 changes: 16 additions & 20 deletions arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ mod tests {
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();

// Non-empty map: should be converted as JSON obj { ... }
let mut first_name = Field::new("first_name", DataType::Utf8, false);
first_name.set_metadata(Some(field_metadata));
let first_name = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(field_metadata));

// Empty map: should be omitted.
let mut last_name = Field::new("last_name", DataType::Utf8, false);
last_name.set_metadata(Some(BTreeMap::default()));
let last_name = Field::new("last_name", DataType::Utf8, false)
.with_metadata(Some(BTreeMap::default()));

let person = DataType::Struct(vec![
first_name,
Expand Down Expand Up @@ -1154,8 +1154,7 @@ mod tests {
assert!(schema2 != schema4);
assert!(schema3 != schema4);

let mut f = Field::new("c1", DataType::Utf8, false);
f.set_metadata(Some(
let f = Field::new("c1", DataType::Utf8, false).with_metadata(Some(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
Expand Down Expand Up @@ -1195,8 +1194,8 @@ mod tests {
fn person_schema() -> Schema {
let kv_array = [("k".to_string(), "v".to_string())];
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
let mut first_name = Field::new("first_name", DataType::Utf8, false);
first_name.set_metadata(Some(field_metadata));
let first_name = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(field_metadata));

Schema::new(vec![
first_name,
Expand Down Expand Up @@ -1227,16 +1226,16 @@ mod tests {
.iter()
.cloned()
.collect();
let mut f1 = Field::new("first_name", DataType::Utf8, false);
f1.set_metadata(Some(metadata1));
let f1 = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(metadata1));

let metadata2: BTreeMap<String, String> =
[("foo".to_string(), "baz".to_string())]
.iter()
.cloned()
.collect();
let mut f2 = Field::new("first_name", DataType::Utf8, false);
f2.set_metadata(Some(metadata2));
let f2 = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(metadata2));

assert!(
Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])])
Expand All @@ -1250,8 +1249,8 @@ mod tests {
.iter()
.cloned()
.collect();
let mut f2 = Field::new("first_name", DataType::Utf8, false);
f2.set_metadata(Some(metadata2));
let f2 = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(metadata2));

assert!(f1.try_merge(&f2).is_ok());
assert!(f1.metadata().is_some());
Expand All @@ -1261,15 +1260,13 @@ mod tests {
);

// 3. Some + Some
let mut f1 = Field::new("first_name", DataType::Utf8, false);
f1.set_metadata(Some(
let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
.collect(),
));
let mut f2 = Field::new("first_name", DataType::Utf8, false);
f2.set_metadata(Some(
let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
[("foo2".to_string(), "bar2".to_string())]
.iter()
.cloned()
Expand All @@ -1290,8 +1287,7 @@ mod tests {
);

// 4. Some + None.
let mut f1 = Field::new("first_name", DataType::Utf8, false);
f1.set_metadata(Some(
let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
Expand Down
42 changes: 21 additions & 21 deletions arrow/src/datatypes/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ impl Schema {
Self { fields, metadata }
}

/// Sets the metadata of this `Schema` to be `metadata` and returns self
pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
self.metadata = metadata;
self
}

/// Returns a new schema with only the specified columns in the new schema
/// This carries metadata from the parent schema over as well
pub fn project(&self, indices: &[usize]) -> Result<Schema> {
Expand Down Expand Up @@ -366,7 +372,7 @@ mod tests {
#[test]
fn test_ser_de_metadata() {
// ser/de with empty metadata
let mut schema = Schema::new(vec![
let schema = Schema::new(vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
Expand All @@ -378,10 +384,8 @@ mod tests {
assert_eq!(schema, de_schema);

// ser/de with non-empty metadata
schema.metadata = [("key".to_owned(), "val".to_owned())]
.iter()
.cloned()
.collect();
let schema = schema
.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
let json = serde_json::to_string(&schema).unwrap();
let de_schema = serde_json::from_str(&json).unwrap();

Expand All @@ -393,14 +397,12 @@ mod tests {
let mut metadata = HashMap::new();
metadata.insert("meta".to_string(), "data".to_string());

let schema = Schema::new_with_metadata(
vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
],
metadata,
);
let schema = Schema::new(vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
])
.with_metadata(metadata);

let projected: Schema = schema.project(&[0, 2]).unwrap();

Expand All @@ -415,14 +417,12 @@ mod tests {
let mut metadata = HashMap::new();
metadata.insert("meta".to_string(), "data".to_string());

let schema = Schema::new_with_metadata(
vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
],
metadata,
);
let schema = Schema::new(vec![
Field::new("name", DataType::Utf8, false),
Field::new("address", DataType::Utf8, false),
Field::new("priority", DataType::UInt8, false),
])
.with_metadata(metadata);

let projected: Result<Schema> = schema.project(&[0, 3]);

Expand Down
11 changes: 3 additions & 8 deletions arrow/src/ipc/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub fn schema_to_fb_offset<'a>(
/// Convert an IPC Field to Arrow Field
impl<'a> From<ipc::Field<'a>> for Field {
fn from(field: ipc::Field) -> Field {
let mut arrow_field = if let Some(dictionary) = field.dictionary() {
let arrow_field = if let Some(dictionary) = field.dictionary() {
Field::new_dict(
field.name().unwrap(),
get_data_type(field, true),
Expand All @@ -99,8 +99,7 @@ impl<'a> From<ipc::Field<'a>> for Field {
metadata = Some(metadata_map);
}

arrow_field.set_metadata(metadata);
arrow_field
arrow_field.with_metadata(metadata)
}
}

Expand Down Expand Up @@ -705,11 +704,7 @@ mod tests {
.collect();
let schema = Schema::new_with_metadata(
vec![
{
let mut f = Field::new("uint8", DataType::UInt8, false);
f.set_metadata(Some(field_md));
f
},
Field::new("uint8", DataType::UInt8, false).with_metadata(Some(field_md)),
Field::new("uint16", DataType::UInt16, true),
Field::new("uint32", DataType::UInt32, false),
Field::new("uint64", DataType::UInt64, true),
Expand Down
Loading

0 comments on commit dfbeb22

Please sign in to comment.