Skip to content

Commit

Permalink
denormalizing arrow datatypes into Objects and ObjectFields
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Jun 20, 2023
1 parent 2f90d9f commit 8cb951a
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 18 deletions.
43 changes: 28 additions & 15 deletions crates/re_types_builder/src/arrow_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use anyhow::Context as _;
use arrow2::datatypes::{DataType, Field, UnionMode};
use std::collections::{BTreeMap, HashMap};

use crate::{ElementType, Object, Type, ATTR_ARROW_SPARSE_UNION, ATTR_ARROW_TRANSPARENT};
use crate::{
ElementType, Object, ObjectField, Type, ATTR_ARROW_SPARSE_UNION, ATTR_ARROW_TRANSPARENT,
};

// --- Registry ---

Expand All @@ -18,7 +20,7 @@ pub struct ArrowRegistry {
impl ArrowRegistry {
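/// Computes the Arrow datatype for the specified object and stores it in the registry, to be
/// resolved later on.
///
/// The still-unresolved datatype is also written back onto the object itself (unless it is
/// Arrow-transparent) and onto the fields visited while computing it.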
pub fn register(&mut self, obj: &Object) {
pub fn register(&mut self, obj: &mut Object) {
let (fqname, datatype) = (obj.fqname.clone(), self.arrow_datatype_from_object(obj));
self.registry.insert(fqname, datatype);
}
Expand Down Expand Up @@ -48,7 +50,7 @@ impl ArrowRegistry {

// ---

fn arrow_datatype_from_object(&self, obj: &Object) -> LazyDatatype {
fn arrow_datatype_from_object(&mut self, obj: &mut Object) -> LazyDatatype {
let is_struct = obj.is_struct();
let is_transparent = obj.try_get_attr::<String>(ATTR_ARROW_TRANSPARENT).is_some();
let num_fields = obj.fields.len();
Expand All @@ -72,11 +74,10 @@ impl ArrowRegistry {
obj.fqname.clone(),
Box::new(LazyDatatype::Struct(
obj.fields
.iter()
.iter_mut()
.map(|field| LazyField {
name: field.name.clone(),
datatype: self.arrow_datatype_from_type(&field.typ),
is_nullable: field.required,
datatype: self.arrow_datatype_from_type(field.typ.clone(), field),
is_nullable: !field.required,
metadata: Default::default(),
})
Expand All @@ -92,10 +93,10 @@ impl ArrowRegistry {
obj.fqname.clone(),
Box::new(LazyDatatype::Union(
obj.fields
.iter()
.iter_mut()
.map(|field| LazyField {
name: field.name.clone(),
datatype: self.arrow_datatype_from_type(&field.typ),
datatype: self.arrow_datatype_from_type(field.typ.clone(), field),
is_nullable: false,
metadata: Default::default(),
})
Expand All @@ -109,11 +110,18 @@ impl ArrowRegistry {
)),
None,
)
};

// NOTE: Arrow-transparent objects by definition don't have a datatype of their own.
if !is_transparent {
obj.datatype = datatype.clone().into();
}

datatype
}

fn arrow_datatype_from_type(&self, typ: &Type) -> LazyDatatype {
match typ {
fn arrow_datatype_from_type(&mut self, typ: Type, field: &mut ObjectField) -> LazyDatatype {
let datatype = match typ {
Type::UInt8 => LazyDatatype::UInt8,
Type::UInt16 => LazyDatatype::UInt16,
Type::UInt32 => LazyDatatype::UInt32,
Expand All @@ -134,19 +142,24 @@ impl ArrowRegistry {
is_nullable: false,
metadata: Default::default(),
}),
*length,
length,
),
Type::Vector { elem_type } => LazyDatatype::List(Box::new(LazyField {
name: "item".into(),
datatype: self.arrow_datatype_from_element_type(elem_type),
is_nullable: false,
metadata: Default::default(),
})),
Type::Object(fqname) => LazyDatatype::Unresolved(fqname.clone()),
}
Type::Object(fqname) => LazyDatatype::Unresolved(fqname),
};

field.datatype = datatype.clone().into();
self.registry.insert(field.fqname.clone(), datatype.clone());

datatype
}

fn arrow_datatype_from_element_type(&self, typ: &ElementType) -> LazyDatatype {
fn arrow_datatype_from_element_type(&self, typ: ElementType) -> LazyDatatype {
_ = self;
match typ {
ElementType::UInt8 => LazyDatatype::UInt8,
Expand All @@ -162,7 +175,7 @@ impl ArrowRegistry {
ElementType::Float32 => LazyDatatype::Float32,
ElementType::Float64 => LazyDatatype::Float64,
ElementType::String => LazyDatatype::Utf8,
ElementType::Object(fqname) => LazyDatatype::Unresolved(fqname.clone()),
ElementType::Object(fqname) => LazyDatatype::Unresolved(fqname),
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions crates/re_types_builder/src/codegen/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ impl QuotedObject {
attrs: _,
fields,
specifics: _,
datatype: _,
} = obj;

let mut code = String::new();
Expand Down Expand Up @@ -296,6 +297,7 @@ impl QuotedObject {
attrs: _,
required: _,
deprecated: _,
datatype: _,
} = field;

let (typ, _) = quote_field_type_from_field(objects, field, false);
Expand Down Expand Up @@ -352,6 +354,7 @@ impl QuotedObject {
attrs: _,
fields,
specifics: _,
datatype: _,
} = obj;

let mut code = String::new();
Expand Down Expand Up @@ -390,6 +393,7 @@ impl QuotedObject {
attrs: _,
required: _,
deprecated: _,
datatype: _,
} = field;

let (typ, _) = quote_field_type_from_field(objects, field, false);
Expand Down
6 changes: 6 additions & 0 deletions crates/re_types_builder/src/codegen/rust.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ impl QuotedObject {
attrs: _,
fields,
specifics: _,
datatype: _,
} = obj;

let name = format_ident!("{name}");
Expand Down Expand Up @@ -246,6 +247,7 @@ impl QuotedObject {
attrs: _,
fields,
specifics: _,
datatype: _,
} = obj;

let name = format_ident!("{name}");
Expand All @@ -265,6 +267,7 @@ impl QuotedObject {
attrs: _,
required: _,
deprecated: _,
datatype: _,
} = obj_field;

let name = format_ident!("{name}");
Expand Down Expand Up @@ -322,6 +325,7 @@ impl quote::ToTokens for ObjectFieldTokenizer<'_> {
required,
// TODO(#2366): support for deprecation notices
deprecated: _,
datatype: _,
} = obj_field;

let quoted_docs = quote_doc_from_docs(docs);
Expand Down Expand Up @@ -459,6 +463,7 @@ fn quote_trait_impls_from_obj(arrow_registry: &ArrowRegistry, obj: &Object) -> T
attrs: _,
fields: _,
specifics: _,
datatype: _,
} = obj;

let name = format_ident!("{name}");
Expand Down Expand Up @@ -559,6 +564,7 @@ fn quote_builder_from_obj(obj: &Object) -> TokenStream {
attrs: _,
fields,
specifics: _,
datatype: _,
} = obj;

let name = format_ident!("{name}");
Expand Down
6 changes: 3 additions & 3 deletions crates/re_types_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ mod codegen;
#[allow(clippy::unimplemented)]
mod objects;

pub use self::arrow_registry::ArrowRegistry;
pub use self::arrow_registry::{ArrowRegistry, LazyDatatype, LazyField};
pub use self::codegen::{CodeGenerator, PythonCodeGenerator, RustCodeGenerator};
pub use self::objects::{
Attributes, Docs, ElementType, Object, ObjectField, ObjectKind, Objects, Type,
Expand Down Expand Up @@ -183,15 +183,15 @@ fn generate_lang_agnostic(
binary_entrypoint_path.set_extension("bfbs");

// semantic pass: high level objects from low-level reflection data
let objects = Objects::from_buf(
let mut objects = Objects::from_buf(
sh.read_binary_file(tmp.path().join(binary_entrypoint_path))
.unwrap()
.as_slice(),
);

// create and fill out arrow registry
let mut arrow_registry = ArrowRegistry::default();
for obj in objects.ordered_objects(None) {
for obj in objects.ordered_objects_mut(None) {
arrow_registry.register(obj);
}

Expand Down
16 changes: 16 additions & 0 deletions crates/re_types_builder/src/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,12 @@ pub struct Object {

/// Properties that only apply to either structs or unions.
pub specifics: ObjectSpecifics,

/// The Arrow datatype of this `Object`, or `None` if the object is Arrow-transparent.
///
/// This is lazily computed when the object itself gets registered into the Arrow registry and
/// will be `None` until then (and remains `None` afterwards for Arrow-transparent objects).
pub datatype: Option<crate::LazyDatatype>,
}

impl Object {
Expand Down Expand Up @@ -293,6 +299,7 @@ impl Object {
attrs,
fields,
specifics: ObjectSpecifics::Struct {},
datatype: None,
}
}

Expand Down Expand Up @@ -352,6 +359,7 @@ impl Object {
attrs,
fields,
specifics: ObjectSpecifics::Union { utype },
datatype: None,
}
}

Expand Down Expand Up @@ -447,6 +455,12 @@ pub struct ObjectField {
// TODO(#2366): do something with this
// TODO(#2367): implement custom attr to specify deprecation reason
pub deprecated: bool,

/// The Arrow datatype of this `ObjectField`.
///
/// This is lazily computed when the parent object gets registered into the Arrow registry and
/// will be `None` until then.
pub datatype: Option<crate::LazyDatatype>,
}

impl ObjectField {
Expand Down Expand Up @@ -487,6 +501,7 @@ impl ObjectField {
attrs,
required,
deprecated,
datatype: None,
}
}

Expand Down Expand Up @@ -534,6 +549,7 @@ impl ObjectField {
attrs,
required,
deprecated,
datatype: None,
}
}

Expand Down

0 comments on commit 8cb951a

Please sign in to comment.