Skip to content

Commit

Permalink
Migrate ArraySchema::serialize into array_schema_operations. (#5210)
Browse files Browse the repository at this point in the history
Migrate `ArraySchema::serialize` into `array_schema_operations`.

[sc-50777]

---
TYPE: NO_HISTORY
DESC: Migrate `ArraySchema::serialize` into `array_schema_operations`.
  • Loading branch information
bekadavis9 authored Jul 30, 2024
1 parent a2476a0 commit 24cae8f
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 113 deletions.
103 changes: 0 additions & 103 deletions tiledb/sm/array_schema/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
#include "tiledb/sm/filter/webp_filter.h"
#include "tiledb/sm/fragment/fragment_identifier.h"
#include "tiledb/sm/misc/hilbert.h"
#include "tiledb/sm/misc/integral_type_casts.h"
#include "tiledb/sm/misc/tdb_time.h"
#include "tiledb/storage_format/uri/generate_uri.h"
#include "tiledb/type/apply_with_type.h"
Expand Down Expand Up @@ -734,108 +733,6 @@ bool ArraySchema::is_nullable(const std::string& name) const {
return attr->nullable();
}

// ===== FORMAT =====
// version (format_version_t)
// allows_dups (uint8_t)
// array_type (uint8_t)
// tile_order (uint8_t)
// cell_order (uint8_t)
// capacity (uint64_t)
// coords_filters (see FilterPipeline::serialize)
// cell_var_offsets_filters (see FilterPipeline::serialize)
// cell_validity_filters (see FilterPipeline::serialize)
// domain
// attribute_num (uint32_t)
//   attribute #1
//   attribute #2
//   ...
// dimension_label_num (uint32_t)
//   dimension_label #1
//   dimension_label #2
//   ...
// enumeration_num (uint32_t)
//   enumeration_name_length #1 (uint32_t)
//   enumeration_name_chars #1 (string)
//   enumeration_filename_length #1 (uint32_t)
//   enumeration_filename_chars #1 (string)
//   enumeration_name_length #2 (uint32_t)
//   enumeration_name_chars #2 (string)
//   enumeration_filename_length #2 (uint32_t)
//   enumeration_filename_chars #2 (string)
//   ...
// current_domain
/**
 * Writes this array schema to `serializer` in the layout listed above.
 *
 * @param serializer The object the array schema is serialized into.
 * @throws ArraySchemaException if the number of dimension labels does not
 *     fit in a uint32_t.
 * NOTE(review): `utils::safe_integral_cast` is assumed to reject values that
 * do not fit in the target type -- confirm against its definition.
 */
void ArraySchema::serialize(Serializer& serializer) const {
  // Write version, which is always the current version. Despite
  // the in-memory `version_`, we will serialize every array schema
  // as the latest version.
  const format_version_t version = constants::format_version;
  serializer.write<format_version_t>(version);

  // Write allows_dups
  serializer.write<uint8_t>(allows_dups_);

  // Write array type
  serializer.write<uint8_t>(static_cast<uint8_t>(array_type_));

  // Write tile and cell order
  serializer.write<uint8_t>(static_cast<uint8_t>(tile_order_));
  serializer.write<uint8_t>(static_cast<uint8_t>(cell_order_));

  // Write capacity
  serializer.write<uint64_t>(capacity_);

  // Write coords, offsets and validity filters (in that order)
  coords_filters_.serialize(serializer);
  cell_var_offsets_filters_.serialize(serializer);
  cell_validity_filters_.serialize(serializer);

  // Write domain
  domain_->serialize(serializer, version);

  // Write attributes. The count is range-checked instead of being silently
  // truncated by a C-style cast.
  const auto attribute_num =
      utils::safe_integral_cast<size_t, uint32_t>(attributes_.size());
  serializer.write<uint32_t>(attribute_num);
  for (const auto& attr : attributes_) {
    attr->serialize(serializer, version);
  }

  // Write dimension labels
  const auto label_num = static_cast<uint32_t>(dimension_labels_.size());
  if (label_num != dimension_labels_.size()) {
    throw ArraySchemaException(
        "Overflow when attempting to serialize label number.");
  }
  serializer.write<uint32_t>(label_num);
  for (const auto& label : dimension_labels_) {
    label->serialize(serializer, version);
  }

  // Write Enumeration path map. The count is taken from the very container
  // that is iterated below, so the number written always matches the number
  // of (name, filename) pairs that follow it.
  const auto enmr_num =
      utils::safe_integral_cast<size_t, uint32_t>(enumeration_path_map_.size());
  serializer.write<uint32_t>(enmr_num);
  for (const auto& [enmr_name, enmr_filename] : enumeration_path_map_) {
    // Length prefixes are range-checked so an oversized string cannot be
    // written with a truncated length.
    const auto enmr_name_size =
        utils::safe_integral_cast<size_t, uint32_t>(enmr_name.size());
    serializer.write<uint32_t>(enmr_name_size);
    serializer.write(enmr_name.data(), enmr_name_size);

    const auto enmr_filename_size =
        utils::safe_integral_cast<size_t, uint32_t>(enmr_filename.size());
    serializer.write<uint32_t>(enmr_filename_size);
    serializer.write(enmr_filename.data(), enmr_filename_size);
  }

  // Serialize array current domain information
  current_domain_->serialize(serializer);
}

/** Accessor for the tile order stored in this schema. */
Layout ArraySchema::tile_order() const {
  return this->tile_order_;
}
Expand Down
19 changes: 11 additions & 8 deletions tiledb/sm/array_schema/array_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,11 @@ class ArraySchema {
/** Return the pipeline used for coordinates. */
const FilterPipeline& coords_filters() const;

/**
* Return the array current domain.
*
* The result is a const reference obtained by dereferencing
* `current_domain_`.
* NOTE(review): there is no null check here -- presumably `current_domain_`
* is always set on a constructed schema; confirm.
*/
inline const CurrentDomain& current_domain() const {
return *current_domain_;
}

/** True if the array is dense. */
bool dense() const;

Expand Down Expand Up @@ -329,14 +334,6 @@ class ArraySchema {
/** Returns true if the input name is nullable. */
bool is_nullable(const std::string& name) const;

/**
* Serializes the array schema object into a buffer.
*
* @param serializer The object the array schema is serialized into.
*/
void serialize(Serializer& serializer) const;

/** Returns the tile order. */
Layout tile_order() const;

Expand Down Expand Up @@ -595,6 +592,12 @@ class ArraySchema {
return enumeration_map_;
}

/**
* Returns the enumeration path map.
*
* The map is returned by const reference; both keys and values are strings.
* NOTE(review): presumably keyed by enumeration name with the enumeration's
* filename/URI as the value -- confirm against where the map is populated.
*/
inline const tdb::pmr::unordered_map<std::string, std::string>&
enumeration_path_map() const {
return enumeration_path_map_;
}

/** Returns the dimension labels. */
inline const tdb::pmr::vector<shared_ptr<const DimensionLabel>>&
dimension_labels() const {
Expand Down
107 changes: 105 additions & 2 deletions tiledb/sm/array_schema/array_schema_operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,121 @@

#include "tiledb/sm/array_schema/array_schema_operations.h"
#include "tiledb/sm/array_schema/array_schema.h"
#include "tiledb/sm/array_schema/current_domain.h"
#include "tiledb/sm/array_schema/dimension_label.h"
#include "tiledb/sm/array_schema/domain.h"
#include "tiledb/sm/array_schema/enumeration.h"
#include "tiledb/sm/filesystem/uri.h"
#include "tiledb/sm/misc/integral_type_casts.h"
#include "tiledb/sm/storage_manager/context_resources.h"
#include "tiledb/sm/tile/generic_tile_io.h"
#include "tiledb/sm/tile/tile.h"

namespace tiledb::sm {

/**
 * Exception type for errors raised by the array schema operations in this
 * file. Each instance forwards the fixed label "ArraySchemaOperations"
 * together with the caller's message to `StatusException`.
 */
class ArraySchemaOperationsException : public StatusException {
 public:
  explicit ArraySchemaOperationsException(const std::string& message)
      : StatusException("ArraySchemaOperations", message) {
  }
};

/* ********************************* */
/* API */
/* ********************************* */

// ===== FORMAT =====
// version (format_version_t)
// allows_dups (uint8_t)
// array_type (uint8_t)
// tile_order (uint8_t)
// cell_order (uint8_t)
// capacity (uint64_t)
// coords_filters (see FilterPipeline::serialize)
// cell_var_offsets_filters (see FilterPipeline::serialize)
// cell_validity_filters (see FilterPipeline::serialize)
// domain
// attribute_num (uint32_t)
//   attribute #1
//   attribute #2
//   ...
// dimension_label_num (uint32_t)
//   dimension_label #1
//   dimension_label #2
//   ...
// enumeration_num (uint32_t)
//   enumeration_name_length #1 (uint32_t)
//   enumeration_name_chars #1 (string)
//   enumeration_uri_length #1 (uint32_t)
//   enumeration_uri_chars #1 (string)
//   enumeration_name_length #2 (uint32_t)
//   enumeration_name_chars #2 (string)
//   enumeration_uri_length #2 (uint32_t)
//   enumeration_uri_chars #2 (string)
//   ...
// current_domain
/**
 * Writes `array_schema` to `serializer` in the layout listed above.
 *
 * @param serializer The object the array schema is serialized into.
 * @param array_schema The array schema to be serialized.
 * @throws ArraySchemaOperationsException if the number of dimension labels
 *     does not fit in a uint32_t.
 * NOTE(review): `utils::safe_integral_cast` is assumed to reject values that
 * do not fit in the target type -- confirm against its definition.
 */
void serialize_array_schema(
    Serializer& serializer, const ArraySchema& array_schema) {
  // Write version, which is always the current version. Despite
  // the in-memory `version_`, we will serialize every array schema
  // as the latest version.
  const format_version_t version = constants::format_version;
  serializer.write<format_version_t>(version);

  // Write allows_dups
  serializer.write<uint8_t>(array_schema.allows_dups());

  // Write array type
  serializer.write<uint8_t>(static_cast<uint8_t>(array_schema.array_type()));

  // Write tile and cell order
  serializer.write<uint8_t>(static_cast<uint8_t>(array_schema.tile_order()));
  serializer.write<uint8_t>(static_cast<uint8_t>(array_schema.cell_order()));

  // Write capacity
  serializer.write<uint64_t>(array_schema.capacity());

  // Write coords, offsets and validity filters (in that order)
  array_schema.coords_filters().serialize(serializer);
  array_schema.cell_var_offsets_filters().serialize(serializer);
  array_schema.cell_validity_filters().serialize(serializer);

  // Write domain
  array_schema.domain().serialize(serializer, version);

  // Write attributes. The count is range-checked instead of being silently
  // truncated by a C-style cast.
  const auto& attributes = array_schema.attributes();
  const auto attribute_num =
      utils::safe_integral_cast<size_t, uint32_t>(attributes.size());
  serializer.write<uint32_t>(attribute_num);
  for (const auto& attr : attributes) {
    attr->serialize(serializer, version);
  }

  // Write dimension labels. Bind by reference: the accessor returns a const
  // reference, and a by-value `auto` would copy the vector and every
  // shared_ptr in it.
  const auto& dimension_labels = array_schema.dimension_labels();
  const auto label_num = static_cast<uint32_t>(dimension_labels.size());
  if (label_num != dimension_labels.size()) {
    throw ArraySchemaOperationsException(
        "Overflow when attempting to serialize label number.");
  }
  serializer.write<uint32_t>(label_num);
  for (const auto& label : dimension_labels) {
    label->serialize(serializer, version);
  }

  // Write Enumeration path map. The count is taken from the very container
  // that is iterated below, so the number written always matches the number
  // of (name, uri) pairs that follow it.
  const auto& enmr_path_map = array_schema.enumeration_path_map();
  const auto enmr_num =
      utils::safe_integral_cast<size_t, uint32_t>(enmr_path_map.size());
  serializer.write<uint32_t>(enmr_num);
  for (const auto& [enmr_name, enmr_uri] : enmr_path_map) {
    // Length prefixes are range-checked so an oversized string cannot be
    // written with a truncated length.
    const auto enmr_name_size =
        utils::safe_integral_cast<size_t, uint32_t>(enmr_name.size());
    serializer.write<uint32_t>(enmr_name_size);
    serializer.write(enmr_name.data(), enmr_name_size);

    const auto enmr_uri_size =
        utils::safe_integral_cast<size_t, uint32_t>(enmr_uri.size());
    serializer.write<uint32_t>(enmr_uri_size);
    serializer.write(enmr_uri.data(), enmr_uri_size);
  }

  // Serialize array current domain information
  array_schema.current_domain().serialize(serializer);
}

/**
* Note: This function currently implements defective behavior.
* Storing an array schema that does not have a URI attached to it should
Expand All @@ -59,13 +162,13 @@ void store_array_schema(

// Serialize
SizeComputationSerializer size_computation_serializer;
array_schema->serialize(size_computation_serializer);
serialize_array_schema(size_computation_serializer, *array_schema);

auto tile{WriterTile::from_generic(
size_computation_serializer.size(),
resources.ephemeral_memory_tracker())};
Serializer serializer(tile->data(), tile->size());
array_schema->serialize(serializer);
serialize_array_schema(serializer, *array_schema);
resources.stats().add_counter("write_array_schema_size", tile->size());

// Delete file if it exists already
Expand Down
10 changes: 10 additions & 0 deletions tiledb/sm/array_schema/array_schema_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define TILEDB_ARRAY_SCHEMA_OPERATIONS_H

#include "tiledb/common/common.h"
#include "tiledb/storage_format/serialization/serializers.h"

using namespace tiledb::common;

Expand All @@ -47,6 +48,15 @@ class EncryptionKey;
/* API */
/* ********************************* */

/**
* Serializes the array schema object into a buffer.
*
* The schema is always written using the current library format version
* (`constants::format_version`), not the version stored in the schema.
*
* @param serializer The object the array schema is serialized into.
* @param array_schema The array schema to be serialized.
* @throws An exception if the number of dimension labels overflows a
*     uint32_t.
*/
void serialize_array_schema(
Serializer& serializer, const ArraySchema& array_schema);

/**
* Stores an array schema into persistent storage.
*
Expand Down

0 comments on commit 24cae8f

Please sign in to comment.