Skip to content

Commit

Permalink
Remove Array::get_max_buffer_sizes. (#5292)
Browse files Browse the repository at this point in the history
This PR removes all the code associated with
`Array::get_max_buffer_sizes`.
This covers the Array bookkeeping members, the functions for calculating
maximum buffer sizes based on a subarray, the capnp struct, and the REST
client functions.
The `tiledb_serialize_array_max_buffer_sizes` C API was kept until
downstreams migrate, but its implementation was updated to always fail.
A follow-up PR in the REST server will soon remove the handling code for
this request.

---
TYPE: NO_HISTORY

---------

Co-authored-by: Theodore Tsirpanis <teo@tsirpanis.gr>
Co-authored-by: Shaun Reed <shaunrd0@gmail.com>
  • Loading branch information
3 people authored Oct 17, 2024
1 parent b1dd2b4 commit 3eeba4b
Show file tree
Hide file tree
Showing 13 changed files with 10 additions and 908 deletions.
30 changes: 0 additions & 30 deletions test/src/unit-capi-rest-dense_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1794,36 +1794,6 @@ TEST_CASE_METHOD(
tiledb_array_free(&array);
}

// Creates a dense array and verifies that it can be opened for reading and
// closed again, both while it is still empty and after it has been written.
// No buffer sizes are queried in the body below; only the open/close round
// trip is exercised.
TEST_CASE_METHOD(
    DenseArrayRESTFx,
    "C API: REST Test dense array, get max buffer sizes",
    "[capi][rest][dense]") {
  array_uri_ = vfs_test_setup_.array_uri("max_buffer_sizes_array");
  create_dense_array(array_uri_);

  // Open and close the still-empty array.
  tiledb_array_t* array = nullptr;
  // Allocation is a hard requirement: every call below dereferences `array`,
  // so the test must stop here instead of continuing with an invalid handle.
  REQUIRE(tiledb_array_alloc(ctx_, array_uri_.c_str(), &array) == TILEDB_OK);
  int rc = tiledb_array_open(ctx_, array, TILEDB_READ);
  // Non-fatal on purpose so the close/free below still runs on failure.
  CHECK(rc == TILEDB_OK);
  REQUIRE(tiledb_array_close(ctx_, array) == TILEDB_OK);
  tiledb_array_free(&array);

  // Write array
  write_dense_array(array_uri_);

  // Open and close the array again now that it holds data.
  REQUIRE(tiledb_array_alloc(ctx_, array_uri_.c_str(), &array) == TILEDB_OK);
  rc = tiledb_array_open(ctx_, array, TILEDB_READ);
  CHECK(rc == TILEDB_OK);

  // Clean up
  REQUIRE(tiledb_array_close(ctx_, array) == TILEDB_OK);
  tiledb_array_free(&array);
}

TEST_CASE_METHOD(
DenseArrayRESTFx,
"C API: REST Test dense array, error without rest server configured",
Expand Down
267 changes: 0 additions & 267 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,6 @@ Status Array::close() {
return Status::Ok();
}

clear_last_max_buffer_sizes();

try {
set_array_closed();

Expand Down Expand Up @@ -990,131 +988,6 @@ QueryType Array::get_query_type() const {
return query_type_;
}

/**
 * Retrieves the maximum buffer size for a fixed-sized attribute/dimension
 * (or the coordinates) over the given subarray.
 *
 * The array must be open in read mode, and its domain must be homogeneous
 * with only fixed-sized dimensions.
 *
 * @param name Attribute/dimension name (or `constants::coords`).
 * @param subarray Subarray forwarded to `compute_max_buffer_sizes`.
 * @param buffer_size Receives the computed size.
 * @return Status; an error if any precondition fails, if the computation
 *     fails, or if no size is available for `name`.
 */
Status Array::get_max_buffer_size(
    const char* name, const void* subarray, uint64_t* buffer_size) {
  // Check if array is open
  if (!is_open_) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Array is not open"));
  }

  // Error if the array was not opened in read mode
  if (query_type_ != QueryType::READ) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; "
                          "Array was not opened in read mode"));
  }

  // Check if name is null
  if (name == nullptr) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Attribute/Dimension name is null"));
  }

  // Not applicable to heterogeneous domains
  if (!array_schema_latest().domain().all_dims_same_type()) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Function not "
                          "applicable to heterogeneous domains"));
  }

  // Not applicable to variable-sized dimensions
  if (!array_schema_latest().domain().all_dims_fixed()) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Function not "
        "applicable to domains with variable-sized dimensions"));
  }

  // Check if name is attribute or dimension
  bool is_dim = array_schema_latest().is_dim(name);
  bool is_attr = array_schema_latest().is_attr(name);

  // Check if attribute/dimension exists
  if (name != constants::coords && !is_dim && !is_attr) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' does not exist"));
  }

  // Check if attribute/dimension is fixed sized
  if (array_schema_latest().var_size(name)) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' is var-sized"));
  }

  RETURN_NOT_OK(compute_max_buffer_sizes(subarray));

  // Retrieve buffer size. A missing entry is reported as an error instead of
  // being guarded only by an assert: asserts vanish under NDEBUG, and
  // dereferencing the end iterator would then be undefined behavior.
  auto it = last_max_buffer_sizes_.find(name);
  if (it == last_max_buffer_sizes_.end()) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; No size was computed for "
                    "Attribute/Dimension '") +
        name + "'"));
  }
  *buffer_size = it->second.first;

  return Status::Ok();
}

/**
 * Retrieves the maximum offsets and values buffer sizes for a var-sized
 * attribute/dimension over the given subarray.
 *
 * The array must be open in read mode, and its domain must be homogeneous
 * with only fixed-sized dimensions. The name is validated against the schema
 * before any sizes are computed, so invalid requests fail without running
 * the computation (which, for remote arrays, goes through the REST client).
 *
 * @param name Attribute/dimension name.
 * @param subarray Subarray forwarded to `compute_max_buffer_sizes`.
 * @param buffer_off_size Receives the first element of the computed size pair.
 * @param buffer_val_size Receives the second element of the computed size
 *     pair.
 * @return Status; an error if any precondition fails, if `name` does not
 *     exist or is fixed-sized, or if the computation fails.
 */
Status Array::get_max_buffer_size(
    const char* name,
    const void* subarray,
    uint64_t* buffer_off_size,
    uint64_t* buffer_val_size) {
  // Check if array is open
  if (!is_open_) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Array is not open"));
  }

  // Error if the array was not opened in read mode
  if (query_type_ != QueryType::READ) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; "
                          "Array was not opened in read mode"));
  }

  // Check if name is null
  if (name == nullptr) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Attribute/Dimension name is null"));
  }

  // Not applicable to heterogeneous domains
  if (!array_schema_latest().domain().all_dims_same_type()) {
    return LOG_STATUS(
        Status_ArrayError("Cannot get max buffer size; Function not "
                          "applicable to heterogeneous domains"));
  }

  // Not applicable to variable-sized dimensions
  if (!array_schema_latest().domain().all_dims_fixed()) {
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer size; Function not "
        "applicable to domains with variable-sized dimensions"));
  }

  // Check if attribute/dimension exists. This mirrors the set of names the
  // size cache is seeded with (attributes, coordinates and dimensions), and
  // runs before the computation so an invalid name fails fast.
  if (name != constants::coords && !array_schema_latest().is_dim(name) &&
      !array_schema_latest().is_attr(name)) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' does not exist"));
  }

  // Check if attribute/dimension is var-sized
  if (!array_schema_latest().var_size(name)) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' is fixed-sized"));
  }

  RETURN_NOT_OK(compute_max_buffer_sizes(subarray));

  // Defensive lookup: never dereference the end iterator even if the cache
  // unexpectedly lacks an entry for a name that exists in the schema.
  auto it = last_max_buffer_sizes_.find(name);
  if (it == last_max_buffer_sizes_.end()) {
    return LOG_STATUS(Status_ArrayError(
        std::string("Cannot get max buffer size; Attribute/Dimension '") +
        name + "' does not exist"));
  }

  // Retrieve buffer sizes
  *buffer_off_size = it->second.first;
  *buffer_val_size = it->second.second;

  return Status::Ok();
}

Status Array::reopen() {
// Note: Array will only reopen for reads. This is why we are checking the
// timestamp for the array directory and not new components. This needs to be
Expand Down Expand Up @@ -1155,9 +1028,6 @@ Status Array::reopen(uint64_t timestamp_start, uint64_t timestamp_end) {
}
array_dir_timestamp_start_ = timestamp_start;

// Reset the last max buffer sizes.
clear_last_max_buffer_sizes();

// Reopen metadata.
auto key = opened_array_->encryption_key();
opened_array_ = make_shared<OpenedArray>(
Expand Down Expand Up @@ -1902,143 +1772,6 @@ Array::open_for_writes() {
return {array_schema_latest, array_schemas_all};
}

/**
 * Discards the cached max buffer sizes together with the subarray bytes
 * they were computed for.
 */
void Array::clear_last_max_buffer_sizes() {
  // Empty the cached subarray and ask the container to give back its
  // capacity as well.
  last_max_buffer_sizes_subarray_.clear();
  last_max_buffer_sizes_subarray_.shrink_to_fit();

  // Forget every cached per-name size.
  last_max_buffer_sizes_.clear();
}

/**
 * Computes the max buffer sizes for `subarray` and caches them in
 * `last_max_buffer_sizes_`, remembering the subarray bytes in
 * `last_max_buffer_sizes_subarray_`.
 *
 * The computation is skipped when the cache is non-empty and the given
 * subarray is byte-identical to the remembered one. If the computation
 * fails, the cache is cleared so a later call cannot be served from
 * partially filled entries.
 *
 * @param subarray Subarray bytes; `2 * dim_num * coord_size` bytes are read,
 *     where `coord_size` is taken from the first dimension.
 * @return Status
 */
Status Array::compute_max_buffer_sizes(const void* subarray) {
  // Applicable only to domains where all dimensions have the same type
  if (!array_schema_latest().domain().all_dims_same_type()) {
    return LOG_STATUS(
        Status_ArrayError("Cannot compute max buffer sizes; Inapplicable when "
                          "dimension domains have different types"));
  }

  // Allocate space for max buffer sizes subarray
  auto dim_num = array_schema_latest().dim_num();
  auto coord_size{
      array_schema_latest().domain().dimension_ptr(0)->coord_size()};
  auto subarray_size = 2 * dim_num * coord_size;
  last_max_buffer_sizes_subarray_.resize(subarray_size);

  // The cache is reusable only if it holds entries computed for exactly this
  // subarray.
  const bool cache_hit =
      !last_max_buffer_sizes_.empty() &&
      std::memcmp(
          &last_max_buffer_sizes_subarray_[0], subarray, subarray_size) == 0;

  // Compute max buffer sizes
  if (!cache_hit) {
    last_max_buffer_sizes_.clear();

    // Seed a zero entry for every attribute, the coordinates and every
    // dimension.
    auto& attributes = array_schema_latest().attributes();
    for (const auto& attr : attributes)
      last_max_buffer_sizes_[attr->name()] =
          std::pair<uint64_t, uint64_t>(0, 0);
    last_max_buffer_sizes_[constants::coords] =
        std::pair<uint64_t, uint64_t>(0, 0);
    for (unsigned d = 0; d < dim_num; ++d)
      last_max_buffer_sizes_
          [array_schema_latest().domain().dimension_ptr(d)->name()] =
              std::pair<uint64_t, uint64_t>(0, 0);

    // On failure drop the seeded (and possibly partially updated) entries.
    // Otherwise the non-empty map could be mistaken for a valid cached
    // result by a later call whose subarray matches the remembered bytes.
    const Status st =
        compute_max_buffer_sizes(subarray, &last_max_buffer_sizes_);
    if (!st.ok()) {
      clear_last_max_buffer_sizes();
      return st;
    }
  }

  // Update subarray
  std::memcpy(&last_max_buffer_sizes_subarray_[0], subarray, subarray_size);

  return Status::Ok();
}

/**
 * Fills `buffer_sizes` with upper bounds on the buffer sizes needed for
 * `subarray`.
 *
 * Remote arrays forward the request to the REST client. Otherwise every
 * fragment adds its contribution first, and the result is then adjusted
 * using the number of cells in the subarray (for dense arrays, and for
 * sparse integer-domain arrays that disallow duplicates).
 *
 * @param subarray Subarray bytes, read as consecutive per-dimension ranges
 *     of `2 * coord_size` bytes each.
 * @param buffer_sizes Map from name to size pair, updated in place.
 * @return Status
 */
Status Array::compute_max_buffer_sizes(
    const void* subarray,
    std::unordered_map<std::string, std::pair<uint64_t, uint64_t>>*
        buffer_sizes) const {
  // Remote arrays: the REST client does all the work.
  if (remote_) {
    auto client = resources_.rest_client();
    if (client != nullptr) {
      return client->get_array_max_buffer_sizes(
          array_uri_, array_schema_latest(), subarray, buffer_sizes);
    }
    return LOG_STATUS(Status_ArrayError(
        "Cannot get max buffer sizes; remote array with no REST client."));
  }

  // Take a local copy of the handle so the opened array stays alive while
  // this call runs.
  auto opened = opened_array_;
  auto& fragments = opened->fragment_metadata();
  auto& schema = opened->array_schema_latest();

  // Nothing to add when there are no fragments.
  if (fragments.empty()) {
    return Status::Ok();
  }

  // Rough upper bound first. Especially for dense arrays this is not
  // accurate, as it accounts only for the non-empty regions of the subarray.
  for (auto& fragment : fragments) {
    fragment->add_max_buffer_sizes(*encryption_key(), subarray, buffer_sizes);
  }

  // Slice the raw subarray bytes into one range per dimension.
  auto dim_num = schema.dim_num();
  NDRange sub(dim_num);
  const auto* bytes = static_cast<const unsigned char*>(subarray);
  uint64_t pos = 0;
  for (unsigned d = 0; d < dim_num; ++d) {
    auto range_size{2 * schema.dimension_ptr(d)->coord_size()};
    sub[d] = Range(bytes + pos, range_size);
    pos += range_size;
  }

  if (schema.dense()) {
    // Dense arrays: derive the sizes from the cell count.
    // `cell_num` becomes 0 when `subarray` is huge, leading to a
    // `uint64_t` overflow; the bound is left untouched in that case.
    auto cell_num = schema.domain().cell_num(sub);
    if (cell_num != 0) {
      for (auto& entry : *buffer_sizes) {
        const auto& field = entry.first;
        auto& sizes = entry.second;
        if (schema.var_size(field)) {
          sizes.first = cell_num * constants::cell_var_offset_size;
          sizes.second += cell_num * datatype_size(schema.type(field));
        } else {
          sizes.first = cell_num * schema.cell_size(field);
        }
      }
    }
  } else if (!schema.allows_dups() && schema.domain().all_dims_int()) {
    // Sparse arrays with integer domain and no duplicates: the cell count
    // caps the size of fixed-sized fields.
    // `cell_num` becomes 0 when `subarray` is huge, leading to a
    // `uint64_t` overflow; the bound is left untouched in that case.
    auto cell_num = schema.domain().cell_num(sub);
    if (cell_num != 0) {
      for (auto& entry : *buffer_sizes) {
        const auto& field = entry.first;
        if (schema.var_size(field)) {
          continue;
        }

        // Skip the field if the product overflowed.
        const auto cell_size = schema.cell_size(field);
        uint64_t capped = cell_num * cell_size;
        if (capped / cell_size != cell_num) {
          continue;
        }

        // Keep whichever bound is tighter.
        auto& fixed_size = entry.second.first;
        fixed_size = std::min(fixed_size, capped);
      }
    }
  }

  return Status::Ok();
}

void Array::do_load_metadata() {
if (!array_directory().loaded()) {
throw ArrayException(
Expand Down
Loading

0 comments on commit 3eeba4b

Please sign in to comment.