Skip to content

Commit

Permalink
Take user block into account when returning chunk addresses (#4236)
Browse files Browse the repository at this point in the history
Both H5Dchunk_iter() and H5Dget_chunk_info(_by_coord)() did not take
the size of the user block into account when reporting addresses. Since
the #1 use of these functions is to root around in the file for the raw
data, this is kind of a problem.

Fixes GitHub issue #3003
  • Loading branch information
derobins committed Mar 26, 2024
1 parent cb3169f commit fc46c83
Show file tree
Hide file tree
Showing 4 changed files with 493 additions and 223 deletions.
21 changes: 21 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,27 @@ Bug Fixes since HDF5-1.14.0 release
overwriting data with a shorter (top level) variable length sequence, an
error could occur. This has been fixed.

- Take user block into account in H5Dchunk_iter() and H5Dget_chunk_info()

The address reported by the following functions did not correctly
take the user block into account:

* H5Dchunk_iter() <-- addr passed to callback
* H5Dget_chunk_info() <-- addr parameter
* H5Dget_chunk_info_by_coord() <-- addr parameter

This means that these functions reported logical HDF5 file addresses,
which would only be equal to the physical addresses when there is no
user block prepended to the HDF5 file. This is unfortunate, as the
primary use of these functions is to get physical addresses in order
to directly access the chunks.

The listed functions now correctly take the user block into account,
so they will emit physical addresses that can be used to directly
access the chunks.

Fixes #3003

- Fixed asserts raised by large values of H5Pset_est_link_info() parameters

If large values for est_num_entries and/or est_name_len were passed
Expand Down
91 changes: 46 additions & 45 deletions src/H5Dchunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,10 @@ typedef struct H5D_chunk_coll_fill_info_t {
#endif /* H5_HAVE_PARALLEL */

typedef struct H5D_chunk_iter_ud_t {
H5D_chunk_iter_op_t op; /* User defined callback */
void *op_data; /* User data for user defined callback */
H5O_layout_chunk_t *chunk; /* Chunk layout */
H5D_chunk_iter_op_t op; /* User defined callback */
void *op_data; /* User data for user defined callback */
H5O_layout_chunk_t *chunk; /* Chunk layout */
haddr_t base_addr; /* Base address of the file, taking user block into account */
} H5D_chunk_iter_ud_t;

/********************/
Expand Down Expand Up @@ -7850,7 +7851,7 @@ H5D__get_num_chunks(const H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_
/*-------------------------------------------------------------------------
* Function: H5D__get_chunk_info_cb
*
* Purpose: Get the chunk info of the queried chunk, given by its index.
* Purpose: Get the chunk info of the queried chunk, given by its index
*
* Return: Success: H5_ITER_CONT or H5_ITER_STOP
* H5_ITER_STOP indicates the queried chunk is found
Expand Down Expand Up @@ -7901,21 +7902,18 @@ H5D__get_chunk_info_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata)
* Note: Currently, the domain of the index in this function is of all
* the written chunks, regardless the dataspace.
*
* Return: Success: SUCCEED
* Failure: FAIL
*
* Return: SUCCEED/FAIL
*-------------------------------------------------------------------------
*/
herr_t
H5D__get_chunk_info(const H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_t chk_index, hsize_t *offset,
unsigned *filter_mask, haddr_t *addr, hsize_t *size)
{
H5D_chk_idx_info_t idx_info; /* Chunked index info */
H5D_chunk_info_iter_ud_t udata; /* User data for callback */
const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */
H5D_rdcc_ent_t *ent; /* Cache entry index */
hsize_t ii = 0; /* Dimension index */
herr_t ret_value = SUCCEED; /* Return value */
H5D_chk_idx_info_t idx_info; /* Chunked index info */
const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */
H5D_rdcc_ent_t *ent; /* Cache entry index */
hsize_t ii = 0; /* Dimension index */
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr)

Expand Down Expand Up @@ -7947,6 +7945,9 @@ H5D__get_chunk_info(const H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_

/* If the chunk is written, get its info, otherwise, return without error */
if (H5_addr_defined(idx_info.storage->idx_addr)) {

H5D_chunk_info_iter_ud_t udata;

/* Initialize before iteration */
udata.chunk_idx = chk_index;
udata.curr_idx = 0;
Expand All @@ -7967,14 +7968,14 @@ H5D__get_chunk_info(const H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_
if (filter_mask)
*filter_mask = udata.filter_mask;
if (addr)
*addr = udata.chunk_addr;
*addr = udata.chunk_addr + H5F_BASE_ADDR(dset->oloc.file);
if (size)
*size = udata.nbytes;
if (offset)
for (ii = 0; ii < udata.ndims; ii++)
offset[ii] = udata.scaled[ii] * dset->shared->layout.u.chunk.dim[ii];
} /* end if */
} /* end if H5_addr_defined */
}
}

done:
FUNC_LEAVE_NOAPI_TAG(ret_value)
Expand Down Expand Up @@ -8039,12 +8040,11 @@ herr_t
H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned *filter_mask, haddr_t *addr,
hsize_t *size)
{
const H5O_layout_t *layout = NULL; /* Dataset layout */
const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */
H5D_rdcc_ent_t *ent; /* Cache entry index */
H5D_chk_idx_info_t idx_info; /* Chunked index info */
H5D_chunk_info_iter_ud_t udata; /* User data for callback */
herr_t ret_value = SUCCEED; /* Return value */
const H5O_layout_t *layout = NULL; /* Dataset layout */
const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */
H5D_rdcc_ent_t *ent; /* Cache entry index */
H5D_chk_idx_info_t idx_info; /* Chunked index info */
herr_t ret_value = SUCCEED; /* Return value */

FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr)

Expand Down Expand Up @@ -8080,6 +8080,9 @@ H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned

/* If the dataset is not written, return without errors */
if (H5_addr_defined(idx_info.storage->idx_addr)) {

H5D_chunk_info_iter_ud_t udata;

/* Calculate the scaled of this chunk */
H5VM_chunk_scaled(dset->shared->ndims, offset, layout->u.chunk.dim, udata.scaled);
udata.scaled[dset->shared->ndims] = 0;
Expand All @@ -8102,11 +8105,11 @@ H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned
if (filter_mask)
*filter_mask = udata.filter_mask;
if (addr)
*addr = udata.chunk_addr;
*addr = udata.chunk_addr + H5F_BASE_ADDR(dset->oloc.file);
if (size)
*size = udata.nbytes;
} /* end if */
} /* end if H5_addr_defined */
}
}

done:
FUNC_LEAVE_NOAPI_TAG(ret_value)
Expand All @@ -8115,33 +8118,32 @@ H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned
/*-------------------------------------------------------------------------
* Function: H5D__chunk_iter_cb
*
* Purpose: Call the user-defined function with the chunk data. The iterator continues if
* the user-defined function returns H5_ITER_CONT, and stops if H5_ITER_STOP is
* returned.
* Purpose: Call the user-defined function with the chunk data. The
* iterator continues if the user-defined function returns
* H5_ITER_CONT, and stops if H5_ITER_STOP is returned.
*
* Return: Success: H5_ITER_CONT or H5_ITER_STOP
* Failure: Negative (H5_ITER_ERROR)
*
*-------------------------------------------------------------------------
*/
static int
H5D__chunk_iter_cb(const H5D_chunk_rec_t *chunk_rec, void *udata)
{
const H5D_chunk_iter_ud_t *data = (H5D_chunk_iter_ud_t *)udata;
const H5O_layout_chunk_t *chunk = data->chunk;
int ret_value = H5_ITER_CONT;
const H5D_chunk_iter_ud_t *data = (H5D_chunk_iter_ud_t *)udata;
const H5O_layout_chunk_t *chunk = data->chunk;
hsize_t offset[H5O_LAYOUT_NDIMS];
unsigned ii; /* Match H5O_layout_chunk_t.ndims */
int ret_value = H5_ITER_CONT;

/* Similar to H5D__get_chunk_info */
for (ii = 0; ii < chunk->ndims; ii++)
offset[ii] = chunk_rec->scaled[ii] * chunk->dim[ii];
for (unsigned i = 0; i < chunk->ndims; i++)
offset[i] = chunk_rec->scaled[i] * chunk->dim[i];

FUNC_ENTER_PACKAGE_NOERR

/* Check for callback failure and pass along return value */
if ((ret_value = (data->op)(offset, (unsigned)chunk_rec->filter_mask, chunk_rec->chunk_addr,
(hsize_t)chunk_rec->nbytes, data->op_data)) < 0)
if ((ret_value =
(data->op)(offset, (unsigned)chunk_rec->filter_mask, data->base_addr + chunk_rec->chunk_addr,
(hsize_t)chunk_rec->nbytes, data->op_data)) < 0)
HERROR(H5E_DATASET, H5E_CANTNEXT, "iteration operator failed");

FUNC_LEAVE_NOAPI(ret_value)
Expand All @@ -8150,11 +8152,9 @@ H5D__chunk_iter_cb(const H5D_chunk_rec_t *chunk_rec, void *udata)
/*-------------------------------------------------------------------------
* Function: H5D__chunk_iter
*
* Purpose: Iterate over all the chunks in the dataset with given callback.
*
* Return: Success: Non-negative
* Failure: Negative
* Purpose: Iterate over all the chunks in the dataset with given callback
*
* Return: SUCCEED/FAIL
*-------------------------------------------------------------------------
*/
herr_t
Expand Down Expand Up @@ -8196,14 +8196,15 @@ H5D__chunk_iter(H5D_t *dset, H5D_chunk_iter_op_t op, void *op_data)
H5D_chunk_iter_ud_t ud;

/* Set up info for iteration callback */
ud.op = op;
ud.op_data = op_data;
ud.chunk = &dset->shared->layout.u.chunk;
ud.op = op;
ud.op_data = op_data;
ud.chunk = &dset->shared->layout.u.chunk;
ud.base_addr = H5F_BASE_ADDR(dset->oloc.file);

/* Iterate over the allocated chunks calling the iterator callback */
if ((ret_value = (layout->storage.u.chunk.ops->iterate)(&idx_info, H5D__chunk_iter_cb, &ud)) < 0)
HERROR(H5E_DATASET, H5E_CANTNEXT, "chunk iteration failed");
} /* end if H5_addr_defined */
}

done:
FUNC_LEAVE_NOAPI_TAG(ret_value)
Expand Down
15 changes: 12 additions & 3 deletions src/H5Dpublic.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ typedef herr_t (*H5D_gather_func_t)(const void *dst_buf, size_t dst_buf_bytes_us
*
* \param[in] offset Logical position of the chunk's first element in units of dataset elements
* \param[in] filter_mask Bitmask indicating the filters used when the chunk was written
* \param[in] addr Chunk address in the file
* \param[in] addr Chunk address in the file, taking the user block (if any) into account
* \param[in] size Chunk size in bytes, 0 if the chunk does not exist
* \param[in,out] op_data Pointer to any user-defined data associated with
* the operation.
Expand Down Expand Up @@ -669,7 +669,7 @@ H5_DLL herr_t H5Dget_num_chunks(hid_t dset_id, hid_t fspace_id, hsize_t *nchunks
* \dset_id
* \param[in] offset Logical position of the chunk's first element in units of dataset elements
* \param[out] filter_mask Bitmask indicating the filters used when the chunk was written
* \param[out] addr Chunk address in the file
* \param[out] addr Chunk address in the file, taking the user block (if any) into account
* \param[out] size Chunk size in bytes, 0 if the chunk does not exist
*
* \return \herr_t
Expand All @@ -686,6 +686,9 @@ H5_DLL herr_t H5Dget_num_chunks(hid_t dset_id, hid_t fspace_id, hsize_t *nchunks
* equal to the dataset's rank. Each element is the logical
* position of the chunk's first element in a dimension.
*
* \note Prior to HDF5 1.14.4, the reported address did not take the
* user block into account.
*
* \since 1.10.5
*
*/
Expand All @@ -709,6 +712,9 @@ H5_DLL herr_t H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, u
* user supplied callback with the details of the chunk and the supplied
* context \p op_data.
*
* \note Prior to HDF5 1.14.4, the address passed to the callback did not take
* the user block into account.
*
* \par Example
* For each chunk, print the allocated chunk size (0 for unallocated chunks).
* \snippet H5D_examples.c H5Dchunk_iter_cb
Expand All @@ -731,7 +737,7 @@ H5_DLL herr_t H5Dchunk_iter(hid_t dset_id, hid_t dxpl_id, H5D_chunk_iter_op_t cb
* \param[in] chk_idx Index of the chunk
* \param[out] offset Logical position of the chunk's first element in units of dataset elements
* \param[out] filter_mask Bitmask indicating the filters used when the chunk was written
* \param[out] addr Chunk address in the file
* \param[out] addr Chunk address in the file, taking the user block (if any) into account
* \param[out] size Chunk size in bytes, 0 if the chunk does not exist
*
* \return \herr_t
Expand All @@ -745,6 +751,9 @@ H5_DLL herr_t H5Dchunk_iter(hid_t dset_id, hid_t dxpl_id, H5D_chunk_iter_op_t cb
* address to #HADDR_UNDEF. The value pointed to by filter_mask will
* not be modified. \c NULL can be passed in for any \p out parameters.
*
* \note Prior to HDF5 1.14.4, the reported address did not take the
* user block into account.
*
* \p chk_idx is the chunk index in the selection. The index value
* may have a value of 0 up to the number of chunks stored in
* the file that has a nonempty intersection with the file
Expand Down
Loading

0 comments on commit fc46c83

Please sign in to comment.