Skip to content

Commit

Permalink
DAOS-16089 object: more check when transfer bitmap for coll punch (#1…
Browse files Browse the repository at this point in the history
…4743)

To detect some potential DRAM corruption and logic bug.

Signed-off-by: Fan Yong <fan.yong@intel.com>
  • Loading branch information
Nasf-Fan authored Aug 5, 2024
1 parent 846b57a commit 5cdae94
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 16 deletions.
5 changes: 4 additions & 1 deletion src/object/cli_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,11 @@ obj_coll_prep_one(struct coll_oper_args *coa, struct dc_object *obj,

dcs->dcs_buf[dcs->dcs_nr++] = shard->do_id.id_shard;

if (unlikely(dct->dct_tgt_nr == (uint8_t)(-1)))
if (unlikely(dct->dct_tgt_nr == (uint8_t)(-1))) {
D_WARN("Too much shards for obj "DF_OID"reside on the same target %u/%u\n",
DP_OID(obj->cob_md.omd_id), shard->do_target_rank, shard->do_target_idx);
goto out;
}

if (coa->coa_for_modify) {
if (dct->dct_tgt_nr >= dct->dct_tgt_cap) {
Expand Down
4 changes: 2 additions & 2 deletions src/object/obj_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ daos_oclass_attr_find(daos_obj_id_t oid, uint32_t *nr_grps)
/* see daos_objid_generate */
oc = oclass_ident2cl(daos_obj_id2class(oid), nr_grps);
if (!oc) {
D_DEBUG(DB_PL, "Unknown object class %u for "DF_OID"\n",
(unsigned int)daos_obj_id2class(oid), DP_OID(oid));
D_WARN("Unknown object class %u for "DF_OID"\n",
(unsigned int)daos_obj_id2class(oid), DP_OID(oid));
return NULL;
}
D_DEBUG(DB_PL, "Find class %s for oid "DF_OID"\n",
Expand Down
25 changes: 19 additions & 6 deletions src/object/obj_rpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1134,11 +1134,8 @@ crt_proc_struct_daos_coll_shard(crt_proc_t proc, crt_proc_op_t proc_op, struct d
if (DECODING(proc_op))
dcs->dcs_cap = dcs->dcs_nr;

if (dcs->dcs_nr <= 1) {
if (DECODING(proc_op))
dcs->dcs_buf = &dcs->dcs_inline;
if (dcs->dcs_nr <= 1)
return 0;
}

if (DECODING(proc_op)) {
D_ALLOC_ARRAY(dcs->dcs_buf, dcs->dcs_nr);
Expand All @@ -1161,6 +1158,7 @@ crt_proc_struct_daos_coll_shard(crt_proc_t proc, crt_proc_op_t proc_op, struct d
int
crt_proc_struct_daos_coll_target(crt_proc_t proc, crt_proc_op_t proc_op, struct daos_coll_target *dct)
{
int size;
int rc;
int i;

Expand All @@ -1172,6 +1170,12 @@ crt_proc_struct_daos_coll_target(crt_proc_t proc, crt_proc_op_t proc_op, struct
if (unlikely(rc))
return rc;

size = dct->dct_bitmap_sz << 3;
if (unlikely(size == 0)) {
D_ERROR("Invalid bitmap size (0) for collective operation!\n");
return -DER_INVAL;
}

rc = crt_proc_uint8_t(proc, proc_op, &dct->dct_max_shard);
if (unlikely(rc))
return rc;
Expand All @@ -1185,14 +1189,23 @@ crt_proc_struct_daos_coll_target(crt_proc_t proc, crt_proc_op_t proc_op, struct
return rc;

if (DECODING(proc_op)) {
if (unlikely(dct->dct_max_shard >= size)) {
D_ERROR("Too small bitmap for collective operation: %u vs %u\n",
dct->dct_max_shard, size);
return -DER_INVAL;
}

D_ALLOC(dct->dct_bitmap, dct->dct_bitmap_sz);
if (dct->dct_bitmap == NULL)
return -DER_NOMEM;

/* When decode, allocate enough buffer to avoid some XS accessing invalid DRAM. */
D_ALLOC_ARRAY(dct->dct_shards, dct->dct_bitmap_sz << 3);
D_ALLOC_ARRAY(dct->dct_shards, size);
if (dct->dct_shards == NULL)
goto out;
D_GOTO(out, rc = -DER_NOMEM);

for (i = 0; i < size; i++)
dct->dct_shards[i].dcs_buf = &dct->dct_shards[i].dcs_inline;
}

rc = crt_proc_memcpy(proc, proc_op, dct->dct_bitmap, dct->dct_bitmap_sz);
Expand Down
11 changes: 9 additions & 2 deletions src/object/srv_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,11 @@ obj_coll_punch_bulk(crt_rpc_t *rpc, d_iov_t *iov, crt_proc_t *p_proc,

rc = obj_bulk_transfer(rpc, CRT_BULK_GET, false, &ocpi->ocpi_tgt_bulk, NULL, NULL,
DAOS_HDL_INVAL, &sgls, 1, NULL, NULL);
if (rc != 0)
if (rc != 0) {
D_ERROR("Failed to prepare bulk transfer for coll_punch, size %u: "DF_RC"\n",
ocpi->ocpi_bulk_tgt_sz, DP_RC(rc));
goto out;
}

rc = crt_proc_create(dss_get_module_info()->dmi_ctx, iov->iov_buf, iov->iov_len,
CRT_PROC_DECODE, &proc);
Expand Down Expand Up @@ -238,8 +241,12 @@ obj_coll_punch_prep(struct obj_coll_punch_in *ocpi, struct daos_coll_target *dct

/* dcts[0] is for current engine. */
if (dcts[0].dct_bitmap == NULL || dcts[0].dct_bitmap_sz == 0 ||
dcts[0].dct_shards == NULL)
dcts[0].dct_shards == NULL) {
D_ERROR("Invalid input for current engine: bitmap %s, bitmap_sz %u, shards %s\n",
dcts[0].dct_bitmap == NULL ? "empty" : "non-empty", dcts[0].dct_bitmap_sz,
dcts[0].dct_shards == NULL ? "empty" : "non-empty");
D_GOTO(out, rc = -DER_INVAL);
}

/* Already allocated enough space in MBS when decode to hold the targets and bitmap. */
target = (struct dtx_coll_target *)(ddt + mbs->dm_tgt_cnt);
Expand Down
11 changes: 6 additions & 5 deletions src/object/srv_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -5701,14 +5701,15 @@ ds_obj_coll_punch_handler(crt_rpc_t *rpc)
max_ver = version;

DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, DLOG_ERR, DB_IO, rc,
"(%s) handled collective punch RPC %p for obj "
DF_UOID" on XS %u/%u epc "DF_X64" pmv %u/%u, with dti "
DF_DTI", forward width %u, forward depth %u",
"(%s) handled collective punch RPC %p for obj "DF_UOID" on XS %u/%u epc "
DF_X64" pmv %u/%u, with dti "DF_DTI", bulk_tgt_sz %u, bulk_tgt_nr %u, "
"tgt_nr %u, forward width %u, forward depth %u",
(ocpi->ocpi_flags & ORF_LEADER) ? "leader" :
(ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"), rpc,
DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id, dmi->dmi_tgt_id, ocpi->ocpi_epoch,
ocpi->ocpi_map_ver, max_ver, DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_disp_width,
ocpi->ocpi_disp_depth);
ocpi->ocpi_map_ver, max_ver, DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz,
ocpi->ocpi_bulk_tgt_nr, (unsigned int)ocpi->ocpi_tgts.ca_count,
ocpi->ocpi_disp_width, ocpi->ocpi_disp_depth);

obj_punch_complete(rpc, rc, max_ver);

Expand Down

0 comments on commit 5cdae94

Please sign in to comment.