From b3aab9972e7dd5045f171cbdcb3a1d77ab01939a Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Mon, 22 Jul 2024 23:07:00 +0800 Subject: [PATCH] DAOS-16097 vos: assign persistent DTX entry in vos_dtx_prepared Assign persistent DTX entry only via vos_dtx_prepared() that will initialize such DTX entry immediately to avoid any potential race between persistently allocating DTX entry and initializing it. Add some check (for DTX flag) after DTX locally prepared. Do not allow current transaction to deregister the record that is referenced by another prepared (but non-committed) DTX. Allow-unstable-test: true Signed-off-by: Fan Yong --- src/dtx/dtx_rpc.c | 27 ++++- src/object/srv_obj.c | 3 +- src/vos/vos_common.c | 33 +++--- src/vos/vos_dtx.c | 253 +++++++++++++++-------------------------- src/vos/vos_ilog.c | 3 +- src/vos/vos_internal.h | 4 +- src/vos/vos_tree.c | 14 +-- 7 files changed, 149 insertions(+), 188 deletions(-) diff --git a/src/dtx/dtx_rpc.c b/src/dtx/dtx_rpc.c index cc18a1ac915..1c0e73c9640 100644 --- a/src/dtx/dtx_rpc.c +++ b/src/dtx/dtx_rpc.c @@ -1220,6 +1220,9 @@ dtx_refresh_internal(struct ds_cont_child *cont, int *check_count, d_list_t *che /* Handle the entries whose leaders are on current server. */ d_list_for_each_entry_safe(dsp, tmp, &self, dsp_link) { struct dtx_entry dte; + struct dtx_entry *pdte = &dte; + struct dtx_cos_key dck; + d_list_del(&dsp->dsp_link); @@ -1228,13 +1231,31 @@ dtx_refresh_internal(struct ds_cont_child *cont, int *check_count, d_list_t *che dte.dte_refs = 1; dte.dte_mbs = dsp->dsp_mbs; + if (for_io) { + rc = vos_dtx_check(cont->sc_hdl, &dsp->dsp_xid, NULL, NULL, NULL, false); + switch(rc) { + case DTX_ST_COMMITTABLE: + dck.oid = dsp->dsp_oid; + dck.dkey_hash = dsp->dsp_dkey_hash; + rc = dtx_commit(cont, &pdte, &dck, 1); + if (rc < 0 && rc != -DER_NONEXIST && for_io) + d_list_add_tail(&dsp->dsp_link, cmt_list); + else + dtx_dsp_free(dsp); + continue; + case DTX_ST_COMMITTED: + case -DER_NONEXIST: /* Aborted */ + dtx_dsp_free(dsp); + continue; + default: + break; + } + } + rc = dtx_status_handle_one(cont, &dte, dsp->dsp_oid, dsp->dsp_dkey_hash, dsp->dsp_epoch, NULL, NULL); switch (rc) { case DSHR_NEED_COMMIT: { - struct dtx_entry *pdte = &dte; - struct dtx_cos_key dck; - dck.oid = dsp->dsp_oid; dck.dkey_hash = dsp->dsp_dkey_hash; rc = dtx_commit(cont, &pdte, &dck, 1); diff --git a/src/object/srv_obj.c b/src/object/srv_obj.c index 27617c81a80..a2185a103b9 100644 --- a/src/object/srv_obj.c +++ b/src/object/srv_obj.c @@ -5650,7 +5650,8 @@ ds_obj_coll_punch_handler(crt_rpc_t *rpc) 1 /* start, [0] is for current engine */, ocpi->ocpi_disp_width, &exec_arg.coll_cur); - rc = dtx_leader_begin(ioc.ioc_vos_coh, &odm->odm_xid, &epoch, 1, version, + rc = dtx_leader_begin(ioc.ioc_vos_coh, &odm->odm_xid, &epoch, + dcts[0].dct_shards[dmi->dmi_tgt_id].dcs_nr, version, &ocpi->ocpi_oid, NULL /* dti_cos */, 0 /* dti_cos_cnt */, NULL /* tgts */, exec_arg.coll_cur.grp_nr /* tgt_cnt */, dtx_flags, odm->odm_mbs, dce, &dlh); diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index 28e2ac86757..fb8461e2931 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -269,13 +269,15 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, struct umem_rsrvd_act **rsrvd_scmp, d_list_t *nvme_exts, bool started, struct bio_desc *biod, int err) { - struct vos_pool *pool; - struct dtx_handle *dth = dth_in; - struct vos_dtx_act_ent *dae; - struct dtx_rsrvd_uint *dru; - struct vos_dtx_cmt_ent *dce = NULL; - struct dtx_handle tmp = {0}; - int rc; + struct vos_pool *pool; + struct umem_instance *umm; + struct dtx_handle *dth = dth_in; + struct vos_dtx_act_ent *dae; + struct vos_dtx_act_ent_df *dae_df; + struct dtx_rsrvd_uint *dru; + struct vos_dtx_cmt_ent *dce = NULL; + struct dtx_handle tmp = {0}; + int rc = 0; if (!dtx_is_valid_handle(dth)) { /** Created a dummy dth handle for publishing extents */ @@ -287,11 +289,11 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, D_INIT_LIST_HEAD(&tmp.dth_deferred_nvme); } - if (dth->dth_local) { + if (dth->dth_local) pool = vos_hdl2pool(dth_in->dth_poh); - } else { + else pool = cont->vc_pool; - } + umm = vos_pool2umm(pool); if (rsrvd_scmp != NULL) { D_ASSERT(nvme_exts != NULL); @@ -300,7 +302,7 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, * Just do your best to release the SCM reservation. Can't handle another * error while handling one already anyway. */ - (void)vos_publish_scm(vos_pool2umm(pool), *rsrvd_scmp, false /* publish */); + (void)vos_publish_scm(umm, *rsrvd_scmp, false /* publish */); D_FREE(*rsrvd_scmp); *rsrvd_scmp = NULL; err = -DER_NOMEM; @@ -341,9 +343,9 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, vos_dth_set(NULL, pool->vp_sysdb); if (bio_nvme_configured(SMD_DEV_TYPE_META) && biod != NULL) - err = umem_tx_end_ex(vos_pool2umm(pool), err, biod); + err = umem_tx_end_ex(umm, err, biod); else - err = umem_tx_end(vos_pool2umm(pool), err); + err = umem_tx_end(umm, err); cancel: if (dtx_is_valid_handle(dth_in)) { @@ -409,8 +411,11 @@ vos_tx_end(struct vos_container *cont, struct dtx_handle *dth_in, vos_dtx_post_handle(cont, &dae, &dce, 1, false, err != 0); } else { D_ASSERT(dce == NULL); - if (err == 0) + if (err == 0) { dae->dae_prepared = 1; + dae_df = umem_off2ptr(umm, dae->dae_df_off); + D_ASSERT(!(dae_df->dae_flags & DTE_INVALID)); + } } } } diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 27a41ce5776..0e70133629f 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -639,17 +639,25 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, return 0; /* In spite of for commit or abort, the DTX must be local preparing/prepared. */ - D_ASSERTF(vos_dae_is_prepare(dae), "Unexpected DTX "DF_DTI" status for %s\n", - DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit"); + D_ASSERTF(vos_dae_is_prepare(dae), + "Unexpected DTX "DF_DTI" status for %s in pool "DF_UUID" cont "DF_UUID"\n", + DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit", + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); dbd = dae->dae_dbd; dae_df = umem_off2ptr(umm, dae->dae_df_off); - D_ASSERTF(dae_df != NULL, "Hit invalid DTX entry "DF_DTI" when release for %s\n", - DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit"); + D_ASSERTF(dae_df != NULL, "Hit invalid DTX entry "DF_DTI" when release for %s in pool " + DF_UUID" cont "DF_UUID"\n", DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit", + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); D_ASSERTF(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC, - "Invalid blob %p magic %x for "DF_DTI" (lid %x)\n", - dbd, dbd->dbd_magic, DP_DTI(&DAE_XID(dae)), DAE_LID(dae)); + "Bad blob %p magic %x for "DF_DTI" (lid %x) in pool "DF_UUID" cont "DF_UUID"\n", + dbd, dbd->dbd_magic, DP_DTI(&DAE_XID(dae)), DAE_LID(dae), + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); + D_ASSERTF(dbd->dbd_index > 0, + "%s DTX "DF_DTI" against new DTX blob %p in pool "DF_UUID" cont "DF_UUID"\n", + abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd, + DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); if (!UMOFF_IS_NULL(dae_df->dae_mbs_off)) { /* dae_mbs_off will be invalid via flag DTE_INVALID. */ @@ -688,19 +696,16 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, } if (dbd->dbd_count > 1 || dbd->dbd_index < dbd->dbd_cap) { - rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, - sizeof(dae_df->dae_flags)); + rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, sizeof(dae_df->dae_flags)); if (rc != 0) return rc; - /* Mark the DTX entry as invalid in SCM. */ - dae_df->dae_flags = DTE_INVALID; - - rc = umem_tx_add_ptr(umm, &dbd->dbd_count, - sizeof(dbd->dbd_count)); + rc = umem_tx_add_ptr(umm, &dbd->dbd_count, sizeof(dbd->dbd_count)); if (rc != 0) return rc; + /* Mark the DTX entry as invalid persistently. */ + dae_df->dae_flags = DTE_INVALID; dbd->dbd_count--; } else { struct vos_cont_df *cont_df = cont->vc_cont_df; @@ -922,6 +927,8 @@ vos_dtx_extend_act_table(struct vos_container *cont) dbd->dbd_magic = DTX_ACT_BLOB_MAGIC; dbd->dbd_cap = (DTX_BLOB_SIZE - sizeof(struct vos_dtx_blob_df)) / sizeof(struct vos_dtx_act_ent_df); + dbd->dbd_count = 0; + dbd->dbd_index = 0; tmp = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); if (tmp == NULL) { @@ -932,14 +939,14 @@ vos_dtx_extend_act_table(struct vos_container *cont) sizeof(cont_df->cd_dtx_active_head) + sizeof(cont_df->cd_dtx_active_tail)); if (rc != 0) - return rc; + goto out; cont_df->cd_dtx_active_head = dbd_off; } else { rc = umem_tx_add_ptr(umm, &tmp->dbd_next, sizeof(tmp->dbd_next)); if (rc != 0) - return rc; + goto out; tmp->dbd_next = dbd_off; @@ -947,19 +954,20 @@ vos_dtx_extend_act_table(struct vos_container *cont) rc = umem_tx_add_ptr(umm, &cont_df->cd_dtx_active_tail, sizeof(cont_df->cd_dtx_active_tail)); if (rc != 0) - return rc; + goto out; } cont_df->cd_dtx_active_tail = dbd_off; - D_DEBUG(DB_IO, "Allocated DTX active blob %p ("UMOFF_PF") for cont "DF_UUID"\n", - dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); - - return 0; +out: + DL_CDEBUG(rc == 0, DB_IO, DLOG_ERR, rc, + "Allocated DTX active blob %p ("UMOFF_PF") for cont "DF_UUID, + dbd, UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); + return rc; } static int -vos_dtx_alloc(struct umem_instance *umm, struct vos_dtx_blob_df *dbd, struct dtx_handle *dth) +vos_dtx_alloc(struct umem_instance *umm, struct dtx_handle *dth) { struct vos_dtx_act_ent *dae = NULL; struct vos_container *cont; @@ -1005,21 +1013,12 @@ vos_dtx_alloc(struct umem_instance *umm, struct vos_dtx_blob_df *dbd, struct dtx DAE_MBS_FLAGS(dae) = 0; } - if (dbd != NULL) { - D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); - - dae->dae_df_off = umem_ptr2off(umm, dbd) + - offsetof(struct vos_dtx_blob_df, dbd_active_data) + - sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; - } - /* Will be set as dbd::dbd_index via vos_dtx_prepared(). */ DAE_INDEX(dae) = DTX_INDEX_INVAL; - dae->dae_dbd = dbd; dae->dae_dth = dth; - D_DEBUG(DB_IO, "Allocated new lid DTX: "DF_DTI" lid=%lx, dae=%p, dae_dbd=%p\n", - DP_DTI(&dth->dth_xid), DAE_LID(dae) & DTX_LID_SOLO_MASK, dae, dbd); + D_DEBUG(DB_IO, "Allocated new lid DTX: "DF_DTI" lid=%lx, dae=%p\n", + DP_DTI(&dth->dth_xid), DAE_LID(dae) & DTX_LID_SOLO_MASK, dae); d_iov_set(&kiov, &DAE_XID(dae), sizeof(DAE_XID(dae))); d_iov_set(&riov, dae, sizeof(*dae)); @@ -1445,46 +1444,6 @@ vos_dtx_validation(struct dtx_handle *dth) return rc; } -static int -vos_dtx_active(struct dtx_handle *dth) -{ - struct vos_dtx_act_ent *dae = dth->dth_ent; - struct vos_container *cont; - struct vos_cont_df *cont_df; - struct umem_instance *umm; - struct vos_dtx_blob_df *dbd; - int rc = 0; - - if (dae->dae_dbd != NULL) - goto out; - - cont = vos_hdl2cont(dth->dth_coh); - cont_df = cont->vc_cont_df; - umm = vos_cont2umm(cont); - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - - if (dbd == NULL || dbd->dbd_index >= dbd->dbd_cap) { - rc = vos_dtx_extend_act_table(cont); - if (rc != 0) - goto out; - - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - } - - D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); - - dae->dae_df_off = umem_ptr2off(umm, dbd) + - offsetof(struct vos_dtx_blob_df, dbd_active_data) + - sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; - dae->dae_dbd = dbd; - -out: - if (rc == 0) - dth->dth_active = 1; - - return rc; -} - /* The caller has started local transaction. */ int vos_dtx_register_record(struct umem_instance *umm, umem_off_t record, @@ -1552,15 +1511,10 @@ vos_dtx_register_record(struct umem_instance *umm, umem_off_t record, return 0; } - if (!dth->dth_active) { - rc = vos_dtx_active(dth); - if (rc != 0) - goto out; - } - rc = vos_dtx_append(dth, record, type); if (rc == 0) { /* Incarnation log entry implies a share */ + dth->dth_active = 1; *tx_id = DAE_LID(dae); if (type == DTX_RT_ILOG) dth->dth_modify_shared = 1; @@ -1577,20 +1531,18 @@ vos_dtx_register_record(struct umem_instance *umm, umem_off_t record, } /* The caller has started local transaction. */ -void +int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record) { struct vos_container *cont; struct vos_dtx_act_ent *dae; - struct vos_dtx_act_ent_df *dae_df; - umem_off_t *rec_df; bool found; int count; int i; if (!vos_dtx_is_normal_entry(entry)) - return; + return 0; D_ASSERT(entry >= DTX_LID_RESERVED); @@ -1600,20 +1552,24 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, * The on-disk entry will be destroyed soon. */ if (cont == NULL) - return; + return 0; found = lrua_lookupx(cont->vc_dtx_array, entry - DTX_LID_RESERVED, epoch, &dae); if (!found) { D_WARN("Could not find active DTX record for lid=%d, epoch=" DF_U64"\n", entry, epoch); - return; + return 0; } - dae_df = umem_off2ptr(umm, dae->dae_df_off); - if (daos_is_zero_dti(&dae_df->dae_xid) || - dae_df->dae_flags & DTE_INVALID) - return; + /* + * NOTE: If the record to be deregistered (for free or overwrite, and so on) is referenced + * by another prepared (but non-committed) DTX, then do not allow current transaction + * to modify it. Because if current transaction is aborted or failed for some reason, + * there is no efficient way to recover such former non-committed DTX. + */ + if (dae->dae_dbd != NULL) + return dtx_inprogress(dae, vos_dth_get(cont->vc_pool->vp_sysdb), false, false, 8); if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) count = DTX_INLINE_REC_CNT; @@ -1623,46 +1579,18 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, for (i = 0; i < count; i++) { if (record == umem_off2offset(DAE_REC_INLINE(dae)[i])) { DAE_REC_INLINE(dae)[i] = UMOFF_NULL; - goto handle_df; + return 0; } } for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) { if (record == umem_off2offset(dae->dae_records[i])) { dae->dae_records[i] = UMOFF_NULL; - goto handle_df; - } - } - - /* Not found */ - return; - -handle_df: - if (dae_df->dae_rec_cnt > DTX_INLINE_REC_CNT) - count = DTX_INLINE_REC_CNT; - else - count = dae_df->dae_rec_cnt; - - rec_df = dae_df->dae_rec_inline; - for (i = 0; i < count; i++) { - if (umem_off2offset(rec_df[i]) == record) { - rec_df[i] = UMOFF_NULL; - return; + return 0; } } - rec_df = umem_off2ptr(umm, dae_df->dae_rec_off); - - /* Not found */ - if (rec_df == NULL) - return; - - for (i = 0; i < dae_df->dae_rec_cnt - DTX_INLINE_REC_CNT; i++) { - if (umem_off2offset(rec_df[i]) == record) { - rec_df[i] = UMOFF_NULL; - return; - } - } + return 0; } int @@ -1670,6 +1598,8 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) { struct vos_dtx_act_ent *dae = dth->dth_ent; struct vos_container *cont = vos_hdl2cont(dth->dth_coh); + struct vos_dtx_act_ent_df *dae_df; + struct vos_cont_df *cont_df; struct umem_instance *umm; struct vos_dtx_blob_df *dbd; umem_off_t rec_off; @@ -1705,9 +1635,26 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) return rc; } + D_ASSERT(dae->dae_dbd == NULL); + + cont_df = cont->vc_cont_df; umm = vos_cont2umm(cont); - dbd = dae->dae_dbd; - D_ASSERT(dbd != NULL); + dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); + if (dbd == NULL || dbd->dbd_index >= dbd->dbd_cap) { + rc = vos_dtx_extend_act_table(cont); + if (rc != 0) + return rc; + + dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); + } + + D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); + + dae->dae_dbd = dbd; + dae->dae_df_off = umem_ptr2off(umm, dbd) + + offsetof(struct vos_dtx_blob_df, dbd_active_data) + + sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; + dae_df = umem_off2ptr(umm, dae->dae_df_off); /* Use the dkey_hash for the last modification as the dkey_hash * for the whole transaction. It will used as the index for DTX @@ -1784,27 +1731,30 @@ vos_dtx_prepared(struct dtx_handle *dth, struct vos_dtx_cmt_ent **dce_p) DAE_INDEX(dae) = dbd->dbd_index; if (DAE_INDEX(dae) > 0) { - rc = umem_tx_xadd_ptr(umm, umem_off2ptr(umm, dae->dae_df_off), - sizeof(struct vos_dtx_act_ent_df), UMEM_XADD_NO_SNAPSHOT); + rc = umem_tx_xadd_ptr(umm, dae_df, sizeof(*dae_df), UMEM_XADD_NO_SNAPSHOT); if (rc != 0) - return rc; + goto out; /* dbd_index is next to dbd_count */ rc = umem_tx_add_ptr(umm, &dbd->dbd_count, sizeof(dbd->dbd_count) + sizeof(dbd->dbd_index)); if (rc != 0) - return rc; + goto out; } - memcpy(umem_off2ptr(umm, dae->dae_df_off), - &dae->dae_base, sizeof(struct vos_dtx_act_ent_df)); + memcpy(dae_df, &dae->dae_base, sizeof(*dae_df)); dbd->dbd_count++; dbd->dbd_index++; dae->dae_preparing = 1; dae->dae_need_release = 1; - return 0; +out: + DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc, + "Preparing DTX "DF_DTI" in dbd "UMOFF_PF" at index %u, count %u, cap %u", + DP_DTI(&DAE_XID(dae)), UMOFF_P(cont_df->cd_dtx_active_tail), + dbd->dbd_index, dbd->dbd_count, dbd->dbd_cap); + return rc; } static struct dtx_memberships * @@ -2827,12 +2777,16 @@ vos_dtx_act_reindex(struct vos_container *cont) dbd_count++; } - D_ASSERTF(dbd_count == dbd->dbd_count, - "Unmatched active DTX count %d/%d, cap %d, idx %d for blob %p (" - UMOFF_PF"), head "UMOFF_PF", tail "UMOFF_PF"\n", - dbd_count, dbd->dbd_count, dbd->dbd_cap, dbd->dbd_index, dbd, - UMOFF_P(dbd_off), UMOFF_P(cont_df->cd_dtx_active_head), - UMOFF_P(cont_df->cd_dtx_active_tail)); + if (unlikely(dbd_count != dbd->dbd_count)) { + D_ERROR("Unmatched active DTX count %d/%d, cap %d, idx %d for blob %p (" + UMOFF_PF"), head "UMOFF_PF", tail "UMOFF_PF" in pool " + DF_UUID" cont "DF_UUID"\n", dbd_count, dbd->dbd_count, dbd->dbd_cap, + dbd->dbd_index, dbd, UMOFF_P(dbd_off), + UMOFF_P(cont_df->cd_dtx_active_head), + UMOFF_P(cont_df->cd_dtx_active_tail), DP_UUID(cont->vc_pool->vp_id), + DP_UUID(cont->vc_id)); + D_GOTO(out, rc = -DER_IO); + } dbd_off = dbd->dbd_next; } @@ -3012,13 +2966,12 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) { struct vos_container *cont; struct umem_instance *umm = NULL; - struct vos_dtx_blob_df *dbd = NULL; struct vos_dtx_cmt_ent *dce = NULL; - struct vos_cont_df *cont_df = NULL; struct vos_dtx_act_ent *dae; d_iov_t kiov; d_iov_t riov; int rc = 0; + bool tx = false; if (!dtx_is_valid_handle(dth)) return 0; @@ -3055,31 +3008,11 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) if (rc != 0) goto out; - cont_df = cont->vc_cont_df; - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - if (dbd == NULL || dbd->dbd_index >= dbd->dbd_cap) { - rc = vos_dtx_extend_act_table(cont); - if (rc != 0) - goto out; - - dbd = umem_off2ptr(umm, cont_df->cd_dtx_active_tail); - } + tx = true; } - if (dth->dth_ent == NULL) { - rc = vos_dtx_alloc(umm, dbd, dth); - } else if (persistent) { - D_ASSERT(dbd != NULL); - D_ASSERT(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC); - - dae = dth->dth_ent; - D_ASSERT(dae->dae_dbd == NULL); - - dae->dae_df_off = umem_ptr2off(umm, dbd) + - offsetof(struct vos_dtx_blob_df, dbd_active_data) + - sizeof(struct vos_dtx_act_ent_df) * dbd->dbd_index; - dae->dae_dbd = dbd; - } + if (dth->dth_ent == NULL) + rc = vos_dtx_alloc(umm, dth); out: if (rc == 0) { @@ -3094,7 +3027,7 @@ vos_dtx_attach(struct dtx_handle *dth, bool persistent, bool exist) } if (persistent) { - if (cont_df != NULL) { + if (tx) { if (rc == 0) { rc = umem_tx_commit(umm); D_ASSERTF(rc == 0, "local TX commit failure %d\n", rc); diff --git a/src/vos/vos_ilog.c b/src/vos/vos_ilog.c index ae67f9a00ac..54abf2f407f 100644 --- a/src/vos/vos_ilog.c +++ b/src/vos/vos_ilog.c @@ -82,8 +82,7 @@ vos_ilog_del(struct umem_instance *umm, umem_off_t ilog_off, uint32_t tx_id, return 0; coh.cookie = (unsigned long)args; - vos_dtx_deregister_record(umm, coh, tx_id, epoch, ilog_off); - return 0; + return vos_dtx_deregister_record(umm, coh, tx_id, epoch, ilog_off); } void diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 99d49c46fa7..1730cbcffef 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -747,8 +747,10 @@ vos_dtx_get(bool standalone); * \param epoch [IN] Epoch for the DTX. * \param record [IN] Address (offset) of the record to be * deregistered. + * + * \return 0 on success and negative on failure. */ -void +int vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, uint32_t entry, daos_epoch_t epoch, umem_off_t record); diff --git a/src/vos/vos_tree.c b/src/vos/vos_tree.c index e48467f9364..c36fcaa88c5 100644 --- a/src/vos/vos_tree.c +++ b/src/vos/vos_tree.c @@ -601,6 +601,7 @@ svt_rec_free_internal(struct btr_instance *tins, struct btr_record *rec, struct dtx_handle *dth = NULL; struct umem_rsrvd_act *rsrvd_scm; struct vos_container *cont = vos_hdl2cont(tins->ti_coh); + int rc; if (UMOFF_IS_NULL(rec->rec_off)) return 0; @@ -611,12 +612,12 @@ svt_rec_free_internal(struct btr_instance *tins, struct btr_record *rec, return -DER_NO_PERM; /* Not allowed */ } - vos_dtx_deregister_record(&tins->ti_umm, tins->ti_coh, - irec->ir_dtx, *epc, rec->rec_off); + rc = vos_dtx_deregister_record(&tins->ti_umm, tins->ti_coh, + irec->ir_dtx, *epc, rec->rec_off); + if (rc != 0) + return rc; if (!overwrite) { - int rc; - /* SCM value is stored together with vos_irec_df */ if (addr->ba_type == DAOS_MEDIA_NVME) { struct vos_pool *pool = tins->ti_priv; @@ -796,9 +797,8 @@ evt_dop_log_del(struct umem_instance *umm, daos_epoch_t epoch, daos_handle_t coh; coh.cookie = (unsigned long)args; - vos_dtx_deregister_record(umm, coh, desc->dc_dtx, epoch, - umem_ptr2off(umm, desc)); - return 0; + return vos_dtx_deregister_record(umm, coh, desc->dc_dtx, epoch, + umem_ptr2off(umm, desc)); } void