Skip to content

Commit

Permalink
DAOS-16005 object: check resent coll_punch on leader and relay engine (
Browse files Browse the repository at this point in the history
…daos-stack#14659)

For the collective punch RPC handler on the leader or a relay engine, if the
related DTX has already been prepared when handling a resent RPC, then we
should avoid re-executing the punch locally. Otherwise, it may clean up the
previously prepared DTX entry by mistake.

Signed-off-by: Fan Yong <fan.yong@intel.com>
  • Loading branch information
Nasf-Fan authored and grom72 committed Jul 25, 2024
1 parent f8018f4 commit 5809efa
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
5 changes: 4 additions & 1 deletion src/dtx/dtx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul

dtx_shares_fini(dth);

if (daos_is_zero_dti(&dth->dth_xid) || unlikely(result == -DER_ALREADY) || dlh->dlh_relay)
if (daos_is_zero_dti(&dth->dth_xid) || unlikely(result == -DER_ALREADY))
goto out;

if (unlikely(coh->sch_closed)) {
Expand Down Expand Up @@ -1328,6 +1328,9 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul
D_ASSERTF(0, "Unexpected DTX "DF_DTI" status %d\n", DP_DTI(&dth->dth_xid), status);
}

if (dlh->dlh_relay)
goto out;

/*
* Even if the transaction modifies nothing locally, we still need to store
* it persistently. Otherwise, the subsequent DTX resync may not find it as
Expand Down
6 changes: 5 additions & 1 deletion src/object/srv_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -3568,8 +3568,12 @@ obj_tgt_punch(struct obj_tgt_punch_args *otpa, uint32_t *shards, uint32_t count)
goto out;
}

if (dth != NULL)
if (dth != NULL) {
if (dth->dth_prepared)
D_GOTO(out, rc = 0);

goto exec;
}

if (opi->opi_flags & ORF_RESEND) {
tmp = opi->opi_epoch;
Expand Down
28 changes: 19 additions & 9 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,8 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae,
dbd = dae->dae_dbd;
dae_df = umem_off2ptr(umm, dae->dae_df_off);

D_ASSERT(dae_df != NULL);
D_ASSERTF(dae_df != NULL, "Hit invalid DTX entry "DF_DTI" when release for %s\n",
DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit");
D_ASSERTF(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC,
"Invalid blob %p magic %x for "DF_DTI" (lid %x)\n",
dbd, dbd->dbd_magic, DP_DTI(&DAE_XID(dae)), DAE_LID(dae));
Expand Down Expand Up @@ -1177,13 +1178,16 @@ vos_dtx_check_availability(daos_handle_t coh, uint32_t entry,
}

if (intent == DAOS_INTENT_PURGE) {
uint32_t age = d_hlc_age2sec(DAE_XID(dae).dti_hlc);

/*
* The DTX entry still references related data record,
* then we cannot (vos) aggregate related data record.
*/
if (d_hlc_age2sec(DAE_XID(dae).dti_hlc) >= DAOS_AGG_THRESHOLD)
D_WARN("DTX "DF_DTI" (%u) still references the data, cannot be (VOS) "
"aggregated\n", DP_DTI(&DAE_XID(dae)), vos_dtx_status(dae));
if (age >= DAOS_AGG_THRESHOLD)
D_WARN("DTX "DF_DTI" (state:%u, age:%u) still references the data, "
"cannot be (VOS) aggregated\n",
DP_DTI(&DAE_XID(dae)), vos_dtx_status(dae), age);

return ALB_AVAILABLE_DIRTY;
}
Expand Down Expand Up @@ -1420,21 +1424,25 @@ vos_dtx_validation(struct dtx_handle *dth)
if (rc == -DER_NONEXIST) {
rc = dbtree_lookup(cont->vc_dtx_committed_hdl, &kiov, &riov);
if (rc == 0)
return DTX_ST_COMMITTED;
D_GOTO(out, rc = DTX_ST_COMMITTED);
}

/* Failed to lookup DTX entry, in spite of whether it is DER_NONEXIST
* or not, then handle it as aborted that will cause client to retry.
*/
return DTX_ST_ABORTED;
D_GOTO(out, rc = DTX_ST_ABORTED);
}

dae = riov.iov_buf;
} else if (unlikely(dae == NULL)) {
return DTX_ST_COMMITTED;
D_GOTO(out, rc = DTX_ST_COMMITTED);
}

return vos_dtx_status(dae);
rc = vos_dtx_status(dae);

out:
dth->dth_need_validation = 0;
return rc;
}

static int
Expand Down Expand Up @@ -2934,8 +2942,10 @@ vos_dtx_cleanup_internal(struct dtx_handle *dth)
/* Only keep the DTX entry (header) for handling resend RPC,
* remove DTX records, purge related VOS objects from cache.
*/
if (dae != NULL)
if (dae != NULL) {
D_ASSERT(!vos_dae_is_prepare(dae));
dtx_act_ent_cleanup(cont, dae, dth, true);
}
} else {
d_iov_set(&kiov, &dth->dth_xid, sizeof(dth->dth_xid));
d_iov_set(&riov, NULL, 0);
Expand Down

0 comments on commit 5809efa

Please sign in to comment.