Skip to content

Commit

Permalink
DAOS-16005 object: check resent coll_punch on leader and relay engine
Browse files Browse the repository at this point in the history
In the collective punch RPC handler on the leader or a relay engine, if the
related DTX has already been prepared when a resent RPC is handled, we must
avoid re-executing the punch locally. Otherwise, the re-execution may clean up
the previously prepared DTX entry by mistake.

Signed-off-by: Fan Yong <fan.yong@intel.com>
  • Loading branch information
Nasf-Fan committed Jun 26, 2024
1 parent cbff806 commit d3254f0
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
5 changes: 4 additions & 1 deletion src/dtx/dtx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,7 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul

dtx_shares_fini(dth);

if (daos_is_zero_dti(&dth->dth_xid) || unlikely(result == -DER_ALREADY) || dlh->dlh_relay)
if (daos_is_zero_dti(&dth->dth_xid) || unlikely(result == -DER_ALREADY))
goto out;

if (unlikely(coh->sch_closed)) {
Expand Down Expand Up @@ -1333,6 +1333,9 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul
D_ASSERTF(0, "Unexpected DTX "DF_DTI" status %d\n", DP_DTI(&dth->dth_xid), status);
}

if (dlh->dlh_relay)
goto out;

/*
* Even if the transaction modifies nothing locally, we still need to store
* it persistently. Otherwise, the subsequent DTX resync may not find it as
Expand Down
6 changes: 5 additions & 1 deletion src/object/srv_obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -3564,8 +3564,12 @@ obj_tgt_punch(struct obj_tgt_punch_args *otpa, uint32_t *shards, uint32_t count)
goto out;
}

if (dth != NULL)
if (dth != NULL) {
if (dth->dth_prepared)
D_GOTO(out, rc = 0);

goto exec;
}

if (opi->opi_flags & ORF_RESEND) {
tmp = opi->opi_epoch;
Expand Down
28 changes: 19 additions & 9 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,8 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae,
dbd = dae->dae_dbd;
dae_df = umem_off2ptr(umm, dae->dae_df_off);

D_ASSERT(dae_df != NULL);
D_ASSERTF(dae_df != NULL, "Hit invalid DTX entry "DF_DTI" when release for %s\n",
DP_DTI(&DAE_XID(dae)), abort ? "abort" : "commit");
D_ASSERTF(dbd->dbd_magic == DTX_ACT_BLOB_MAGIC,
"Invalid blob %p magic %x for "DF_DTI" (lid %x)\n",
dbd, dbd->dbd_magic, DP_DTI(&DAE_XID(dae)), DAE_LID(dae));
Expand Down Expand Up @@ -1177,13 +1178,16 @@ vos_dtx_check_availability(daos_handle_t coh, uint32_t entry,
}

if (intent == DAOS_INTENT_PURGE) {
uint32_t age = d_hlc_age2sec(DAE_XID(dae).dti_hlc);

/*
* The DTX entry still references related data record,
* then we cannot (vos) aggregate related data record.
*/
if (d_hlc_age2sec(DAE_XID(dae).dti_hlc) >= DAOS_AGG_THRESHOLD)
D_WARN("DTX "DF_DTI" (%u) still references the data, cannot be (VOS) "
"aggregated\n", DP_DTI(&DAE_XID(dae)), vos_dtx_status(dae));
if (age >= DAOS_AGG_THRESHOLD)
D_WARN("DTX "DF_DTI" (state:%u, age:%u) still references the data, "
"cannot be (VOS) aggregated\n",
DP_DTI(&DAE_XID(dae)), vos_dtx_status(dae), age);

return ALB_AVAILABLE_DIRTY;
}
Expand Down Expand Up @@ -1420,21 +1424,25 @@ vos_dtx_validation(struct dtx_handle *dth)
if (rc == -DER_NONEXIST) {
rc = dbtree_lookup(cont->vc_dtx_committed_hdl, &kiov, &riov);
if (rc == 0)
return DTX_ST_COMMITTED;
D_GOTO(out, rc = DTX_ST_COMMITTED);
}

/* Failed to lookup DTX entry, in spite of whether it is DER_NONEXIST
* or not, then handle it as aborted that will cause client to retry.
*/
return DTX_ST_ABORTED;
D_GOTO(out, rc = DTX_ST_ABORTED);
}

dae = riov.iov_buf;
} else if (unlikely(dae == NULL)) {
return DTX_ST_COMMITTED;
D_GOTO(out, rc = DTX_ST_COMMITTED);
}

return vos_dtx_status(dae);
rc = vos_dtx_status(dae);

out:
dth->dth_need_validation = 0;
return rc;
}

static int
Expand Down Expand Up @@ -2930,8 +2938,10 @@ vos_dtx_cleanup_internal(struct dtx_handle *dth)
/* Only keep the DTX entry (header) for handling resend RPC,
* remove DTX records, purge related VOS objects from cache.
*/
if (dae != NULL)
if (dae != NULL) {
D_ASSERT(!vos_dae_is_prepare(dae));
dtx_act_ent_cleanup(cont, dae, dth, true);
}
} else {
d_iov_set(&kiov, &dth->dth_xid, sizeof(dth->dth_xid));
d_iov_set(&riov, NULL, 0);
Expand Down

0 comments on commit d3254f0

Please sign in to comment.