Skip to content

Commit

Permalink
DAOS-14969 container: retry IV might cause deadlock (#13632)
Browse files Browse the repository at this point in the history
The OID IV entry lock might be required again in the
retry case.

Signed-off-by: Di Wang <di.wang@intel.com>
  • Loading branch information
wangdi committed Jan 21, 2024
1 parent be4402b commit 1946ef3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 6 deletions.
4 changes: 2 additions & 2 deletions src/cart/crt_iv.c
Original file line number Diff line number Diff line change
Expand Up @@ -3508,8 +3508,8 @@ crt_iv_update_internal(crt_iv_namespace_t ivns, uint32_t class_id,

D_GOTO(exit, rc);
} else {
DL_CDEBUG(rc == -DER_NONEXIST || rc == -DER_NOTLEADER, DLOG_INFO, DLOG_ERR, rc,
"ivo_on_update failed");
DL_CDEBUG(rc == -DER_NONEXIST || rc == -DER_NOTLEADER || rc == -DER_BUSY,
DLOG_INFO, DLOG_ERR, rc, "ivo_on_update failed");

update_comp_cb(ivns, class_id, iv_key, NULL,
iv_value, rc, cb_arg);
Expand Down
10 changes: 9 additions & 1 deletion src/container/oid_iv.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ struct oid_iv_entry {
struct oid_iv_range rg;
/** protect the entry */
ABT_mutex lock;
void *current_req;
};

/** Priv data in the iv layer */
Expand Down Expand Up @@ -130,7 +131,14 @@ oid_iv_ent_update(struct ds_iv_entry *ns_entry, struct ds_iv_key *iv_key,
D_ASSERT(priv != NULL);

entry = ns_entry->iv_value.sg_iovs[0].iov_buf;
ABT_mutex_lock(entry->lock);
rc = ABT_mutex_trylock(entry->lock);
/* For requests retried from _iv_op(), the lock may not have been
* released in some cases.
*/
if (rc == ABT_ERR_MUTEX_LOCKED && entry->current_req != src)
return -DER_BUSY;

entry->current_req = src;
avail = &entry->rg;

oids = src->sg_iovs[0].iov_buf;
Expand Down
6 changes: 3 additions & 3 deletions src/engine/server_iv.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2017-2023 Intel Corporation.
* (C) Copyright 2017-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1053,7 +1053,7 @@ _iv_op(struct ds_iv_ns *ns, struct ds_iv_key *key, d_sg_list_t *value,
retry:
rc = iv_op_internal(ns, key, value, sync, shortcut, opc);
if (retry && !ns->iv_stop &&
(daos_rpc_retryable_rc(rc) || rc == -DER_NOTLEADER)) {
(daos_rpc_retryable_rc(rc) || rc == -DER_NOTLEADER || rc == -DER_BUSY)) {
if (rc == -DER_NOTLEADER && key->rank != (d_rank_t)(-1) &&
sync && (sync->ivs_mode == CRT_IV_SYNC_LAZY ||
sync->ivs_mode == CRT_IV_SYNC_EAGER)) {
Expand All @@ -1070,7 +1070,7 @@ _iv_op(struct ds_iv_ns *ns, struct ds_iv_key *key, d_sg_list_t *value,
* but in-flight fetch request return IVCB_FORWARD, then queued RPC will
* reply IVCB_FORWARD.
*/
D_WARN("ns %u retry for class %d opc %d rank %u/%u: " DF_RC "\n", ns->iv_ns_id,
D_INFO("ns %u retry for class %d opc %d rank %u/%u: " DF_RC "\n", ns->iv_ns_id,
key->class_id, opc, key->rank, ns->iv_master_rank, DP_RC(rc));
/* sleep 1sec and retry */
dss_sleep(1000);
Expand Down

0 comments on commit 1946ef3

Please sign in to comment.