Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16037 pool: Fix upgrade for svc_ops #14753

Merged
merged 3 commits into from
Jul 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 80 additions & 50 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1653,13 +1653,8 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf_out,
struct pool_buf *map_buf;
struct daos_prop_entry *svc_rf_entry;
daos_prop_t *prop = NULL;
uint32_t svc_ops_enabled = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change to this function seems good. No change requested since this form seems cleaner.

Just for confirmation, if the changes to this function were omitted from this patch, the issues in the ticket resulting from the pool upgrade bug would be resolved via the other change in this patch to pool_upgrade_props()? i.e., For a daos 2.4 pool whose svc_ops_{enabled,age,max} all do not exist, functionally with or without the change to this function, in both cases itt will result in the same svc->ps_ops_enabled = 0, svc->svc_ops_max = 0, svc->svc_ops_age = 0 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kccain, yes, I think things should work without the changes in read_db_for_stepping_up. Do you prefer not to make these changes?

This question reminds me: To make the code easier to understand, I should have written an else branch that sets enabled, max, and age to 0. What do you think?

(Since the CI job has already failed during git fetch, I'll refresh the PR to add the else branch. But please feel free to share any concern/objection.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No objection, I think your change is an improvement, including the most recent change to add the else branch and improve code readability.

uint32_t svc_ops_max = 0;
uint32_t svc_ops_age = 0;
uint32_t map_version;
uint64_t rdb_size;
bool rdb_size_ok = false;
int rc;
uint32_t map_version;
int rc;

rc = rdb_tx_begin(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term, &tx);
if (rc != 0)
Expand All @@ -1686,48 +1681,53 @@ read_db_for_stepping_up(struct pool_svc *svc, struct pool_buf **map_buf_out,
else
svc->ps_svc_rf = -1;

/* Check if duplicate operations detection is enabled, for informative debug log */
rc = rdb_get_size(svc->ps_rsvc.s_db, &rdb_size);
if (rc != 0)
goto out_lock;
rdb_size_ok = (rdb_size >= DUP_OP_MIN_RDB_SIZE);
if (svc->ps_global_version >= DAOS_POOL_GLOBAL_VERSION_WITH_SVC_OPS_KVS) {
uint64_t rdb_size;
bool rdb_size_ok;

d_iov_set(&value, &svc_ops_enabled, sizeof(svc_ops_enabled));
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_enabled, &value);
if (rc == -DER_NONEXIST) {
rc = 0;
} else if (rc != 0) {
D_ERROR(DF_UUID ": failed to lookup svc_ops_enabled: " DF_RC "\n",
DP_UUID(svc->ps_uuid), DP_RC(rc));
goto out_lock;
}
svc->ps_ops_enabled = svc_ops_enabled;
/* Check if duplicate operations detection is enabled, for informative debug log */
rc = rdb_get_size(svc->ps_rsvc.s_db, &rdb_size);
if (rc != 0)
goto out_lock;
rdb_size_ok = (rdb_size >= DUP_OP_MIN_RDB_SIZE);

d_iov_set(&value, &svc_ops_max, sizeof(svc_ops_max));
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_max, &value);
if (rc == -DER_NONEXIST) {
rc = 0;
} else if (rc != 0) {
DL_ERROR(rc, DF_UUID ": failed to lookup svc_ops_max", DP_UUID(svc->ps_uuid));
goto out_lock;
}
svc->ps_ops_max = svc_ops_max;
d_iov_set(&value, &svc->ps_ops_enabled, sizeof(svc->ps_ops_enabled));
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_enabled, &value);
if (rc != 0) {
D_ERROR(DF_UUID ": failed to lookup svc_ops_enabled: " DF_RC "\n",
DP_UUID(svc->ps_uuid), DP_RC(rc));
goto out_lock;
}

d_iov_set(&value, &svc_ops_age, sizeof(svc_ops_age));
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_age, &value);
if (rc == -DER_NONEXIST) {
rc = 0;
} else if (rc != 0) {
DL_ERROR(rc, DF_UUID ": failed to lookup svc_ops_age", DP_UUID(svc->ps_uuid));
goto out_lock;
}
svc->ps_ops_age = svc_ops_age;
d_iov_set(&value, &svc->ps_ops_age, sizeof(svc->ps_ops_age));
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_age, &value);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": failed to lookup svc_ops_age",
DP_UUID(svc->ps_uuid));
goto out_lock;
}

D_DEBUG(DB_MD,
DF_UUID ": duplicate ops detection %s (rdb size " DF_U64 " %s %u minimum), "
"max entries %u, max entry age %u sec\n",
DP_UUID(svc->ps_uuid), svc_ops_enabled ? "enabled" : "disabled", rdb_size,
rdb_size_ok ? ">=" : "<", DUP_OP_MIN_RDB_SIZE, svc_ops_max, svc_ops_age);
d_iov_set(&value, &svc->ps_ops_max, sizeof(svc->ps_ops_max));
rc = rdb_tx_lookup(&tx, &svc->ps_root, &ds_pool_prop_svc_ops_max, &value);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": failed to lookup svc_ops_max",
DP_UUID(svc->ps_uuid));
goto out_lock;
}

D_DEBUG(DB_MD,
DF_UUID ": duplicate ops detection %s (rdb size " DF_U64 " %s %u minimum), "
"max entries %u, max entry age %u sec\n",
DP_UUID(svc->ps_uuid), svc->ps_ops_enabled ? "enabled" : "disabled",
rdb_size, rdb_size_ok ? ">=" : "<", DUP_OP_MIN_RDB_SIZE, svc->ps_ops_max,
svc->ps_ops_age);
} else {
svc->ps_ops_enabled = 0;
svc->ps_ops_age = 0;
svc->ps_ops_max = 0;
D_DEBUG(DB_MD, DF_UUID ": duplicate ops detection unavailable\n",
DP_UUID(svc->ps_uuid));
}

D_ASSERTF(rc == 0, DF_RC"\n", DP_RC(rc));
*map_buf_out = map_buf;
Expand Down Expand Up @@ -5172,8 +5172,7 @@ pool_upgrade_one_prop_int32(struct rdb_tx *tx, struct pool_svc *svc, uuid_t uuid
}

static int
pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,
uuid_t pool_uuid, crt_rpc_t *rpc)
pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc, uuid_t pool_uuid, crt_rpc_t *rpc)
{
d_iov_t value;
uint64_t val;
Expand All @@ -5185,6 +5184,8 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,
int n_hdl_uuids = 0;
uint32_t connectable;
uint32_t svc_ops_enabled = 0;
uint32_t svc_ops_age;
uint32_t svc_ops_max;

if (rpc) {
rc = find_hdls_to_evict(tx, svc, &hdl_uuids, &hdl_uuids_size,
Expand Down Expand Up @@ -5408,14 +5409,14 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,

/* Upgrade for the pool/container service operations KVS */
D_DEBUG(DB_MD, DF_UUID ": check ds_pool_prop_svc_ops\n", DP_UUID(pool_uuid));

d_iov_set(&value, NULL, 0);
rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_svc_ops, &value);
if (rc && rc != -DER_NONEXIST) {
D_ERROR(DF_UUID ": failed to lookup service ops KVS: %d\n", DP_UUID(pool_uuid), rc);
D_GOTO(out_free, rc);
} else if (rc == -DER_NONEXIST) {
struct rdb_kvs_attr attr;
uint32_t svc_ops_num;

D_DEBUG(DB_MD, DF_UUID ": creating service ops KVS\n", DP_UUID(pool_uuid));
attr.dsa_class = RDB_KVS_LEXICAL;
Expand All @@ -5426,6 +5427,14 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,
DP_UUID(pool_uuid), rc);
D_GOTO(out_free, rc);
}
svc_ops_num = 0;
d_iov_set(&value, &svc_ops_num, sizeof(svc_ops_num));
rc = rdb_tx_update(tx, &svc->ps_root, &ds_pool_prop_svc_ops_num, &value);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": failed to write upgrade svc_ops_num",
DP_UUID(pool_uuid));
D_GOTO(out_free, rc);
}
need_commit = true;
}

Expand Down Expand Up @@ -5462,12 +5471,12 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,
}

D_DEBUG(DB_MD, DF_UUID ": check ds_pool_prop_svc_ops_age\n", DP_UUID(pool_uuid));
d_iov_set(&value, &val32, sizeof(val32));
d_iov_set(&value, &svc_ops_age, sizeof(svc_ops_age));
rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_svc_ops_age, &value);
if (rc && rc != -DER_NONEXIST) {
D_GOTO(out_free, rc);
} else if (rc == -DER_NONEXIST) {
val32 = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_DEFAULT;
svc_ops_age = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_DEFAULT;
rc = rdb_tx_update(tx, &svc->ps_root, &ds_pool_prop_svc_ops_age, &value);
if (rc != 0) {
DL_ERROR(rc, "failed to write upgrade svc_ops_age");
Expand All @@ -5476,6 +5485,22 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,
need_commit = true;
}

D_DEBUG(DB_MD, DF_UUID ": check ds_pool_prop_svc_ops_max\n", DP_UUID(pool_uuid));
d_iov_set(&value, &svc_ops_max, sizeof(svc_ops_max));
rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_svc_ops_max, &value);
if (rc && rc != -DER_NONEXIST) {
D_GOTO(out_free, rc);
} else if (rc == -DER_NONEXIST) {
svc_ops_max = PS_OPS_PER_SEC * svc_ops_age;
rc = rdb_tx_update(tx, &svc->ps_root, &ds_pool_prop_svc_ops_max, &value);
if (rc != 0) {
DL_ERROR(rc, DF_UUID ": failed to write upgrade svc_ops_max",
DP_UUID(pool_uuid));
D_GOTO(out_free, rc);
}
need_commit = true;
}

D_DEBUG(DB_MD, DF_UUID ": need_commit=%s\n", DP_UUID(pool_uuid),
need_commit ? "true" : "false");
if (need_commit) {
Expand All @@ -5484,6 +5509,11 @@ pool_upgrade_props(struct rdb_tx *tx, struct pool_svc *svc,
rc = rdb_tx_commit(tx);
if (rc)
D_GOTO(out_free, rc);

svc->ps_ops_enabled = svc_ops_enabled;
svc->ps_ops_age = svc_ops_age;
svc->ps_ops_max = svc_ops_max;

rc = pool_prop_read(tx, svc, DAOS_PO_QUERY_PROP_ALL, &prop);
if (rc)
D_GOTO(out_free, rc);
Expand Down
Loading