From 06735186b6d0d603d39b42d823a6fc426da7aac2 Mon Sep 17 00:00:00 2001 From: Xuezhao Liu Date: Mon, 17 Jun 2024 03:06:26 +0000 Subject: [PATCH] DAOS-16039 object: fix EC aggregation wrong peer address Fix the wrong peer address used by EC aggregation when multiple dkeys are on the same EC aggregation leader shard. Update the existing test cases to cover that case. Required-githooks: true Signed-off-by: Xuezhao Liu --- src/include/daos/common.h | 1 + src/object/srv_ec_aggregate.c | 48 +++++++++------ src/tests/suite/daos_obj_ec.c | 108 +++++++++++++++++++++++++++++----- 3 files changed, 125 insertions(+), 32 deletions(-) diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 97cf8d911bc9..4d0e8c6376d6 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -882,6 +882,7 @@ enum { #define DAOS_SHARD_OBJ_RW_DROP_REPLY (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x80) #define DAOS_OBJ_FETCH_DATA_LOST (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x81) #define DAOS_OBJ_TRY_SPECIAL_SHARD (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x82) +#define DAOS_OBJ_EC_AGG_LEADER_DIFF (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x83) #define DAOS_VOS_AGG_RANDOM_YIELD (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x90) #define DAOS_VOS_AGG_MW_THRESH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x91) diff --git a/src/object/srv_ec_aggregate.c b/src/object/srv_ec_aggregate.c index 398efb6dba76..7ec8e36f9bb5 100644 --- a/src/object/srv_ec_aggregate.c +++ b/src/object/srv_ec_aggregate.c @@ -500,26 +500,28 @@ agg_count_cells(uint8_t *fcbit_map, uint8_t *tbit_map, uint64_t estart, * initialized and share to other servers at higher(pool/container) layer.
*/ static int -agg_get_obj_handle(struct ec_agg_entry *entry) +agg_get_obj_handle(struct ec_agg_entry *entry, bool reset_peer) { struct ec_agg_param *agg_param; uint32_t grp_start; uint32_t tgt_ec_idx; struct dc_object *obj; int i; - int rc; + int rc = 0; - if (daos_handle_is_valid(entry->ae_obj_hdl)) + if (daos_handle_is_valid(entry->ae_obj_hdl) && !reset_peer) return 0; agg_param = container_of(entry, struct ec_agg_param, ap_agg_entry); - rc = dsc_obj_open(agg_param->ap_pool_info.api_cont_hdl, - entry->ae_oid.id_pub, DAOS_OO_RW, - &entry->ae_obj_hdl); - if (rc) - goto out; + if (!daos_handle_is_valid(entry->ae_obj_hdl)) { + rc = dsc_obj_open(agg_param->ap_pool_info.api_cont_hdl, + entry->ae_oid.id_pub, DAOS_OO_RW, + &entry->ae_obj_hdl); + if (rc) + goto out; + } - if (entry->ae_peer_pshards[0].sd_rank != DAOS_TGT_IGNORE) + if (!reset_peer && entry->ae_peer_pshards[0].sd_rank != DAOS_TGT_IGNORE) D_GOTO(out, rc = 0); grp_start = entry->ae_grp_idx * entry->ae_obj_layout->ol_grp_size; @@ -599,7 +601,7 @@ agg_fetch_odata_cells(struct ec_agg_entry *entry, uint8_t *bit_map, for (i = 0; i < cell_cnt; i++) d_iov_set(&sgl.sg_iovs[i], &buf[i * cell_b], cell_b); - rc = agg_get_obj_handle(entry); + rc = agg_get_obj_handle(entry, false); if (rc) { D_ERROR("Failed to open object: "DF_RC"\n", DP_RC(rc)); goto out; @@ -1315,9 +1317,9 @@ agg_peer_update_ult(void *arg) rc = obj_req_create(dss_get_module_info()->dmi_ctx, &tgt_ep, DAOS_OBJ_RPC_EC_AGGREGATE, &rpc); if (rc) { - D_ERROR(DF_UOID" pidx %d to peer %d, obj_req_create " + D_ERROR(DF_UOID" pidx %d to peer %d, rank %d tag %d obj_req_create " DF_RC"\n", DP_UOID(entry->ae_oid), pidx, peer, - DP_RC(rc)); + tgt_ep.ep_rank, tgt_ep.ep_tag, DP_RC(rc)); goto out; } ec_agg_in = crt_req_get(rpc); @@ -1459,7 +1461,7 @@ agg_peer_update(struct ec_agg_entry *entry, bool write_parity) return -1; } - rc = agg_get_obj_handle(entry); + rc = agg_get_obj_handle(entry, false); if (rc) { D_ERROR("Failed to open object: "DF_RC"\n", DP_RC(rc)); 
return rc; @@ -1753,7 +1755,7 @@ agg_process_holes(struct ec_agg_entry *entry) } stripe_ud.asu_agg_entry = entry; - rc = agg_get_obj_handle(entry); + rc = agg_get_obj_handle(entry, false); if (rc) { D_ERROR("Failed to open object: "DF_RC"\n", DP_RC(rc)); goto out; @@ -2135,9 +2137,16 @@ agg_shard_is_parity(struct ds_pool *pool, struct ec_agg_entry *agg_entry) uint32_t shard_idx; struct pl_obj_shard *shard; - ec_tgt_idx = obj_ec_shard_idx_by_layout_ver(agg_entry->ae_oid.id_layout_ver, - agg_entry->ae_dkey_hash, oca, - daos_oclass_grp_size(oca) - i - 1); + if (unlikely(DAOS_FAIL_CHECK(DAOS_OBJ_EC_AGG_LEADER_DIFF) && + agg_entry->ae_dkey_hash % obj_ec_parity_tgt_nr(oca) == 0)) + ec_tgt_idx = obj_ec_shard_idx_by_layout_ver(agg_entry->ae_oid.id_layout_ver, + agg_entry->ae_dkey_hash, oca, + obj_ec_data_tgt_nr(oca) + i); + else + ec_tgt_idx = obj_ec_shard_idx_by_layout_ver(agg_entry->ae_oid.id_layout_ver, + agg_entry->ae_dkey_hash, oca, + daos_oclass_grp_size(oca) + - i - 1); shard_idx = grp_start + ec_tgt_idx; shard = pl_obj_get_shard(agg_entry->ae_obj_layout, shard_idx); @@ -2184,6 +2193,8 @@ agg_dkey(daos_handle_t ih, vos_iter_entry_t *entry, struct ec_agg_param *agg_param, struct ec_agg_entry *agg_entry, unsigned int *acts) { + int rc = 0; + if (!agg_key_compare(agg_entry->ae_dkey, entry->ie_key)) { D_DEBUG(DB_EPC, "Skip dkey: "DF_KEY" ec agg on re-probe\n", DP_KEY(&entry->ie_key)); @@ -2202,11 +2213,12 @@ agg_dkey(daos_handle_t ih, vos_iter_entry_t *entry, DP_UOID(agg_entry->ae_oid), DP_KEY(&agg_entry->ae_dkey), agg_entry->ae_is_leader ? "yes" : "no"); agg_reset_dkey_entry(&agg_param->ap_agg_entry, entry); + rc = agg_get_obj_handle(agg_entry, true); } else { *acts |= VOS_ITER_CB_SKIP; } - return 0; + return rc; } /* Handles akeys returned by the iterator. 
*/ diff --git a/src/tests/suite/daos_obj_ec.c b/src/tests/suite/daos_obj_ec.c index 2eef576d096c..e6f6327e570b 100644 --- a/src/tests/suite/daos_obj_ec.c +++ b/src/tests/suite/daos_obj_ec.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -450,6 +450,27 @@ trigger_and_wait_ec_aggreation(test_arg_t *arg, daos_obj_id_t *oids, daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); } +void +trigger_and_wait_ec_aggreation_2dkeys(test_arg_t *arg, daos_obj_id_t *oids, + int oids_nr, char *dkey, char *dkey2, char *akey, + daos_off_t offset, daos_size_t size) +{ + int i; + + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_OBJ_EC_AGG_LEADER_DIFF | DAOS_FAIL_ALWAYS, 0, NULL); + + print_message("wait for 30 seconds for EC aggregation.\n"); + sleep(30); + + for (i = 0; i < oids_nr; i++) { + ec_agg_check_replica_on_parity(arg, oids[i], dkey, akey, offset, size, false); + ec_agg_check_replica_on_parity(arg, oids[i], dkey2, akey, offset, size, false); + } + + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); +} + void ec_verify_parity_data(struct ioreq *req, char *dkey, char *akey, daos_off_t offset, daos_size_t size, @@ -474,6 +495,37 @@ ec_verify_parity_data(struct ioreq *req, char *dkey, char *akey, free(data); } +void +ec_verify_parity_data_fail2shards(struct ioreq *req, char *dkey, char *akey, + daos_off_t offset, daos_size_t size, + char *verify_data, daos_handle_t th, + uint16_t shard1, uint16_t shard2) +{ + daos_recx_t recx; + char *data; + uint16_t fail_shards[2]; + uint64_t fail_val; + + data = (char *)malloc(size); + assert_true(data != NULL); + memset(data, 0, size); + + req->iod_type = DAOS_IOD_ARRAY; + recx.rx_nr = size; + recx.rx_idx = offset; + + fail_shards[0] = shard1; + fail_shards[1] = shard2; + fail_val = daos_shard_fail_value(fail_shards, 2); + daos_fail_value_set(fail_val); + 
daos_fail_loc_set(DAOS_FAIL_SHARD_OPEN | DAOS_FAIL_ALWAYS); + + lookup_recxs(dkey, akey, 1, th, &recx, 1, data, size, req); + assert_memory_equal(data, verify_data, size); + daos_fail_loc_set(0); + free(data); +} + static void ec_partial_update_agg(void **state) { @@ -503,20 +555,34 @@ ec_partial_update_agg(void **state) recx.rx_nr = EC_CELL_SIZE; recx.rx_idx = i * EC_CELL_SIZE; memset(data, 'a' + i, EC_CELL_SIZE); - insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + insert_recxs("d_key1", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, EC_CELL_SIZE, &req); + insert_recxs("d_key2", "a_key", 1, DAOS_TX_NONE, &recx, 1, data, EC_CELL_SIZE, &req); } - trigger_and_wait_ec_aggreation(arg, &oid, 1, "d_key", "a_key", 0, - EC_CELL_SIZE * 8, DAOS_FORCE_EC_AGG); + trigger_and_wait_ec_aggreation_2dkeys(arg, &oid, 1, "d_key1", "d_key2", "a_key", 0, + EC_CELL_SIZE * 8); for (i = 0; i < 10; i++) { daos_off_t offset = i * EC_CELL_SIZE; memset(verify_data, 'a' + i, EC_CELL_SIZE); - ec_verify_parity_data(&req, "d_key", "a_key", offset, - (daos_size_t)EC_CELL_SIZE, verify_data, - DAOS_TX_NONE, true); + ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", offset, + (daos_size_t)EC_CELL_SIZE, verify_data, + DAOS_TX_NONE, 0, 3); + ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", offset, + (daos_size_t)EC_CELL_SIZE, verify_data, + DAOS_TX_NONE, 0, 2); + ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", offset, + (daos_size_t)EC_CELL_SIZE, verify_data, + DAOS_TX_NONE, 0, 1); + ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", offset, + (daos_size_t)EC_CELL_SIZE, verify_data, + DAOS_TX_NONE, 2, 3); + ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", offset, + (daos_size_t)EC_CELL_SIZE, verify_data, + DAOS_TX_NONE, 4, 5); } ioreq_fini(&req); free(data); @@ -612,7 +678,9 @@ ec_full_partial_update_agg(void **state) recx.rx_idx = 0; memset(data, 'a', full_update_size); memcpy(verify_data, data, full_update_size); - insert_recxs("d_key", "a_key", 
1, DAOS_TX_NONE, &recx, 1, + insert_recxs("d_key1", "a_key", 1, DAOS_TX_NONE, &recx, 1, + data, full_update_size, &req); + insert_recxs("d_key2", "a_key", 1, DAOS_TX_NONE, &recx, 1, data, full_update_size, &req); /* then partial stripe update */ @@ -627,15 +695,27 @@ ec_full_partial_update_agg(void **state) memset(buffer, 'a' + i, partial_update_size); memcpy(verify_buffer, buffer, partial_update_size); - insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1, + insert_recxs("d_key1", "a_key", 1, DAOS_TX_NONE, &recx, 1, + buffer, partial_update_size, &req); + insert_recxs("d_key2", "a_key", 1, DAOS_TX_NONE, &recx, 1, buffer, partial_update_size, &req); } - trigger_and_wait_ec_aggreation(arg, &oid, 1, "d_key", "a_key", 0, - full_update_size, DAOS_FORCE_EC_AGG); - - ec_verify_parity_data(&req, "d_key", "a_key", (daos_size_t)0, - full_update_size, verify_data, DAOS_TX_NONE, true); + trigger_and_wait_ec_aggreation_2dkeys(arg, &oid, 1, "d_key1", "d_key2", "a_key", 0, + full_update_size); + + ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", (daos_size_t)0, full_update_size, + verify_data, DAOS_TX_NONE, 0, 2); + ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", (daos_size_t)0, full_update_size, + verify_data, DAOS_TX_NONE, 3, 4); + ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", (daos_size_t)0, full_update_size, + verify_data, DAOS_TX_NONE, 4, 5); + ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", (daos_size_t)0, full_update_size, + verify_data, DAOS_TX_NONE, 0, 3); + ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", (daos_size_t)0, full_update_size, + verify_data, DAOS_TX_NONE, 3, 4); + ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", (daos_size_t)0, full_update_size, + verify_data, DAOS_TX_NONE, 4, 5); free(data); free(verify_data); }