Skip to content

Commit

Permalink
DAOS-16039 object: fix EC aggregation wrong peer address
Browse files Browse the repository at this point in the history
Fix the wrong peer address used by EC aggregation when multiple dkeys
reside on the same EC aggregation leader shard.
Change existing test cases to cover that case.

Required-githooks: true
Signed-off-by: Xuezhao Liu <xuezhao.liu@intel.com>
  • Loading branch information
liuxuezhao committed Jun 17, 2024
1 parent f825add commit 0673518
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 32 deletions.
1 change: 1 addition & 0 deletions src/include/daos/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ enum {
#define DAOS_SHARD_OBJ_RW_DROP_REPLY (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x80)
#define DAOS_OBJ_FETCH_DATA_LOST (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x81)
#define DAOS_OBJ_TRY_SPECIAL_SHARD (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x82)
#define DAOS_OBJ_EC_AGG_LEADER_DIFF (DAOS_FAIL_SYS_TEST_GROUP_LOC | 0x83)

#define DAOS_VOS_AGG_RANDOM_YIELD (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x90)
#define DAOS_VOS_AGG_MW_THRESH (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x91)
Expand Down
48 changes: 30 additions & 18 deletions src/object/srv_ec_aggregate.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,26 +500,28 @@ agg_count_cells(uint8_t *fcbit_map, uint8_t *tbit_map, uint64_t estart,
* initialized and shared with other servers at a higher (pool/container) layer.
*/
static int
agg_get_obj_handle(struct ec_agg_entry *entry)
agg_get_obj_handle(struct ec_agg_entry *entry, bool reset_peer)
{
struct ec_agg_param *agg_param;
uint32_t grp_start;
uint32_t tgt_ec_idx;
struct dc_object *obj;
int i;
int rc;
int rc = 0;

if (daos_handle_is_valid(entry->ae_obj_hdl))
if (daos_handle_is_valid(entry->ae_obj_hdl) && !reset_peer)
return 0;

agg_param = container_of(entry, struct ec_agg_param, ap_agg_entry);
rc = dsc_obj_open(agg_param->ap_pool_info.api_cont_hdl,
entry->ae_oid.id_pub, DAOS_OO_RW,
&entry->ae_obj_hdl);
if (rc)
goto out;
if (!daos_handle_is_valid(entry->ae_obj_hdl)) {
rc = dsc_obj_open(agg_param->ap_pool_info.api_cont_hdl,
entry->ae_oid.id_pub, DAOS_OO_RW,
&entry->ae_obj_hdl);
if (rc)
goto out;
}

if (entry->ae_peer_pshards[0].sd_rank != DAOS_TGT_IGNORE)
if (!reset_peer && entry->ae_peer_pshards[0].sd_rank != DAOS_TGT_IGNORE)
D_GOTO(out, rc = 0);

grp_start = entry->ae_grp_idx * entry->ae_obj_layout->ol_grp_size;
Expand Down Expand Up @@ -599,7 +601,7 @@ agg_fetch_odata_cells(struct ec_agg_entry *entry, uint8_t *bit_map,
for (i = 0; i < cell_cnt; i++)
d_iov_set(&sgl.sg_iovs[i], &buf[i * cell_b], cell_b);

rc = agg_get_obj_handle(entry);
rc = agg_get_obj_handle(entry, false);
if (rc) {
D_ERROR("Failed to open object: "DF_RC"\n", DP_RC(rc));
goto out;
Expand Down Expand Up @@ -1315,9 +1317,9 @@ agg_peer_update_ult(void *arg)
rc = obj_req_create(dss_get_module_info()->dmi_ctx, &tgt_ep,
DAOS_OBJ_RPC_EC_AGGREGATE, &rpc);
if (rc) {
D_ERROR(DF_UOID" pidx %d to peer %d, obj_req_create "
D_ERROR(DF_UOID" pidx %d to peer %d, rank %d tag %d obj_req_create "
DF_RC"\n", DP_UOID(entry->ae_oid), pidx, peer,
DP_RC(rc));
tgt_ep.ep_rank, tgt_ep.ep_tag, DP_RC(rc));
goto out;
}
ec_agg_in = crt_req_get(rpc);
Expand Down Expand Up @@ -1459,7 +1461,7 @@ agg_peer_update(struct ec_agg_entry *entry, bool write_parity)
return -1;
}

rc = agg_get_obj_handle(entry);
rc = agg_get_obj_handle(entry, false);
if (rc) {
D_ERROR("Failed to open object: "DF_RC"\n", DP_RC(rc));
return rc;
Expand Down Expand Up @@ -1753,7 +1755,7 @@ agg_process_holes(struct ec_agg_entry *entry)
}

stripe_ud.asu_agg_entry = entry;
rc = agg_get_obj_handle(entry);
rc = agg_get_obj_handle(entry, false);
if (rc) {
D_ERROR("Failed to open object: "DF_RC"\n", DP_RC(rc));
goto out;
Expand Down Expand Up @@ -2135,9 +2137,16 @@ agg_shard_is_parity(struct ds_pool *pool, struct ec_agg_entry *agg_entry)
uint32_t shard_idx;
struct pl_obj_shard *shard;

ec_tgt_idx = obj_ec_shard_idx_by_layout_ver(agg_entry->ae_oid.id_layout_ver,
agg_entry->ae_dkey_hash, oca,
daos_oclass_grp_size(oca) - i - 1);
if (unlikely(DAOS_FAIL_CHECK(DAOS_OBJ_EC_AGG_LEADER_DIFF) &&
agg_entry->ae_dkey_hash % obj_ec_parity_tgt_nr(oca) == 0))
ec_tgt_idx = obj_ec_shard_idx_by_layout_ver(agg_entry->ae_oid.id_layout_ver,
agg_entry->ae_dkey_hash, oca,
obj_ec_data_tgt_nr(oca) + i);
else
ec_tgt_idx = obj_ec_shard_idx_by_layout_ver(agg_entry->ae_oid.id_layout_ver,
agg_entry->ae_dkey_hash, oca,
daos_oclass_grp_size(oca)
- i - 1);

shard_idx = grp_start + ec_tgt_idx;
shard = pl_obj_get_shard(agg_entry->ae_obj_layout, shard_idx);
Expand Down Expand Up @@ -2184,6 +2193,8 @@ agg_dkey(daos_handle_t ih, vos_iter_entry_t *entry,
struct ec_agg_param *agg_param, struct ec_agg_entry *agg_entry,
unsigned int *acts)
{
int rc = 0;

if (!agg_key_compare(agg_entry->ae_dkey, entry->ie_key)) {
D_DEBUG(DB_EPC, "Skip dkey: "DF_KEY" ec agg on re-probe\n",
DP_KEY(&entry->ie_key));
Expand All @@ -2202,11 +2213,12 @@ agg_dkey(daos_handle_t ih, vos_iter_entry_t *entry,
DP_UOID(agg_entry->ae_oid), DP_KEY(&agg_entry->ae_dkey),
agg_entry->ae_is_leader ? "yes" : "no");
agg_reset_dkey_entry(&agg_param->ap_agg_entry, entry);
rc = agg_get_obj_handle(agg_entry, true);
} else {
*acts |= VOS_ITER_CB_SKIP;
}

return 0;
return rc;
}

/* Handles akeys returned by the iterator. */
Expand Down
108 changes: 94 additions & 14 deletions src/tests/suite/daos_obj_ec.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -450,6 +450,27 @@ trigger_and_wait_ec_aggreation(test_arg_t *arg, daos_obj_id_t *oids,
daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL);
}

/**
 * Set the DAOS_OBJ_EC_AGG_LEADER_DIFF fail-loc, wait for EC aggregation to
 * run, then check two dkeys of every listed object.
 *
 * The fail-loc is set through daos_debug_set_params() with rank argument -1
 * (presumably "all ranks" -- confirm against daos_debug_set_params) and is
 * cleared again before returning.
 *
 * \param[in] arg	Test argument; its group is used for the fail-loc calls.
 * \param[in] oids	Objects to check.
 * \param[in] oids_nr	Number of entries in \a oids.
 * \param[in] dkey	First dkey to check on each object.
 * \param[in] dkey2	Second dkey to check on each object.
 * \param[in] akey	Akey checked under both dkeys.
 * \param[in] offset	Offset handed to ec_agg_check_replica_on_parity().
 * \param[in] size	Size handed to ec_agg_check_replica_on_parity().
 */
void
trigger_and_wait_ec_aggreation_2dkeys(test_arg_t *arg, daos_obj_id_t *oids,
				      int oids_nr, char *dkey, char *dkey2, char *akey,
				      daos_off_t offset, daos_size_t size)
{
	char	*dkeys[2] = { dkey, dkey2 };
	int	 obj_idx;
	int	 key_idx;

	daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC,
			      DAOS_OBJ_EC_AGG_LEADER_DIFF | DAOS_FAIL_ALWAYS, 0, NULL);

	/* Fixed delay; nothing here polls for aggregation completion. */
	print_message("wait for 30 seconds for EC aggregation.\n");
	sleep(30);

	/* Per object: first dkey, then second dkey (same order as before). */
	for (obj_idx = 0; obj_idx < oids_nr; obj_idx++) {
		for (key_idx = 0; key_idx < 2; key_idx++)
			ec_agg_check_replica_on_parity(arg, oids[obj_idx], dkeys[key_idx],
						       akey, offset, size, false);
	}

	/* Clear the fail-loc so it does not affect later tests. */
	daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL);
}

void
ec_verify_parity_data(struct ioreq *req, char *dkey, char *akey,
daos_off_t offset, daos_size_t size,
Expand All @@ -474,6 +495,37 @@ ec_verify_parity_data(struct ioreq *req, char *dkey, char *akey,
free(data);
}

/**
 * Fetch one array extent while two shards are flagged through the
 * DAOS_FAIL_SHARD_OPEN fail-loc, and assert the fetched bytes match
 * \a verify_data.
 *
 * NOTE(review): presumably the fail-loc makes opening \a shard1 and \a shard2
 * fail so the fetch has to be served without them -- confirm against the
 * DAOS_FAIL_SHARD_OPEN handling.
 *
 * \param[in] req		I/O request; its iod_type is set to DAOS_IOD_ARRAY.
 * \param[in] dkey		Dkey to fetch from.
 * \param[in] akey		Akey to fetch from.
 * \param[in] offset		Start index of the extent (rx_idx).
 * \param[in] size		Extent length (rx_nr), also the buffer size.
 * \param[in] verify_data	Expected content, at least \a size bytes.
 * \param[in] th		Transaction handle passed to lookup_recxs().
 * \param[in] shard1		First shard to put in the fail value.
 * \param[in] shard2		Second shard to put in the fail value.
 */
void
ec_verify_parity_data_fail2shards(struct ioreq *req, char *dkey, char *akey,
				  daos_off_t offset, daos_size_t size,
				  char *verify_data, daos_handle_t th,
				  uint16_t shard1, uint16_t shard2)
{
	uint16_t	 fail_shards[2] = { shard1, shard2 };
	daos_recx_t	 recx = { .rx_idx = offset, .rx_nr = size };
	uint64_t	 fail_val;
	char		*data;

	/* Zero-filled fetch buffer. */
	data = calloc(1, size);
	assert_true(data != NULL);

	req->iod_type = DAOS_IOD_ARRAY;

	/* Encode both shards into the fail value before arming the fail-loc. */
	fail_val = daos_shard_fail_value(fail_shards, 2);
	daos_fail_value_set(fail_val);
	daos_fail_loc_set(DAOS_FAIL_SHARD_OPEN | DAOS_FAIL_ALWAYS);

	lookup_recxs(dkey, akey, 1, th, &recx, 1, data, size, req);
	assert_memory_equal(data, verify_data, size);

	/* Disarm the fail-loc; the fail value itself is left as set above. */
	daos_fail_loc_set(0);
	free(data);
}

static void
ec_partial_update_agg(void **state)
{
Expand Down Expand Up @@ -503,20 +555,34 @@ ec_partial_update_agg(void **state)
recx.rx_nr = EC_CELL_SIZE;
recx.rx_idx = i * EC_CELL_SIZE;
memset(data, 'a' + i, EC_CELL_SIZE);
insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1,
insert_recxs("d_key1", "a_key", 1, DAOS_TX_NONE, &recx, 1,
data, EC_CELL_SIZE, &req);
insert_recxs("d_key2", "a_key", 1, DAOS_TX_NONE, &recx, 1,
data, EC_CELL_SIZE, &req);
}

trigger_and_wait_ec_aggreation(arg, &oid, 1, "d_key", "a_key", 0,
EC_CELL_SIZE * 8, DAOS_FORCE_EC_AGG);
trigger_and_wait_ec_aggreation_2dkeys(arg, &oid, 1, "d_key1", "d_key2", "a_key", 0,
EC_CELL_SIZE * 8);

for (i = 0; i < 10; i++) {
daos_off_t offset = i * EC_CELL_SIZE;

memset(verify_data, 'a' + i, EC_CELL_SIZE);
ec_verify_parity_data(&req, "d_key", "a_key", offset,
(daos_size_t)EC_CELL_SIZE, verify_data,
DAOS_TX_NONE, true);
ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", offset,
(daos_size_t)EC_CELL_SIZE, verify_data,
DAOS_TX_NONE, 0, 3);
ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", offset,
(daos_size_t)EC_CELL_SIZE, verify_data,
DAOS_TX_NONE, 0, 2);
ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", offset,
(daos_size_t)EC_CELL_SIZE, verify_data,
DAOS_TX_NONE, 0, 1);
ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", offset,
(daos_size_t)EC_CELL_SIZE, verify_data,
DAOS_TX_NONE, 2, 3);
ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", offset,
(daos_size_t)EC_CELL_SIZE, verify_data,
DAOS_TX_NONE, 4, 5);
}
ioreq_fini(&req);
free(data);
Expand Down Expand Up @@ -612,7 +678,9 @@ ec_full_partial_update_agg(void **state)
recx.rx_idx = 0;
memset(data, 'a', full_update_size);
memcpy(verify_data, data, full_update_size);
insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1,
insert_recxs("d_key1", "a_key", 1, DAOS_TX_NONE, &recx, 1,
data, full_update_size, &req);
insert_recxs("d_key2", "a_key", 1, DAOS_TX_NONE, &recx, 1,
data, full_update_size, &req);

/* then partial stripe update */
Expand All @@ -627,15 +695,27 @@ ec_full_partial_update_agg(void **state)
memset(buffer, 'a' + i, partial_update_size);
memcpy(verify_buffer, buffer, partial_update_size);

insert_recxs("d_key", "a_key", 1, DAOS_TX_NONE, &recx, 1,
insert_recxs("d_key1", "a_key", 1, DAOS_TX_NONE, &recx, 1,
buffer, partial_update_size, &req);
insert_recxs("d_key2", "a_key", 1, DAOS_TX_NONE, &recx, 1,
buffer, partial_update_size, &req);
}

trigger_and_wait_ec_aggreation(arg, &oid, 1, "d_key", "a_key", 0,
full_update_size, DAOS_FORCE_EC_AGG);

ec_verify_parity_data(&req, "d_key", "a_key", (daos_size_t)0,
full_update_size, verify_data, DAOS_TX_NONE, true);
trigger_and_wait_ec_aggreation_2dkeys(arg, &oid, 1, "d_key1", "d_key2", "a_key", 0,
full_update_size);

ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", (daos_size_t)0, full_update_size,
verify_data, DAOS_TX_NONE, 0, 2);
ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", (daos_size_t)0, full_update_size,
verify_data, DAOS_TX_NONE, 3, 4);
ec_verify_parity_data_fail2shards(&req, "d_key1", "a_key", (daos_size_t)0, full_update_size,
verify_data, DAOS_TX_NONE, 4, 5);
ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", (daos_size_t)0, full_update_size,
verify_data, DAOS_TX_NONE, 0, 3);
ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", (daos_size_t)0, full_update_size,
verify_data, DAOS_TX_NONE, 3, 4);
ec_verify_parity_data_fail2shards(&req, "d_key2", "a_key", (daos_size_t)0, full_update_size,
verify_data, DAOS_TX_NONE, 4, 5);
free(data);
free(verify_data);
}
Expand Down

0 comments on commit 0673518

Please sign in to comment.