From b2b5444da5b52b9e635e26d58e6effb35176dba5 Mon Sep 17 00:00:00 2001
From: Ping Xie
Date: Sun, 28 Apr 2024 20:52:26 -0700
Subject: [PATCH] Reintroduce server-initiated wait and remove the
 `REPLICAONLY` flag for `CLUSTER SETSLOT NODE`

Signed-off-by: Ping Xie
---
 src/blocked.c                         |  35 ++--
 src/cluster_legacy.c                  | 252 ++++++++++++--------------
 src/networking.c                      |   2 +-
 src/replication.c                     |   3 +
 src/server.h                          |   4 +
 src/valkey-cli.c                      |  34 ----
 tests/unit/cluster/slot-migration.tcl |  33 +---
 7 files changed, 158 insertions(+), 205 deletions(-)

diff --git a/src/blocked.c b/src/blocked.c
index ad815a9b6c4..8fcf428e233 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -186,7 +186,8 @@ void unblockClient(client *c, int queue_for_reprocessing) {
         c->bstate.btype == BLOCKED_ZSET || c->bstate.btype == BLOCKED_STREAM) {
         unblockClientWaitingData(c);
-    } else if (c->bstate.btype == BLOCKED_WAIT || c->bstate.btype == BLOCKED_WAITAOF) {
+    } else if (c->bstate.btype == BLOCKED_WAIT || c->bstate.btype == BLOCKED_WAITAOF ||
+               c->bstate.btype == BLOCKED_WAIT_PREREPL) {
         unblockClientWaitingReplicas(c);
     } else if (c->bstate.btype == BLOCKED_MODULE) {
         if (moduleClientIsBlockedOnKeys(c)) unblockClientWaitingData(c);
@@ -202,7 +203,8 @@
 
     /* Reset the client for a new query, unless the client has pending command to process
      * or in case a shutdown operation was canceled and we are still in the processCommand sequence */
-    if (!(c->flags & CLIENT_PENDING_COMMAND) && c->bstate.btype != BLOCKED_SHUTDOWN) {
+    if (!(c->flags & CLIENT_PENDING_COMMAND) && c->bstate.btype != BLOCKED_SHUTDOWN &&
+        c->bstate.btype != BLOCKED_WAIT_PREREPL) {
         freeClientOriginalArgv(c);
         /* Clients that are not blocked on keys are not reprocessed so we must
          * call reqresAppendResponse here (for clients blocked on key,
@@ -240,6 +242,8 @@ void replyToBlockedClientTimedOut(client *c) {
         addReplyLongLong(c,replicationCountAOFAcksByOffset(c->bstate.reploffset));
     } else if (c->bstate.btype == BLOCKED_MODULE) {
         moduleBlockedClientTimedOut(c, 0);
+    } else if (c->bstate.btype == BLOCKED_WAIT_PREREPL) {
+        addReplyErrorObject(c, shared.noreplicaserr);
     } else {
         serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
     }
@@ -597,23 +601,30 @@ static void handleClientsBlockedOnKey(readyList *rl) {
     }
 }
 
-/* block a client due to wait command */
-void blockForReplication(client *c, mstime_t timeout, long long offset, long numreplicas) {
+/* block a client for replica acknowledgement */
+void blockClientForReplicaAck(client *c, mstime_t timeout, long long offset, long numreplicas, int blockType, int numlocal) {
     c->bstate.timeout = timeout;
     c->bstate.reploffset = offset;
     c->bstate.numreplicas = numreplicas;
-    listAddNodeHead(server.clients_waiting_acks,c);
-    blockClient(c,BLOCKED_WAIT);
+    c->bstate.numlocal = numlocal;
+    listAddNodeHead(server.clients_waiting_acks, c);
+    blockClient(c, blockType);
+}
+
+/* block a client due to pre-replication */
+void blockForPreReplication(client *c, mstime_t timeout, long long offset, long numreplicas) {
+    blockClientForReplicaAck(c, timeout, offset, numreplicas, BLOCKED_WAIT_PREREPL, 0);
+    c->flags |= CLIENT_PENDING_COMMAND;
+}
+
+/* block a client due to wait command */
+void blockForReplication(client *c, mstime_t timeout, long long offset, long numreplicas) {
+    blockClientForReplicaAck(c, timeout, offset, numreplicas, BLOCKED_WAIT, 0);
 }
 
 /* block a client due to waitaof command */
 void blockForAofFsync(client *c, mstime_t timeout, long long offset, int numlocal, long numreplicas) {
-    c->bstate.timeout = timeout;
-    c->bstate.reploffset = offset;
-    c->bstate.numreplicas = numreplicas;
-    c->bstate.numlocal = numlocal;
-    listAddNodeHead(server.clients_waiting_acks,c);
-    blockClient(c,BLOCKED_WAITAOF);
+    blockClientForReplicaAck(c, timeout, offset, numreplicas, BLOCKED_WAITAOF, numlocal);
 }
 
 /* Postpone client from executing a command. For example the server might be busy
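
For reference, the blocked.c changes above funnel WAIT, WAITAOF, and the new pre-replication block through a single helper. A command that wants its replicas to acknowledge an offset before it finishes parks itself with blockForPreReplication(); because that helper also sets CLIENT_PENDING_COMMAND, the command is re-executed once the acks arrive. A minimal sketch of that calling pattern follows; it is illustrative only, the handler name and the extra numreplicas parameter are placeholders, while the helpers and flags are the ones introduced by this patch.

    /* Illustrative fragment, not part of the patch: intended use of the
     * new pre-replication helpers from a command handler. */
    void myFinalizeCommand(client *c, long numreplicas) {
        if (!(c->flags & CLIENT_INTERNAL_PREREPL_DONE)) {
            /* First pass: block until the replicas ack the offset right after
             * this command; CLIENT_PENDING_COMMAND makes it run again later. */
            blockForPreReplication(c, mstime() + 1000, server.master_repl_offset + 1, numreplicas);
            replicationRequestAckFromSlaves();
            return; /* no reply yet */
        }
        /* Second pass: replicas have acked; apply the change and reply. */
        addReply(c, shared.ok);
    }
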
diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c
index 9ae53e659c1..f89b9f2fbd4 100644
--- a/src/cluster_legacy.c
+++ b/src/cluster_legacy.c
@@ -5229,6 +5229,7 @@ int clusterAddSlot(clusterNode *n, int slot) {
     if (server.cluster->slots[slot]) return C_ERR;
     clusterNodeSetSlotBit(n,slot);
     server.cluster->slots[slot] = n;
+    bitmapClearBit(server.cluster->owner_not_claiming_slot, slot);
     return C_OK;
 }
 
@@ -6333,8 +6334,10 @@ int clusterCommandSpecial(client *c) {
         /* SETSLOT 10 NODE */
         int slot;
         clusterNode *n;
+        int replSetSlot = nodeIsMaster(myself);
+        int reply = 1;
 
-        /* Allow primaries to replicate "CLUSTER SETSLOT" */
+        /* Allow primaries to replicate "CLUSTER SETSLOT" */
         if (!(c->flags & CLIENT_MASTER) && nodeIsSlave(myself)) {
             addReplyError(c,"Please use SETSLOT only with masters.");
             return 1;
@@ -6358,6 +6361,11 @@ int clusterCommandSpecial(client *c) {
                 addReplyError(c,"Target node is not a master");
                 return 1;
             }
+            serverLog(LL_NOTICE,
+                      "Migrating slot %d to node %.40s (%s)",
+                      slot,
+                      n->name,
+                      n->human_nodename);
             server.cluster->migrating_slots_to[slot] = n;
         } else if (!strcasecmp(c->argv[3]->ptr,"importing") && c->argc == 5) {
             if (server.cluster->slots[slot] == myself) {
@@ -6375,9 +6383,15 @@ int clusterCommandSpecial(client *c) {
                 addReplyError(c,"Target node is not a master");
                 return 1;
             }
+            serverLog(LL_NOTICE,
+                      "Importing slot %d from node %.40s (%s)",
+                      slot,
+                      n->name,
+                      n->human_nodename);
             server.cluster->importing_slots_from[slot] = n;
         } else if (!strcasecmp(c->argv[3]->ptr,"stable") && c->argc == 4) {
             /* CLUSTER SETSLOT STABLE */
+            serverLog(LL_NOTICE, "Marking slot %d stable", slot);
             server.cluster->importing_slots_from[slot] = NULL;
             server.cluster->migrating_slots_to[slot] = NULL;
         } else if (!strcasecmp(c->argv[3]->ptr,"node") && c->argc == 5) {
@@ -6403,143 +6417,116 @@ int clusterCommandSpecial(client *c) {
                 }
             }
 
-            serverLog(LL_NOTICE, "Assigning slot %d to node %.40s (%s) in shard %.40s",
-                      slot,
-                      n->name,
-                      n->human_nodename,
-                      n->shard_id);
-
-            /* If this slot is in migrating status but we have no keys
-             * for it assigning the slot to another node will clear
-             * the migrating status. */
-            if (countKeysInSlot(slot) == 0 &&
-                server.cluster->migrating_slots_to[slot])
-                server.cluster->migrating_slots_to[slot] = NULL;
-
-            int slot_was_mine = server.cluster->slots[slot] == myself;
-            clusterDelSlot(slot);
-            clusterAddSlot(n,slot);
-
-            /* If we are a master left without slots, we should turn into a
-             * replica of the new master. */
-            if (slot_was_mine &&
-                n != myself &&
-                myself->numslots == 0 &&
-                server.cluster_allow_replica_migration) {
-                serverLog(LL_NOTICE,
-                          "Lost my last slot during slot migration. Reconfiguring myself "
-                          "as a replica of %.40s (%s) in shard %.40s",
-                          n->name,
-                          n->human_nodename,
-                          n->shard_id);
-                clusterSetMaster(n, 1);
-                clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG |
-                                     CLUSTER_TODO_UPDATE_STATE |
-                                     CLUSTER_TODO_FSYNC_CONFIG);
-            }
-
-            /* If this node or this node's primary was importing this slot,
-             * assigning the slot to itself also clears the importing status. */
-            if ((n == myself || n == myself->slaveof) &&
-                server.cluster->importing_slots_from[slot])
-            {
-                server.cluster->importing_slots_from[slot] = NULL;
-
-                /* Only primary broadcasts the updates */
-                if (n == myself) {
-                    /* This slot was manually migrated, set this node configEpoch
-                     * to a new epoch so that the new version can be propagated
-                     * by the cluster.
-                     *
-                     * Note that if this ever results in a collision with another
-                     * node getting the same configEpoch, for example because a
-                     * failover happens at the same time we close the slot, the
-                     * configEpoch collision resolution will fix it assigning
-                     * a different epoch to each node. */
-                    if (clusterBumpConfigEpochWithoutConsensus() == C_OK) {
-                        serverLog(LL_NOTICE,
-                                  "ConfigEpoch updated after importing slot %d",
-                                  slot);
-                    }
-                    /* After importing this slot, let the other nodes know as
-                     * soon as possible. */
-                    clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
-                }
-            }
-        } else if (!strcasecmp(c->argv[3]->ptr,"node") &&
-                   c->argc == 6 &&
-                   !strcasecmp(c->argv[5]->ptr,"replicaonly"))
-        {
-            /* CLUSTER SETSLOT NODE REPLICAONLY */
             /* When finalizing the slot, there is a possibility that the
-             * target node B sends a cluster PONG to the source node A
+             * destination node B sends cluster PONG to the source node A
              * before SETSLOT has been replicated to B'. If B crashes here,
-             * B' will be in an importing state and the slot will have no
-             * owner. To help mitigate this issue, we added a new SETSLOT
-             * command variant that takes a special marker token called
-             * "REPLICAONLY". This command is a no-op on the primary. It
-             * simply replicates asynchronously the command without the
-             * "REPLICAONLY" marker to the replicas, if there exist any.
-             * The caller is expected to wait for this asynchronous
-             * replication to complete using the "WAIT" command.
-             *
-             * With the help of this command, we finalize the slots
-             * on the replicas before the primary in the following
-             * sequence, where A is the source primary and B is the target
-             * primary:
+             * B' will be in importing state and the slot will have no owner.
+             * To help mitigate this issue, we enforce the following order
+             * for slot migration finalization such that the replicas will
+             * finalize the slot ownership before this primary:
              *
-             * 1. Client C issues SETSLOT n NODE B REPLICAONLY against
-             *    node B
-             * 2. Node B replicates SETSLOT n NODE B to all of its replicas,
-             *    such as B', B'', etc
-             * 3. Client C then issues WAIT for
-             *    a number of B's replicas of C's choice to complete the
-             *    finalization
-             * 4. On successful WAIT completion, Client C executes SETSLOT
-             *    n NODE B against node B but without the "REPLICAONLY"
-             *    marker this time, which completes the slot finalization
-             *    on node B
-             *
-             * The following steps can happen in parallel:
+             * 1. Client C issues SETSLOT n NODE B against node B
+             * 2. Node B replicates SETSLOT n NODE B to all of its
+             *    replicas, such as B', B'', etc
+             * 3. On replication completion, node B executes SETSLOT
+             *    n NODE B and returns control back to client C
+             * 4. The following steps can happen in parallel:
              *    a. Client C issues SETSLOT n NODE B against node A
-             *    b. Node B gossips its new slot ownership to the cluster,
-             *       including A, A', etc */
+             *    b. Node B gossips its new slot ownership to the cluster,
+             *       including A, A', etc
+             *
+             * Where A is the source primary and B is the destination primary. */
+            int replDone = (c->flags & CLIENT_INTERNAL_PREREPL_DONE) != 0;
+            int syncReplRequired = (server.cluster->importing_slots_from[slot] != NULL) &&
+                                   (nodeIsMaster(myself) != 0) &&
+                                   (myself->numslaves != 0);
+
+            /* Slot states must be updated on replicas first before being
+             * applied to the primary. This is controlled via the
+             * pre-replication done flag (CLIENT_INTERNAL_PREREPL_DONE). */
+            if (replDone || !syncReplRequired) {
+                serverLog(LL_NOTICE, "Assigning slot %d to node %.40s (%s) in shard %.40s",
+                          slot,
+                          n->name,
+                          n->human_nodename,
+                          n->shard_id);
-            n = clusterLookupNode(c->argv[4]->ptr, sdslen(c->argv[4]->ptr));
+                /* If this slot is in migrating status but we have no keys
+                 * for it assigning the slot to another node will clear
+                 * the migrating status. */
+                if (countKeysInSlot(slot) == 0 &&
+                    server.cluster->migrating_slots_to[slot])
+                    server.cluster->migrating_slots_to[slot] = NULL;
+
+                int slot_was_mine = server.cluster->slots[slot] == myself;
+                clusterDelSlot(slot);
+                clusterAddSlot(n, slot);
+
+                /* If we are a master left without slots, we should turn into a
+                 * replica of the new master. */
+                if (slot_was_mine &&
+                    n != myself &&
+                    myself->numslots == 0 &&
+                    server.cluster_allow_replica_migration) {
+                    serverLog(LL_NOTICE,
+                              "Lost my last slot during slot migration. Reconfiguring myself "
+                              "as a replica of %.40s (%s) in shard %.40s",
+                              n->name,
+                              n->human_nodename,
+                              n->shard_id);
+                    clusterSetMaster(n, 1);
+                    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG |
+                                         CLUSTER_TODO_UPDATE_STATE |
+                                         CLUSTER_TODO_FSYNC_CONFIG);
+                }
-            if (!n) {
-                addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[4]->ptr);
-                return 1;
-            }
-            if (nodeIsSlave(n)) {
-                addReplyError(c,"Target node is not a master");
-                return 1;
-            }
-            /* If this hash slot was served by 'myself' before to switch
-             * make sure there are no longer local keys for this hash slot. */
-            if (server.cluster->slots[slot] == myself && n != myself) {
-                if (countKeysInSlot(slot) != 0) {
-                    addReplyErrorFormat(c,
-                        "Can't assign hashslot %d to a different node "
-                        "while I still hold keys for this hash slot.", slot);
-                    return 1;
+                /* If this node or this node's primary was importing this slot,
+                 * assigning the slot to itself also clears the importing status. */
+                if ((n == myself || n == myself->slaveof) &&
+                    server.cluster->importing_slots_from[slot])
+                {
+                    server.cluster->importing_slots_from[slot] = NULL;
+
+                    /* Only primary broadcasts the updates */
+                    if (n == myself) {
+                        /* This slot was manually migrated, set this node configEpoch
+                         * to a new epoch so that the new version can be propagated
+                         * by the cluster.
+                         *
+                         * Note that if this ever results in a collision with another
+                         * node getting the same configEpoch, for example because a
+                         * failover happens at the same time we close the slot, the
+                         * configEpoch collision resolution will fix it assigning
+                         * a different epoch to each node. */
+                        if (clusterBumpConfigEpochWithoutConsensus() == C_OK) {
+                            serverLog(LL_NOTICE,
+                                      "ConfigEpoch updated after importing slot %d",
+                                      slot);
+                        }
+                        /* After importing this slot, let the other nodes know as
+                         * soon as possible. */
+                        clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
+                    }
                 }
-            }
-            if (server.cluster->importing_slots_from[slot] == NULL) {
-                addReplyError(c,"Slot is not open for importing");
-                return 1;
-            }
-            if (myself->numslaves == 0) {
-                addReplyError(c,"Target node has no replicas");
-                return 1;
-            }
-            /* Remove the last "REPLICAONLY" token so the command
-             * can be applied as the real "SETSLOT" command on the
-             * replicas. */
-            serverAssert(c->argc == 6);
-            rewriteClientCommandVector(c, 5, c->argv[0], c->argv[1], c->argv[2], c->argv[3], c->argv[4]);
+                /* Don't replicate again if setslot has been replicated */
+                replSetSlot &= !replDone;
+            } else {
+                /* We are a primary and this is the first time we see this "setslot"
+                 * command. Force-replicate the setslot command to all of our replicas
+                 * first and only on success will we handle the command.
+                 * Note that
+                 * 1. All replicas are expected to ack the replication within 1000ms
+                 * 2. The repl offset target is set to the master's current repl offset + 1.
+                 *    There is no concern of partial replication because replicas always
+                 *    ack the repl offset at the command boundary. */
+                blockForPreReplication(c, mstime()+1000, server.master_repl_offset+1, myself->numslaves);
+                replicationRequestAckFromSlaves();
+
+                /* Don't reply to the client yet */
+                reply = 0;
+            }
         } else {
             addReplyError(c,
                 "Invalid CLUSTER SETSLOT action or number of arguments. Try CLUSTER HELP");
@@ -6547,10 +6534,10 @@ int clusterCommandSpecial(client *c) {
         }
 
         /* Force-replicate "CLUSTER SETSLOT" */
-        if (nodeIsMaster(myself)) forceCommandPropagation(c, PROPAGATE_REPL);
+        if (replSetSlot) forceCommandPropagation(c, PROPAGATE_REPL);
 
         clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE);
-        addReply(c,shared.ok);
+        if (reply) addReply(c,shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) {
         /* CLUSTER BUMPEPOCH */
         int retval = clusterBumpConfigEpochWithoutConsensus();
@@ -6623,6 +6610,7 @@ int clusterCommandSpecial(client *c) {
 
         /* Set the master. */
         clusterSetMaster(n, 1);
+        clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
         clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
         addReply(c,shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") &&
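
Condensing the NODE branch above: slot finalization on the target primary is now a two-pass operation driven entirely by the server. A sketch of the control flow, with logging, validation, and the replica-migration path omitted:

    /* Sketch of the reworked CLUSTER SETSLOT <slot> NODE <node> handling. */
    int replDone = (c->flags & CLIENT_INTERNAL_PREREPL_DONE) != 0;
    int syncReplRequired = server.cluster->importing_slots_from[slot] &&
                           nodeIsMaster(myself) && myself->numslaves;
    if (replDone || !syncReplRequired) {
        /* Second pass, or nothing to wait for: take ownership locally, bump
         * the config epoch, broadcast PONG, and reply +OK. */
        clusterDelSlot(slot);
        clusterAddSlot(n, slot);
    } else {
        /* First pass on a primary with replicas: propagate SETSLOT to the
         * replicas, block the client, and re-process it once they ack. */
        blockForPreReplication(c, mstime() + 1000, server.master_repl_offset + 1, myself->numslaves);
        replicationRequestAckFromSlaves();
    }
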
diff --git a/src/networking.c b/src/networking.c
index 4af3edb0264..ed919cb010b 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -2082,7 +2082,7 @@ void resetClient(client *c) {
     c->multibulklen = 0;
     c->bulklen = -1;
     c->slot = -1;
-    c->flags &= ~CLIENT_EXECUTING_COMMAND;
+    c->flags &= ~(CLIENT_EXECUTING_COMMAND | CLIENT_INTERNAL_PREREPL_DONE);
 
     /* Make sure the duration has been recorded to some command. */
     serverAssert(c->duration == 0);
diff --git a/src/replication.c b/src/replication.c
index 85e23ffd9f7..035c4f876f0 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -3640,6 +3640,7 @@ void processClientsWaitingReplicas(void) {
         client *c = ln->value;
 
         int is_wait_aof = c->bstate.btype == BLOCKED_WAITAOF;
+        int is_wait_prerepl = c->bstate.btype == BLOCKED_WAIT_PREREPL;
 
         if (is_wait_aof && c->bstate.numlocal && !server.aof_enabled) {
             addReplyError(c, "WAITAOF cannot be used when numlocal is set but appendonly is disabled.");
@@ -3689,6 +3690,8 @@ void processClientsWaitingReplicas(void) {
             addReplyArrayLen(c, 2);
             addReplyLongLong(c, numlocal);
             addReplyLongLong(c, numreplicas);
+        } else if (is_wait_prerepl) {
+            c->flags |= CLIENT_INTERNAL_PREREPL_DONE;
         } else {
             addReplyLongLong(c, numreplicas);
         }
diff --git a/src/server.h b/src/server.h
index 7f054abb572..02d269f9ea9 100644
--- a/src/server.h
+++ b/src/server.h
@@ -402,6 +402,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
 #define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL<<48) /* Module client do not want to propagate to AOF */
 #define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL<<49) /* Module client do not want to propagate to replica */
 #define CLIENT_REPROCESSING_COMMAND (1ULL<<50) /* The client is re-processing the command. */
+#define CLIENT_INTERNAL_PREREPL_DONE (1ULL<<51) /* Indicate that pre-replication has been done on the client */
 
 /* Client block type (btype field in client structure)
  * if CLIENT_BLOCKED flag is set. */
@@ -415,6 +416,7 @@ typedef enum blocking_type {
     BLOCKED_ZSET, /* BZPOP et al. */
     BLOCKED_POSTPONE, /* Blocked by processCommand, re-try processing later. */
     BLOCKED_SHUTDOWN, /* SHUTDOWN. */
+    BLOCKED_WAIT_PREREPL, /* WAIT for pre-replication and then run the command. */
     BLOCKED_NUM, /* Number of blocked states. */
     BLOCKED_END /* End of enumeration */
 } blocking_type;
@@ -3430,7 +3432,9 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
 void blockClientShutdown(client *c);
 void blockPostponeClient(client *c);
 void blockForReplication(client *c, mstime_t timeout, long long offset, long numreplicas);
+void blockForPreReplication(client *c, mstime_t timeout, long long offset, long numreplicas);
 void blockForAofFsync(client *c, mstime_t timeout, long long offset, int numlocal, long numreplicas);
+void replicationRequestAckFromSlaves(void);
 void signalDeletedKeyAsReady(serverDb *db, robj *key, int type);
 void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_errors);
 void scanDatabaseForDeletedKeys(serverDb *emptied, serverDb *replaced_with);
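
Taken together with the networking.c and replication.c hunks, the lifecycle of a pre-replication block looks roughly like this (sketch only; numreplicas stands for the ack count already computed inside processClientsWaitingReplicas()):

    /* Sketch of the unblock path for BLOCKED_WAIT_PREREPL. */
    if (c->bstate.btype == BLOCKED_WAIT_PREREPL && numreplicas >= c->bstate.numreplicas) {
        c->flags |= CLIENT_INTERNAL_PREREPL_DONE; /* mark instead of replying */
        unblockClient(c, 1);                      /* argv is kept; the command is re-queued */
        /* The re-processed command sees the flag, finalizes the slot and replies;
         * resetClient() then clears CLIENT_INTERNAL_PREREPL_DONE. */
    }
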
diff --git a/src/valkey-cli.c b/src/valkey-cli.c
index f583ae14532..f905100cd08 100644
--- a/src/valkey-cli.c
+++ b/src/valkey-cli.c
@@ -4762,39 +4762,6 @@ static clusterManagerNode *clusterManagerGetSlotOwner(clusterManagerNode *n,
     return owner;
 }
 
-static void clusterManagerSetSlotNodeReplicaOnly(clusterManagerNode *node1,
-                                                 clusterManagerNode *node2,
-                                                 int slot) {
-    /* A new CLUSTER SETSLOT variant that finalizes slot ownership on replicas
-     * only (CLUSTER SETSLOT s NODE n REPLICAONLY) is introduced in Valkey 8.0
-     * to help mitigate the single-point-of-failure issue related to the slot
-     * ownership finalization on HA clusters. We make a best-effort attempt below
-     * to utilize this enhanced reliability. Regardless of the result, we continue
-     * with finalizing slot ownership on the primary nodes. Note that this command
-     * is not essential. Valkey 8.0 will attempt to recover from failed slot
-     * ownership finalizations if they occur, although there may be a brief period
-     * where slots caught in this transition stage are unavailable. Including this
-     * additional step ensures no downtime for these slots if any failures arise. */
-    redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER SETSLOT %d NODE %s REPLICAONLY",
-                                                slot, (char *) node2->name);
-    if (reply->type == REDIS_REPLY_ERROR) {
-        /* Either target Redis server doesn't support this command or the slot is
-         * not in the right state*/
-        clusterManagerLogWarn("*** Failed to finalize slot on replicas: %s\n", reply->str);
-        freeReplyObject(reply);
-        return;
-    }
-    freeReplyObject(reply);
-    clusterManagerLogInfo(">>> Waiting for %d replicas to complete slot finalization\n", node1->replicas_count);
-    reply = CLUSTER_MANAGER_COMMAND(node1, "WAIT %d 1000", node1->replicas_count);
-    if (reply->type == REDIS_REPLY_ERROR) {
-        clusterManagerLogWarn("*** Failed to wait for slot finalization on replicas: %s\n", reply->str);
-    } else {
-        clusterManagerLogInfo(">>> %d replicas completed slot finalization in time\n", reply->integer);
-    }
-    freeReplyObject(reply);
-}
-
 /* Set slot status to "importing" or "migrating" */
 static int clusterManagerSetSlot(clusterManagerNode *node1,
                                  clusterManagerNode *node2,
@@ -5266,7 +5233,6 @@ static int clusterManagerMoveSlot(clusterManagerNode *source,
      * If we inform any other node first, it can happen that the target node
      * crashes before it is set as the new owner and then the slot is left
      * without an owner which results in redirect loops. See issue #7116. */
-    clusterManagerSetSlotNodeReplicaOnly(target, target, slot);
     success = clusterManagerSetSlot(target, target, slot, "node", err);
     if (!success) return 0;
 
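
With the REPLICAONLY/WAIT round-trip removed from the cluster manager, the finalization step in clusterManagerMoveSlot() reduces to the single plain SETSLOT that remains above. The raw exchange amounts to something like the following (illustrative only; the target primary now performs the replica wait itself before answering):

    /* Illustrative sketch: one plain SETSLOT against the target primary. */
    redisReply *reply = CLUSTER_MANAGER_COMMAND(target, "CLUSTER SETSLOT %d NODE %s",
                                                slot, (char *) target->name);
    if (reply != NULL && reply->type == REDIS_REPLY_ERROR) {
        /* The target refused, or its replicas did not ack within the
         * server-side 1000ms window. */
        clusterManagerLogWarn("*** %s\n", reply->str);
    }
    if (reply != NULL) freeReplyObject(reply);
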
diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl
index 68c7b0414f1..58e5b99e92d 100644
--- a/tests/unit/cluster/slot-migration.tcl
+++ b/tests/unit/cluster/slot-migration.tcl
@@ -307,37 +307,18 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica
         wait_for_slot_state 3 "\[7->-$R1_id\] \[13->-$R1_id\] \[17->-$R1_id\]"
     }
 
-    test "Slot finalization succeeds on replicas" {
-        # Trigger slot finalization on replicas
-        assert_equal {OK} [R 1 cluster setslot 7 node $R1_id replicaonly]
-        assert_equal {1} [R 1 wait 1 1000]
-        wait_for_slot_state 1 "\[7-<-$R0_id\] \[13-<-$R0_id\] \[17-<-$R0_id\]"
+    test "Slot finalization succeeds on both primary and replicas" {
+        assert_equal {OK} [R 1 cluster setslot 7 node $R1_id]
+        wait_for_slot_state 1 "\[13-<-$R0_id\] \[17-<-$R0_id\]"
         wait_for_slot_state 4 "\[13-<-$R0_id\] \[17-<-$R0_id\]"
-        assert_equal {OK} [R 1 cluster setslot 13 node $R1_id replicaonly]
-        assert_equal {1} [R 1 wait 1 1000]
-        wait_for_slot_state 1 "\[7-<-$R0_id\] \[13-<-$R0_id\] \[17-<-$R0_id\]"
+        assert_equal {OK} [R 1 cluster setslot 13 node $R1_id]
+        wait_for_slot_state 1 "\[17-<-$R0_id\]"
         wait_for_slot_state 4 "\[17-<-$R0_id\]"
-        assert_equal {OK} [R 1 cluster setslot 17 node $R1_id replicaonly]
-        assert_equal {1} [R 1 wait 1 1000]
-        wait_for_slot_state 1 "\[7-<-$R0_id\] \[13-<-$R0_id\] \[17-<-$R0_id\]"
+        assert_equal {OK} [R 1 cluster setslot 17 node $R1_id]
+        wait_for_slot_state 1 ""
         wait_for_slot_state 4 ""
     }
 
-    test "Finalizing incorrect slot fails" {
-        catch {R 1 cluster setslot 123 node $R1_id replicaonly} e
-        assert_equal {ERR Slot is not open for importing} $e
-    }
-
-    test "Slot migration without expected target replicas fails" {
-        migrate_slot 0 1 100
-        # Move the target replica away
-        wait_for_role 0 master
-        assert_equal {OK} [R 4 cluster replicate $R0_id]
-        after $node_timeout
-        # Slot finalization should fail
-        catch {R 1 cluster setslot 100 node $R1_id replicaonly} e
-        assert_equal {ERR Target node has no replicas} $e
-    }
 }
 
 start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {