From 469b6a2d0b318bbc377e7915a9cbe99f00e38e1d Mon Sep 17 00:00:00 2001 From: Wesley Wang <2791412+WesleyW@users.noreply.github.com> Date: Mon, 21 Sep 2020 17:42:01 -0700 Subject: [PATCH] [backport 2.2] #5703: #5791: #5792 [YW] Releasing OnPrem nodes should not delete them from universe metadata Summary: 1. This diff aligns the OnPrem release node + re-add node flow to VM universes - releasing a node should not clear the universe metadata for that node. To re-add the node, use the "Node -> Actions -> Add" flow instead of the "Edit Universe" flow. 2. Fixes bug introduced in D8405 3. Fixes bug introduced in https://phabricator.dev.yugabyte.com/D9097 - #5791 - releasing a node instance should only attempt to shut down processes on that node. 4. During an add node, we were not running the selection logic in NodeInstance::pickNodes to select nodes from the onprem provider. Test Plan: 1. Create an onprem universe, release node, verify that an entry still remains in the universe nodes page. Verify that the node shows up as not in use on the provider node instances page. Verify that data is deleted from a node when it is released. 2. Verify that a newly created universe can pick up a released node. 3. Verify that a newly created universe can pick up a combination of new nodes and released nodes. 
Reviewers: arnav, wesley, daniel, zyu Reviewed By: daniel, zyu Subscribers: zyu, jenkins-bot, yugaware Differential Revision: https://phabricator.dev.yugabyte.com/D9441 --- .../templates/yb-server-ctl.sh.j2 | 1 + .../commissioner/tasks/AddNodeToUniverse.java | 27 ++++++++++++++-- .../commissioner/tasks/DestroyUniverse.java | 7 +++-- .../tasks/ReleaseInstanceFromUniverse.java | 8 ++--- .../tasks/UniverseDefinitionTaskBase.java | 6 +++- .../tasks/subtasks/AnsibleDestroyServer.java | 31 ++++++++++++------- .../tasks/subtasks/AnsibleUpdateNodeInfo.java | 2 +- .../com/yugabyte/yw/models/NodeInstance.java | 4 +++ 8 files changed, 63 insertions(+), 23 deletions(-) diff --git a/managed/devops/roles/configure-cluster-server/templates/yb-server-ctl.sh.j2 b/managed/devops/roles/configure-cluster-server/templates/yb-server-ctl.sh.j2 index 15ecdb3152a2..e077cfad5a81 100644 --- a/managed/devops/roles/configure-cluster-server/templates/yb-server-ctl.sh.j2 +++ b/managed/devops/roles/configure-cluster-server/templates/yb-server-ctl.sh.j2 @@ -134,6 +134,7 @@ clean_data_paths() { for (( i=0; i node = new HashSet(Arrays.asList(currentNode)); // First spawn an instance for Decommissioned node. boolean wasDecommissioned = currentNode.state == NodeState.Decommissioned; if (wasDecommissioned) { + if (cluster.userIntent.providerType.equals(CloudType.onprem)) { + // For onprem universes, allocate an available node + // from the provider's node_instance table. 
+ Map> onpremAzToNodes = new HashMap>(); + List nodeNameList = new ArrayList<>(); + nodeNameList.add(currentNode.nodeName); + onpremAzToNodes.put(currentNode.azUuid, nodeNameList); + String instanceType = currentNode.cloudInfo.instance_type; + + Map nodeMap = NodeInstance.pickNodes(onpremAzToNodes, instanceType); + currentNode.nodeUuid = nodeMap.get(currentNode.nodeName).nodeUuid; + } + createSetupServerTasks(node) .setSubTaskGroupType(SubTaskGroupType.Provisioning); @@ -93,6 +114,10 @@ public void run() { // Bring up any masters, as needed. boolean masterAdded = false; if (areMastersUnderReplicated(currentNode, universe)) { + LOG.info( + "Bringing up master for under replicated universe {} ({})", + universe.universeUUID, universe.name + ); // Set gflags for master. createGFlagsOverrideTasks(node, ServerType.MASTER); @@ -115,8 +140,6 @@ public void run() { masterAdded = true; } - Cluster cluster = taskParams().getClusterByUuid(currentNode.placementUuid); - UniverseDefinitionTaskParams universeDetails = universe.getUniverseDetails(); // Explicitly set webserver ports for each dql diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/DestroyUniverse.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/DestroyUniverse.java index 69c8f2fe2e4d..a20822c89cc1 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/DestroyUniverse.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/DestroyUniverse.java @@ -63,8 +63,11 @@ public void run() { .setSubTaskGroupType(SubTaskGroupType.RemovingUnusedServers); // Create tasks to destroy the existing nodes. 
- createDestroyServerTasks(universe.getNodes(), params().isForceDelete, true) - .setSubTaskGroupType(SubTaskGroupType.RemovingUnusedServers); + createDestroyServerTasks( + universe.getNodes(), + params().isForceDelete, + true /* delete node */ + ).setSubTaskGroupType(SubTaskGroupType.RemovingUnusedServers); } // Create tasks to remove the universe entry from the Universe table. diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/ReleaseInstanceFromUniverse.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/ReleaseInstanceFromUniverse.java index f01c7446f1d5..7ec5d1e2da66 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/ReleaseInstanceFromUniverse.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/ReleaseInstanceFromUniverse.java @@ -22,8 +22,10 @@ import com.yugabyte.yw.models.Universe; import com.yugabyte.yw.models.helpers.NodeDetails; import com.yugabyte.yw.models.helpers.NodeDetails.NodeState; +import com.yugabyte.yw.models.NodeInstance; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import org.slf4j.Logger; @@ -95,12 +97,6 @@ public void run() { createSetNodeStateTask(currentNode, NodeState.Decommissioned) .setSubTaskGroupType(SubTaskGroupType.ReleasingInstance); - // Delete and reset node metadata for onprem universes. - if (userIntent.providerType.equals(CloudType.onprem)) { - deleteNodeFromUniverseTask(taskParams().nodeName) - .setSubTaskGroupType(SubTaskGroupType.ReleasingInstance); - } - // Update the DNS entry for this universe. 
createDnsManipulationTask(DnsManager.DnsCommandType.Edit, false, userIntent.providerType, userIntent.provider, userIntent.universeName) diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/UniverseDefinitionTaskBase.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/UniverseDefinitionTaskBase.java index 21d13359039d..c0edc6d2256c 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/UniverseDefinitionTaskBase.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/UniverseDefinitionTaskBase.java @@ -329,7 +329,11 @@ public void setNodeNames(UniverseOpType opType, Universe universe) { } public void updateOnPremNodeUuids(Universe universe) { - LOG.debug("Update on prem nodes in universe {}.", taskParams().universeUUID); + LOG.info( + "Selecting prem nodes for universe {} ({}).", + universe.name, + taskParams().universeUUID + ); UniverseDefinitionTaskParams universeDetails = universe.getUniverseDetails(); diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleDestroyServer.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleDestroyServer.java index cfb798741a0b..643c75798514 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleDestroyServer.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleDestroyServer.java @@ -46,27 +46,17 @@ private void removeNodeFromUniverse(final String nodeName) { LOG.error("No node in universe with name " + nodeName); return; } - UserIntent userIntent = u.getUniverseDetails() - .getClusterByUuid(u.getNode(taskParams().nodeName).placementUuid).userIntent; // Persist the desired node information into the DB. 
UniverseUpdater updater = new UniverseUpdater() { @Override public void run(Universe universe) { UniverseDefinitionTaskParams universeDetails = universe.getUniverseDetails(); universeDetails.removeNode(nodeName); - LOG.debug("Removing node " + nodeName + " from universe " + taskParams().universeUUID); + LOG.info("Removed node " + nodeName + " from universe " + taskParams().universeUUID); } }; Universe.saveDetails(taskParams().universeUUID, updater); - - if (userIntent.providerType.equals(Common.CloudType.onprem)) { - // Free up the node. - NodeInstance node = NodeInstance.getByName(nodeName); - node.inUse = false; - node.setNodeName(""); - node.save(); - } } @Override @@ -84,6 +74,25 @@ public void run() { } } + Universe u = Universe.get(taskParams().universeUUID); + UserIntent userIntent = u.getUniverseDetails() + .getClusterByUuid(u.getNode(taskParams().nodeName).placementUuid).userIntent; + NodeDetails univNodeDetails = u.getNode(taskParams().nodeName); + + if (userIntent.providerType.equals(Common.CloudType.onprem) && + univNodeDetails.state != NodeDetails.NodeState.Decommissioned) { + // Free up the node. + try { + NodeInstance providerNode = NodeInstance.getByName(taskParams().nodeName); + providerNode.clearNodeDetails(); + } catch (Exception e) { + if (!taskParams().isForceDelete) { + throw e; + } + } + LOG.info("Marked node instance {} as available", taskParams().nodeName); + } + if (taskParams().deleteNode) { // Update the node state to removed. Even though we remove the node below, this will // help tracking state for any nodes stuck in limbo. 
diff --git a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleUpdateNodeInfo.java b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleUpdateNodeInfo.java index 3722b2bd91ec..e7b1dcf6a45a 100644 --- a/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleUpdateNodeInfo.java +++ b/managed/src/main/java/com/yugabyte/yw/commissioner/tasks/subtasks/AnsibleUpdateNodeInfo.java @@ -69,7 +69,7 @@ public void run(Universe universe) { } Field field; try { - LOG.info("Node {}: setting field {} to value {}.", + LOG.info("Node {}: setting univ node details field {} to value {}.", taskParams.nodeName, entry.getKey(), entry.getValue()); // Error out if the host was not found. if (entry.getKey().equals("host_found") && entry.getValue().asText().equals("false")) { diff --git a/managed/src/main/java/com/yugabyte/yw/models/NodeInstance.java b/managed/src/main/java/com/yugabyte/yw/models/NodeInstance.java index d26f539d863d..9ed373f42a79 100644 --- a/managed/src/main/java/com/yugabyte/yw/models/NodeInstance.java +++ b/managed/src/main/java/com/yugabyte/yw/models/NodeInstance.java @@ -113,6 +113,9 @@ public static int deleteByProvider(UUID providerUUID) { return deleteStmt.execute(); } + /** Pick available nodes in zones specified by onpremAzToNodes with + * the instance type specified + */ public static synchronized Map pickNodes( Map> onpremAzToNodes, String instanceTypeCode) { Map outputMap = new HashMap(); @@ -134,6 +137,7 @@ public static synchronized Map pickNodes( node.setNodeName(nodeName); outputMap.put(nodeName, node); ++index; + LOG.info("Marking node {} (ip {}) as in-use.", nodeName, node.getDetails().ip); } } // All good, save to DB.