Support adding/removing store via AdminRequest #1259

Merged: 8 commits, Oct 14, 2019
Changes from 2 commits
@@ -122,6 +122,14 @@ public interface ClusterMap extends AutoCloseable {
*/
JSONObject getSnapshot();

/**
* Attempt to get new replica of certain partition that resides on given data node.
Contributor:

Could you explain what a "new replica" is in this javadoc? i.e. what makes a "new" replica different from a regular replica.

Contributor Author:
This is actually the "new replica" that will be added to the specified data node. Currently it is used in HelixClusterManager only, because the target node needs the replica's detailed info to create it. The replica info is stored in the Helix PropertyStore: before moving a replica, we use the Helix Bootstrap tool to upload the new replica info to the PropertyStore, so the target node can fetch it and create the new replica. Let me know if you come up with a better name to disambiguate in this context. Thanks.

* @param partitionIdStr the partition id string
* @param dataNodeId the {@link DataNodeId} on which new replica is placed
* @return {@link ReplicaId} if there is a new replica satisfying given partition and data node. {@code null} otherwise.
*/
ReplicaId getNewReplica(String partitionIdStr, DataNodeId dataNodeId);

/**
* Close the cluster map. Any cleanups should be done in this call.
*/
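As a usage sketch for this new interface method: the flow described in the discussion above is that replica info is first uploaded to the Helix PropertyStore, and the target node then fetches it to create the store. A hypothetical caller might look like the following (handleAddStoreRequest, clusterMap, storeManager, and addBlobStore are illustrative assumptions, not part of this PR):

    // Hypothetical sketch: a target node adding a store for a partition it does
    // not host yet, e.g. in response to an AdminRequest.
    void handleAddStoreRequest(String partitionIdStr, DataNodeId localNode) {
      // Look up the replica-addition info previously uploaded to the Helix PropertyStore.
      ReplicaId newReplica = clusterMap.getNewReplica(partitionIdStr, localNode);
      if (newReplica == null) {
        // No addition info exists for this partition/node combination.
        throw new IllegalStateException("No new replica found for partition " + partitionIdStr);
      }
      // Create and start the store backing the new replica (illustrative call).
      storeManager.addBlobStore(newReplica);
    }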
@@ -27,7 +27,7 @@ public class ReplicationConfig {
/**
* The factory class the replication uses to create cloud token
*/
@Config("replication.cloudtoken.factory")
@Config("replication.cloud.token.factory")
@Default("com.github.ambry.cloud.CloudFindTokenFactory")
public final String replicationCloudTokenFactory;

@@ -134,15 +134,15 @@ public class ReplicationConfig {
/**
* The version of metadata request to be used for replication.
*/
@Config("replication.metadatarequest.version")
@Config("replication.metadata.request.version")
@Default("1")
public final short replicaMetadataRequestVersion;

public ReplicationConfig(VerifiableProperties verifiableProperties) {

replicationStoreTokenFactory =
verifiableProperties.getString("replication.token.factory", "com.github.ambry.store.StoreFindTokenFactory");
replicationCloudTokenFactory = verifiableProperties.getString("replication.cloudtoken.factory",
replicationCloudTokenFactory = verifiableProperties.getString("replication.cloud.token.factory",
"com.github.ambry.cloud.CloudFindTokenFactory");
replicationNumOfIntraDCReplicaThreads =
verifiableProperties.getInt("replication.no.of.intra.dc.replica.threads", 1);
@@ -172,6 +172,6 @@ public ReplicationConfig(VerifiableProperties verifiableProperties) {
replicationTrackPerPartitionLagFromRemote =
verifiableProperties.getBoolean("replication.track.per.partition.lag.from.remote", false);
replicaMetadataRequestVersion =
verifiableProperties.getShortInRange("replication.metadatarequest.version", (short) 1, (short) 1, (short) 2);
verifiableProperties.getShortInRange("replication.metadata.request.version", (short) 1, (short) 1, (short) 2);
}
}
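As a quick reference for the renamed keys, here is a minimal sketch of building the config (the values shown are the defaults from this diff; the Properties-based VerifiableProperties constructor is assumed):

    Properties props = new Properties();
    props.setProperty("replication.cloud.token.factory", "com.github.ambry.cloud.CloudFindTokenFactory");
    props.setProperty("replication.metadata.request.version", "1");
    ReplicationConfig config = new ReplicationConfig(new VerifiableProperties(props));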
@@ -53,7 +53,7 @@ public VcrRequests(StoreManager storeManager, RequestResponseChannel requestResp
NotificationSystem notification, ReplicationEngine replicationEngine, StoreKeyFactory storageKeyFactory,
boolean enableDataPrefetch, StoreKeyConverterFactory storeKeyConverterFactory) {
super(storeManager, requestResponseChannel, clusterMap, currentNode, registry, serverMetrics, findTokenHelper,
-     notification, replicationEngine, storageKeyFactory, enableDataPrefetch, storeKeyConverterFactory);
+     notification, replicationEngine, storageKeyFactory, enableDataPrefetch, storeKeyConverterFactory, null);
}

@Override
@@ -287,6 +287,11 @@ public JSONObject getSnapshot() {
return staticClusterManager.getSnapshot();
}

@Override
public ReplicaId getNewReplica(String partitionIdStr, DataNodeId dataNodeId) {
return helixClusterManager.getNewReplica(partitionIdStr, dataNodeId);
}

@Override
public void close() {
staticClusterManager.close();
@@ -29,6 +29,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
@@ -87,6 +88,7 @@ class HelixClusterManager implements ClusterMap {
final HelixClusterManagerMetrics helixClusterManagerMetrics;
private final PartitionSelectionHelper partitionSelectionHelper;
private final Map<String, Map<String, String>> partitionOverrideInfoMap = new HashMap<>();
private ZkHelixPropertyStore<ZNRecord> helixPropertyStoreInLocalDc = null;
// The current xid currently does not change after instantiation. This can change in the future, allowing the cluster
// manager to dynamically incorporate newer changes in the cluster. This variable is atomic so that the gauge metric
// reflects the current value.
@@ -216,13 +218,13 @@ private HelixManager initializeHelixManagerAndPropertyStoreInLocalDC(Map<String,
DcZkInfo dcZkInfo = dataCenterToZkAddress.get(clusterMapConfig.clusterMapDatacenterName);
String zkConnectStr = dcZkInfo.getZkConnectStr();
HelixManager manager;
- ZkHelixPropertyStore<ZNRecord> helixPropertyStore;
manager = helixFactory.getZKHelixManager(clusterName, instanceName, InstanceType.SPECTATOR, zkConnectStr);
logger.info("Connecting to Helix manager in local zookeeper at {}", zkConnectStr);
manager.connect();
logger.info("Established connection to Helix manager in local zookeeper at {}", zkConnectStr);
- helixPropertyStore = manager.getHelixPropertyStore();
- logger.info("HelixPropertyStore from local datacenter {} is: {}", dcZkInfo.getDcName(), helixPropertyStore);
+ helixPropertyStoreInLocalDc = manager.getHelixPropertyStore();
+ logger.info("HelixPropertyStore from local datacenter {} is: {}", dcZkInfo.getDcName(),
+     helixPropertyStoreInLocalDc);
IZkDataListener dataListener = new IZkDataListener() {
@Override
public void handleDataChange(String dataPath, Object data) {
@@ -235,10 +237,9 @@ public void handleDataDeleted(String dataPath) {
}
};
logger.info("Subscribing data listener to HelixPropertyStore.");
- helixPropertyStore.subscribeDataChanges(ClusterMapUtils.PARTITION_OVERRIDE_ZNODE_PATH, dataListener);
- logger.info("Getting ZNRecord from HelixPropertyStore");
- ZNRecord zNRecord =
-     helixPropertyStore.get(ClusterMapUtils.PARTITION_OVERRIDE_ZNODE_PATH, null, AccessOption.PERSISTENT);
+ helixPropertyStoreInLocalDc.subscribeDataChanges(PARTITION_OVERRIDE_ZNODE_PATH, dataListener);
+ logger.info("Getting PartitionOverride ZNRecord from HelixPropertyStore");
+ ZNRecord zNRecord = helixPropertyStoreInLocalDc.get(PARTITION_OVERRIDE_ZNODE_PATH, null, AccessOption.PERSISTENT);
if (clusterMapConfig.clusterMapEnablePartitionOverride) {
if (zNRecord != null) {
partitionOverrideInfoMap.putAll(zNRecord.getMapFields());
@@ -381,6 +382,55 @@ public List<PartitionId> getAllPartitionIds(String partitionClass) {
return partitionSelectionHelper.getPartitions(partitionClass);
}

@Override
public ReplicaId getNewReplica(String partitionIdStr, DataNodeId dataNodeId) {
ReplicaId newReplica = null;
logger.info("Getting ReplicaAddition ZNRecord from HelixPropertyStore in local DC.");
ZNRecord zNRecord = helixPropertyStoreInLocalDc.get(REPLICA_ADDITION_ZNODE_PATH, null, AccessOption.PERSISTENT);
if (zNRecord != null) {
String instanceName = getInstanceName(dataNodeId.getHostname(), dataNodeId.getPort());
Map<String, Map<String, String>> partitionToReplicas = zNRecord.getMapFields();
Map<String, String> replicaInfos = partitionToReplicas.get(partitionIdStr);
if (replicaInfos != null && replicaInfos.containsKey(instanceName)) {
long replicaCapacity = Long.valueOf(replicaInfos.get(REPLICAS_CAPACITY_STR));
Contributor:
also here (the same Long.parseLong suggestion as below)

Contributor Author:
fixed

String partitionClass = replicaInfos.get(PARTITION_CLASS_STR);
AmbryPartition mappedPartition = partitionNameToAmbryPartition.get(partitionIdStr);
if (mappedPartition == null) {
logger.info("Partition {} is currently not present in cluster map, creating a new partition.",
partitionIdStr);
mappedPartition =
new AmbryPartition(Long.valueOf(partitionIdStr), partitionClass, helixClusterManagerCallback);
Contributor:
IntelliJ suggests using Long.parseLong here to avoid unnecessary boxing.

Contributor Author:
fixed

}
// Check if data node or disk is in current cluster map, if not, set newReplica to null.
AmbryDataNode dataNode = instanceNameToAmbryDataNode.get(instanceName);
Collaborator:
Who calls getNewReplica? Can it be called for a node when the partition doesn't even exist on the node as per the cluster map?

Contributor Author:
Discussed this offline. Currently AmbryRequests calls this method; eventually, the state model of each partition will call it.

String mountPathFromHelix = replicaInfos.get(instanceName);
Set<AmbryDisk> disks = dataNode != null ? ambryDataNodeToAmbryDisks.get(dataNode) : null;
Optional<AmbryDisk> potentialDisk =
    disks != null ? disks.stream().filter(d -> d.getMountPath().equals(mountPathFromHelix)).findAny()
        : Optional.empty();

Contributor:
Could also do

        Optional<AmbryDisk> potentialDisk =
            Optional.ofNullable(dataNode != null ? ambryDataNodeToAmbryDisks.get(dataNode) : null)
                .flatMap(disks -> disks.stream().filter(d -> d.getMountPath().equals(mountPathFromHelix)).findAny());

Collaborator:
So when someone creates a new partition, will he/she ensure that the mount path exists on the node?

Contributor Author:
SRE is supposed to guarantee that a newly added replica has a valid mount path; otherwise the creation should fail.
if (dataNode != null && potentialDisk.isPresent()) {
Contributor:
Can you remove the dataNode != null check since you check that on line 407?

Contributor Author:
Sure, fair point.

try {
newReplica =
new AmbryReplica(clusterMapConfig, mappedPartition, potentialDisk.get(), true, replicaCapacity, false);
} catch (Exception e) {
logger.error("Failed to create new replica for partition {} on {} due to exception: ", partitionIdStr,
instanceName, e);
newReplica = null;
}
} else {
logger.error(
"Either datanode or disk that associated with new replica is not found in cluster map. Cannot create new replica.");
}
} else {
logger.warn("Partition {} or replica on host {} is not found in replica info map", partitionIdStr,
instanceName);
}
} else {
logger.warn("ZNRecord from HelixPropertyStore is NULL, partition to replicaInfo map doesn't exist.");
}
return newReplica;
}

/**
* Disconnect from the HelixManagers associated with each and every datacenter.
*/
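Piecing together the reads in getNewReplica above, the ReplicaAddition ZNRecord is keyed by partition id, and each partition's map field holds the replica capacity, the partition class, and an instanceName-to-mount-path entry. A rough sketch of the record the Helix Bootstrap tool would upload (the literal sample values are made up; REPLICAS_CAPACITY_STR, PARTITION_CLASS_STR, and REPLICA_ADDITION_ZNODE_PATH come from ClusterMapUtils):

    ZNRecord record = new ZNRecord("ReplicaAddition");
    Map<String, String> replicaInfos = new HashMap<>();
    replicaInfos.put(REPLICAS_CAPACITY_STR, "107374182400");      // capacity in bytes (sample value)
    replicaInfos.put(PARTITION_CLASS_STR, "default-partition-class");
    replicaInfos.put("localhost_15088", "/mnt/u001");             // instanceName -> mount path
    record.setMapField("101", replicaInfos);                      // partition id -> replica info
    helixPropertyStore.set(REPLICA_ADDITION_ZNODE_PATH, record, AccessOption.PERSISTENT);

As noted in the review thread above, whoever uploads this record (SRE, via the Bootstrap tool) is responsible for ensuring the mount path actually exists on the target node.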
@@ -533,6 +533,12 @@ public JSONObject getSnapshot() {
return snapshot;
}

@Override
public ReplicaId getNewReplica(String partitionIdStr, DataNodeId dataNodeId) {
throw new UnsupportedOperationException(
"Adding new replica is currently not supported in static cluster manager. Return null here");
Contributor:
Should this return null like the exception message says?

Contributor Author:
Updated the exception message. I feel like returning null is unneeded.

}

@Override
public void close() {
// No-op.