diff --git a/java/interface-annotations/pom.xml b/java/interface-annotations/pom.xml
index 3780b836e241..f0990392a191 100644
--- a/java/interface-annotations/pom.xml
+++ b/java/interface-annotations/pom.xml
@@ -21,7 +21,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
interface-annotations
diff --git a/java/pom.xml b/java/pom.xml
index e38938d86872..d4027e8ca6bc 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -40,7 +40,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
pom
Yugabyte
diff --git a/java/yb-cdc/pom.xml b/java/yb-cdc/pom.xml
index 2734cfb867a5..124d216c0ada 100644
--- a/java/yb-cdc/pom.xml
+++ b/java/yb-cdc/pom.xml
@@ -5,7 +5,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-cdc
YB CDC Connector
diff --git a/java/yb-cli/pom.xml b/java/yb-cli/pom.xml
index 3e6165cb243a..0e8746d0b5bf 100644
--- a/java/yb-cli/pom.xml
+++ b/java/yb-cli/pom.xml
@@ -25,7 +25,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-cli
diff --git a/java/yb-client/pom.xml b/java/yb-client/pom.xml
index 70b09ddc6978..1f0fdc205d20 100644
--- a/java/yb-client/pom.xml
+++ b/java/yb-client/pom.xml
@@ -25,7 +25,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-client
diff --git a/java/yb-client/src/main/java/org/yb/client/AsyncYBClient.java b/java/yb-client/src/main/java/org/yb/client/AsyncYBClient.java
index 5cc0959c60be..da42533d2d85 100644
--- a/java/yb-client/src/main/java/org/yb/client/AsyncYBClient.java
+++ b/java/yb-client/src/main/java/org/yb/client/AsyncYBClient.java
@@ -1059,6 +1059,19 @@ public Deferred changeLoadBalancerState(boolean
return sendRpcToTablet(rpc);
}
+  /**
+   * Query the master for the current state of the load balancer.
+   *
+   * @return a deferred object that yields the master's load balancer state response.
+   */
+  public Deferred getLoadBalancerState() {
+    checkIsClosed();
+    // The request targets the master table; its timeout is the default admin operation timeout.
+    GetLoadBalancerStateRequest request = new GetLoadBalancerStateRequest(this.masterTable);
+    request.setTimeoutMillis(defaultAdminOperationTimeoutMs);
+    return sendRpcToTablet(request);
+  }
+
/**
* Get the tablet load move completion percentage for blacklisted nodes.
*
diff --git a/java/yb-client/src/main/java/org/yb/client/GetLoadBalancerStateRequest.java b/java/yb-client/src/main/java/org/yb/client/GetLoadBalancerStateRequest.java
new file mode 100644
index 000000000000..293837cc996f
--- /dev/null
+++ b/java/yb-client/src/main/java/org/yb/client/GetLoadBalancerStateRequest.java
@@ -0,0 +1,67 @@
+// Copyright (c) YugaByte, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations
+// under the License.
+//
+
+package org.yb.client;
+
+import com.google.protobuf.Message;
+import io.netty.buffer.ByteBuf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.yb.annotations.InterfaceAudience;
+import org.yb.master.MasterClusterOuterClass;
+import org.yb.master.MasterClusterOuterClass.GetLoadBalancerStateRequestPB;
+import org.yb.util.Pair;
+
+@InterfaceAudience.Public
+public class GetLoadBalancerStateRequest extends YRpc {
+
+  public static final Logger LOG = LoggerFactory.getLogger(GetLoadBalancerStateRequest.class);
+
+  /**
+   * @param table the table this RPC is bound to; AsyncYBClient passes its master table since
+   *     this service is registered on master.
+   */
+  public GetLoadBalancerStateRequest(YBTable table) {
+    super(table);
+  }
+
+  /** Serializes the header together with an empty GetLoadBalancerStateRequestPB. */
+  @Override
+  ByteBuf serialize(Message header) {
+    assert header.isInitialized();
+    return toChannelBuffer(header, GetLoadBalancerStateRequestPB.newBuilder().build());
+  }
+
+  @Override
+  String serviceName() {
+    return MASTER_SERVICE_NAME;
+  }
+
+  @Override
+  String method() {
+    return "GetLoadBalancerState";
+  }
+
+  /** Parses the reply and pairs the wrapped response with its error, or null if none. */
+  @Override
+  Pair deserialize(CallResponse callResponse, String uuid) throws Exception {
+    final MasterClusterOuterClass.GetLoadBalancerStateResponsePB.Builder pbBuilder =
+        MasterClusterOuterClass.GetLoadBalancerStateResponsePB.newBuilder();
+    readProtobuf(callResponse.getPBMessage(), pbBuilder);
+
+    // Elapsed time is taken from this RPC's deadline tracker.
+    GetLoadBalancerStateResponse wrapped = new GetLoadBalancerStateResponse(
+        deadlineTracker.getElapsedMillis(), uuid, pbBuilder.build());
+    return new Pair(wrapped, pbBuilder.hasError() ? pbBuilder.getError() : null);
+  }
+}
diff --git a/java/yb-client/src/main/java/org/yb/client/GetLoadBalancerStateResponse.java b/java/yb-client/src/main/java/org/yb/client/GetLoadBalancerStateResponse.java
new file mode 100644
index 000000000000..7ed95b5d3a2f
--- /dev/null
+++ b/java/yb-client/src/main/java/org/yb/client/GetLoadBalancerStateResponse.java
@@ -0,0 +1,54 @@
+// Copyright (c) YugaByte, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations
+// under the License.
+//
+
+package org.yb.client;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.yb.annotations.InterfaceAudience;
+import org.yb.master.MasterClusterOuterClass.GetLoadBalancerStateResponsePB;
+import org.yb.master.MasterTypes.MasterErrorPB;
+
+/** Response wrapper around the master's GetLoadBalancerStateResponsePB. */
+@InterfaceAudience.Public
+public class GetLoadBalancerStateResponse extends YRpcResponse {
+  public static final Logger LOG = LoggerFactory.getLogger(GetLoadBalancerStateResponse.class);
+  private final GetLoadBalancerStateResponsePB statePb;
+
+  public GetLoadBalancerStateResponse(
+      long elapsedMillis, String uuid, GetLoadBalancerStateResponsePB response) {
+    super(elapsedMillis, uuid);
+    statePb = response;
+  }
+
+  public MasterErrorPB getServerError() {
+    return statePb.getError();
+  }
+
+  public boolean hasError() {
+    return statePb.hasError();
+  }
+
+  /** Returns the status message of the reported error, or null when no error is set. */
+  public String errorMessage() {
+    return !statePb.hasError() ? null : statePb.getError().getStatus().getMessage();
+  }
+
+  public boolean hasIsEnabled() {
+    return statePb.hasIsEnabled();
+  }
+
+  public boolean isEnabled() {
+    return statePb.getIsEnabled();
+  }
+}
diff --git a/java/yb-client/src/main/java/org/yb/client/IsLoadBalancedRequest.java b/java/yb-client/src/main/java/org/yb/client/IsLoadBalancedRequest.java
index 3b8fc9759188..f112750b4023 100644
--- a/java/yb-client/src/main/java/org/yb/client/IsLoadBalancedRequest.java
+++ b/java/yb-client/src/main/java/org/yb/client/IsLoadBalancedRequest.java
@@ -34,7 +34,9 @@ ByteBuf serialize(Message header) {
assert header.isInitialized();
final MasterClusterOuterClass.IsLoadBalancedRequestPB.Builder builder =
MasterClusterOuterClass.IsLoadBalancedRequestPB.newBuilder();
- builder.setExpectedNumServers(expectedServers);
+ if (expectedServers != 0) {
+ builder.setExpectedNumServers(expectedServers);
+ }
return toChannelBuffer(header, builder.build());
}
diff --git a/java/yb-client/src/main/java/org/yb/client/IsLoadBalancerIdleResponse.java b/java/yb-client/src/main/java/org/yb/client/IsLoadBalancerIdleResponse.java
index 72b4c5de7dd0..f10f8d6c8c06 100644
--- a/java/yb-client/src/main/java/org/yb/client/IsLoadBalancerIdleResponse.java
+++ b/java/yb-client/src/main/java/org/yb/client/IsLoadBalancerIdleResponse.java
@@ -27,6 +27,10 @@ public IsLoadBalancerIdleResponse(long ellapsedMillis,
serverError = error;
}
+  public MasterTypes.MasterErrorPB getError() {
+    return this.serverError; // null when the response carried no error (see hasError())
+  }
+
public boolean hasError() {
return serverError != null;
}
diff --git a/java/yb-client/src/main/java/org/yb/client/YBClient.java b/java/yb-client/src/main/java/org/yb/client/YBClient.java
index 1e22859571e0..96df769d628a 100644
--- a/java/yb-client/src/main/java/org/yb/client/YBClient.java
+++ b/java/yb-client/src/main/java/org/yb/client/YBClient.java
@@ -458,6 +458,17 @@ public ChangeLoadBalancerStateResponse changeLoadBalancerState(boolean isEnable)
return d.join(getDefaultAdminOperationTimeoutMs());
}
+  /**
+   * Get the load balancer state, waiting up to the default admin operation timeout.
+   *
+   * @return the response of the operation.
+   * @throws Exception propagated from joining the deferred result.
+   */
+  public GetLoadBalancerStateResponse getLoadBalancerState() throws Exception {
+    // Block on the deferred result produced by the async client.
+    return asyncClient.getLoadBalancerState().join(getDefaultAdminOperationTimeoutMs());
+  }
+
/**
* Get the tablet load move completion percentage for blacklisted nodes, if any.
*
diff --git a/java/yb-cql-4x/pom.xml b/java/yb-cql-4x/pom.xml
index 858779dffa9e..9db13172f4bd 100644
--- a/java/yb-cql-4x/pom.xml
+++ b/java/yb-cql-4x/pom.xml
@@ -7,7 +7,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-cql-4x
YB CQL Support for 4.x Driver
diff --git a/java/yb-cql/pom.xml b/java/yb-cql/pom.xml
index 76bcf12e5bce..b97c96e20306 100644
--- a/java/yb-cql/pom.xml
+++ b/java/yb-cql/pom.xml
@@ -7,7 +7,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-cql
YB CQL Support
diff --git a/java/yb-jedis-tests/pom.xml b/java/yb-jedis-tests/pom.xml
index 3ec2368ea32c..63c29d4a93e5 100644
--- a/java/yb-jedis-tests/pom.xml
+++ b/java/yb-jedis-tests/pom.xml
@@ -7,7 +7,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-jedis-tests
YB Jedis Tests
diff --git a/java/yb-loadtester/pom.xml b/java/yb-loadtester/pom.xml
index f8a27286b314..0f126a8d55cd 100644
--- a/java/yb-loadtester/pom.xml
+++ b/java/yb-loadtester/pom.xml
@@ -6,7 +6,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-loadtester
diff --git a/java/yb-multiapi/pom.xml b/java/yb-multiapi/pom.xml
index fd04466cf70a..aceee78a174f 100644
--- a/java/yb-multiapi/pom.xml
+++ b/java/yb-multiapi/pom.xml
@@ -9,7 +9,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-multiapi
diff --git a/java/yb-pgsql/pom.xml b/java/yb-pgsql/pom.xml
index 924e8958ba92..b02a7dc795c1 100644
--- a/java/yb-pgsql/pom.xml
+++ b/java/yb-pgsql/pom.xml
@@ -8,7 +8,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-pgsql
YB PostgreSQL Support
diff --git a/java/yb-sample/pom.xml b/java/yb-sample/pom.xml
index 25ffe14679b3..d5b8bc43dfd9 100644
--- a/java/yb-sample/pom.xml
+++ b/java/yb-sample/pom.xml
@@ -8,7 +8,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-sample
YB Manual Support
diff --git a/java/yb-ysql-conn-mgr/pom.xml b/java/yb-ysql-conn-mgr/pom.xml
index fc8a8dcbc336..41c35fcf2aaf 100644
--- a/java/yb-ysql-conn-mgr/pom.xml
+++ b/java/yb-ysql-conn-mgr/pom.xml
@@ -22,7 +22,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-ysql-conn-mgr
Ysql Connection Manager Tests
diff --git a/java/yb-yugabyted/pom.xml b/java/yb-yugabyted/pom.xml
index 1ed0c8667638..5d6a36f4379d 100644
--- a/java/yb-yugabyted/pom.xml
+++ b/java/yb-yugabyted/pom.xml
@@ -10,7 +10,7 @@
org.yb
yb-parent
- 0.8.92-SNAPSHOT
+ 0.8.93-SNAPSHOT
yb-yugabyted
diff --git a/managed/build.sbt b/managed/build.sbt
index 43b69740fdd8..2a34c8c3447e 100644
--- a/managed/build.sbt
+++ b/managed/build.sbt
@@ -926,7 +926,7 @@ runPlatform := {
Project.extract(newState).runTask(runPlatformTask, newState)
}
-libraryDependencies += "org.yb" % "yb-client" % "0.8.92-SNAPSHOT"
+libraryDependencies += "org.yb" % "yb-client" % "0.8.93-SNAPSHOT"
libraryDependencies += "org.yb" % "ybc-client" % "2.2.0.0-b3"
libraryDependencies += "org.yb" % "yb-perf-advisor" % "1.0.0-b33"
diff --git a/managed/src/main/java/com/yugabyte/yw/controllers/MetaMasterController.java b/managed/src/main/java/com/yugabyte/yw/controllers/MetaMasterController.java
index 0eb1323b1234..888ca6d225a8 100644
--- a/managed/src/main/java/com/yugabyte/yw/controllers/MetaMasterController.java
+++ b/managed/src/main/java/com/yugabyte/yw/controllers/MetaMasterController.java
@@ -9,6 +9,8 @@
import com.yugabyte.yw.common.rbac.PermissionInfo.Action;
import com.yugabyte.yw.common.rbac.PermissionInfo.ResourceType;
import com.yugabyte.yw.common.services.YBClientService;
+import com.yugabyte.yw.controllers.apiModels.MasterLBStateResponse;
+import com.yugabyte.yw.controllers.handlers.MetaMasterHandler;
import com.yugabyte.yw.forms.PlatformResults;
import com.yugabyte.yw.models.Customer;
import com.yugabyte.yw.models.Universe;
@@ -42,6 +44,8 @@ public class MetaMasterController extends Controller {
@Inject KubernetesManagerFactory kubernetesManagerFactory;
+ @Inject MetaMasterHandler metaMasterHandler;
+
@ApiOperation(
value = "List a universe's master nodes",
response = MastersList.class,
@@ -78,6 +82,22 @@ public Result getMasterAddresses(UUID customerUUID, UUID universeUUID) {
return getServerAddresses(customerUUID, universeUUID, ServerType.MASTER);
}
+  @ApiOperation(
+      value = "Get the state of master load balancing ops",
+      notes = "Available since YBA version 2024.2.0",
+      response = MasterLBStateResponse.class)
+  @YbaApi(visibility = YbaApi.YbaApiVisibility.INTERNAL, sinceYBAVersion = "2024.2.0")
+  @AuthzPath({
+    @RequiredPermissionOnResource(
+        requiredPermission =
+            @PermissionAttribute(resourceType = ResourceType.UNIVERSE, action = Action.READ),
+        resourceLocation = @Resource(path = Util.UNIVERSES, sourceType = SourceType.ENDPOINT))
+  })
+  public Result getMasterLBState(UUID customerUUID, UUID universeUUID) {
+    // Delegate to the handler and wrap its response via PlatformResults.withData.
+    return PlatformResults.withData(metaMasterHandler.getMasterLBState(customerUUID, universeUUID));
+  }
+
@ApiOperation(
notes = "Available since YBA version 2.2.0.0.",
value = "List a YQL server's addresses",
diff --git a/managed/src/main/java/com/yugabyte/yw/controllers/apiModels/MasterLBStateResponse.java b/managed/src/main/java/com/yugabyte/yw/controllers/apiModels/MasterLBStateResponse.java
new file mode 100644
index 000000000000..5e43a5e192b0
--- /dev/null
+++ b/managed/src/main/java/com/yugabyte/yw/controllers/apiModels/MasterLBStateResponse.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2024 YugaByte, Inc. and Contributors
+ *
+ * Licensed under the Polyform Free Trial License 1.0.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://github.com/YugaByte/yugabyte-db/blob/master/licenses/POLYFORM-FREE-TRIAL-LICENSE-1.0.0.txt
+ */
+package com.yugabyte.yw.controllers.apiModels;
+
+import io.swagger.annotations.ApiModel;
+import io.swagger.annotations.ApiModelProperty;
+
+@ApiModel(description = "Master tablet load balancer status")
+public class MasterLBStateResponse {
+  @ApiModelProperty(
+      value = "YbaApi Internal Whether master tablet load balancer is enabled",
+      required = true)
+  public Boolean isEnabled;
+
+  @ApiModelProperty(
+      value = "YbaApi Internal Whether master tablet load balancer is inactive",
+      required = false)
+  public Boolean isIdle; // optional (required = false); boxed, so it may be null
+
+  @ApiModelProperty(
+      value =
+          "YbaApi Internal Estimate of time for which master tablet load balancer will be active",
+      required = false)
+  public Long estTimeToBalanceSecs; // optional (required = false); boxed, so it may be null
+}
diff --git a/managed/src/main/java/com/yugabyte/yw/controllers/handlers/MetaMasterHandler.java b/managed/src/main/java/com/yugabyte/yw/controllers/handlers/MetaMasterHandler.java
new file mode 100644
index 000000000000..7b3ffac964ce
--- /dev/null
+++ b/managed/src/main/java/com/yugabyte/yw/controllers/handlers/MetaMasterHandler.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2021 YugaByte, Inc. and Contributors
+ *
+ * Licensed under the Polyform Free Trial License 1.0.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+ *
+ * http://github.com/YugaByte/yugabyte-db/blob/master/licenses/POLYFORM-FREE-TRIAL-LICENSE-1.0.0.txt
+ */
+
+package com.yugabyte.yw.controllers.handlers;
+
+import com.google.inject.Inject;
+import com.yugabyte.yw.commissioner.Commissioner;
+import com.yugabyte.yw.common.PlatformServiceException;
+import com.yugabyte.yw.common.SwamperHelper;
+import com.yugabyte.yw.common.config.RuntimeConfigFactory;
+import com.yugabyte.yw.common.services.YBClientService;
+import com.yugabyte.yw.controllers.apiModels.MasterLBStateResponse;
+import com.yugabyte.yw.metrics.MetricQueryHelper;
+import com.yugabyte.yw.metrics.MetricQueryResponse;
+import com.yugabyte.yw.models.Universe;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.UUID;
+import lombok.extern.slf4j.Slf4j;
+import org.yb.client.GetLoadBalancerStateResponse;
+import org.yb.client.IsLoadBalancerIdleResponse;
+import org.yb.client.MasterErrorException;
+import org.yb.client.YBClient;
+import org.yb.master.MasterTypes.MasterErrorPB;
+
+@Slf4j
+public class MetaMasterHandler {
+
+  @Inject Commissioner commissioner;
+  @Inject private YBClientService ybService;
+  @Inject private MetricQueryHelper metricQueryHelper;
+  @Inject private RuntimeConfigFactory runtimeConfigFactory;
+
+  /**
+   * Reports whether the master tablet load balancer is enabled and, if so, whether it is idle
+   * and (best effort) an estimate of how long it will stay active.
+   *
+   * @param customerUUID NOTE(review): unused here; confirm the universe's owner is validated.
+   * @param universeUUID universe whose masters are queried.
+   * @return the state; isIdle and estTimeToBalanceSecs stay null when they were not determined.
+   * @throws PlatformServiceException with SERVICE_UNAVAILABLE if querying the masters fails.
+   */
+  public MasterLBStateResponse getMasterLBState(UUID customerUUID, UUID universeUUID) {
+    Universe universe = Universe.getOrBadRequest(universeUUID);
+    String masterAddresses = universe.getMasterAddresses();
+    String universeCertificate = universe.getCertificateNodetoNode();
+    MasterLBStateResponse resp = new MasterLBStateResponse();
+
+    try (YBClient client = ybService.getClient(masterAddresses, universeCertificate)) {
+      // Check if the tablet load balancer is actually enabled
+      GetLoadBalancerStateResponse masterLBState = client.getLoadBalancerState();
+      if (masterLBState == null) {
+        throw new RuntimeException("Null response");
+      } else if (masterLBState.hasError()) {
+        throw new RuntimeException(masterLBState.errorMessage());
+      } else if (!masterLBState.hasIsEnabled()) {
+        throw new RuntimeException("Load balancer state response is missing isEnabled");
+      }
+      resp.isEnabled = masterLBState.isEnabled();
+      if (!resp.isEnabled) {
+        // If it is not enabled, no point getting the current state of the tablet LB
+        return resp;
+      }
+
+      try {
+        IsLoadBalancerIdleResponse isBalancedResp = client.getIsLoadBalancerIdle();
+        if (isBalancedResp.hasError()
+            && MasterErrorPB.Code.LOAD_BALANCER_RECENTLY_ACTIVE
+                != isBalancedResp.getError().getCode()) {
+          // other error codes are real errors talking to the master
+          throw new RuntimeException(isBalancedResp.errorMessage());
+        }
+        resp.isIdle = !isBalancedResp.hasError();
+      } catch (MasterErrorException mex) {
+        if (mex.error == null
+            || mex.error.getCode() != MasterErrorPB.Code.LOAD_BALANCER_RECENTLY_ACTIVE) {
+          // other error codes are real errors talking to the master
+          throw mex;
+        }
+        resp.isIdle = false;
+      }
+    } catch (Exception ex) {
+      log.warn("Error querying master load balancer state", ex); // keeps the stack trace
+      throw new PlatformServiceException(
+          play.mvc.Http.Status.SERVICE_UNAVAILABLE,
+          "Error reaching masters. Details: " + ex.getMessage());
+    }
+
+    try {
+      if (resp.isIdle != null && !resp.isIdle) {
+        resp.estTimeToBalanceSecs =
+            getEstTimeToBalance(universeUUID, this.metricQueryHelper, this.runtimeConfigFactory)
+                .toSeconds();
+      }
+    } catch (Exception ex) {
+      // Best effort: the estimate is optional, so a failure only leaves the field unset.
+      log.trace("Unable to get an estimate of the time to balance tablet load", ex);
+    }
+
+    return resp;
+  }
+
+  /**
+   * Estimates the remaining balancing time as metric / (-1 * deriv(metric)) over a window of
+   * at least 5 scrape intervals or 2 minutes. Under regular load balancing, or when a tserver is
+   * un-blacklisted, total_table_load_difference is expected to decrease steadily to 0; when a
+   * tserver is blacklisted, tablets_in_wrong_placement is expected to decrease steadily. The
+   * first metric does not always reach 0 (e.g. imbalanced tservers per AZ), so that case is a
+   * rough guess; tablets_in_wrong_placement does always go down to 0 and is more accurate.
+   *
+   * @throws RuntimeException if the query result is unusable or the estimate is out of range.
+   */
+  private static Duration getEstTimeToBalance(
+      UUID univUuid, MetricQueryHelper metricQueryHelper,
+      RuntimeConfigFactory runtimeConfigFactory) {
+    long scrapeIntervalSecs =
+        SwamperHelper.getScrapeIntervalSeconds(runtimeConfigFactory.staticApplicationConf());
+    long windowDurationSecs = Long.max(scrapeIntervalSecs * 5, Duration.ofMinutes(2).toSeconds());
+    final String promFilters =
+        String.format("export_type=\"master_export\",universe_uuid=\"%s\"", univUuid);
+    final String promQuery =
+        String.format(
+            "max ((total_table_load_difference{%1$s}"
+                + " / (-1 * deriv(total_table_load_difference{%1$s}[%2$ds])))"
+                + " or (tablets_in_wrong_placement{%1$s}"
+                + " / (-1 * deriv(tablets_in_wrong_placement{%1$s}[%2$ds]))))",
+            promFilters, windowDurationSecs);
+    ArrayList queryResult = metricQueryHelper.queryDirect(promQuery);
+    log.trace("Response to is load balanced query {} is {}", promQuery, queryResult);
+    if (queryResult.size() != 1 || queryResult.get(0).values.isEmpty()) {
+      throw new RuntimeException("Unable to estimate time to balance");
+    }
+    double estSeconds = queryResult.get(0).values.get(0).getRight();
+    if (!Double.isFinite(estSeconds)
+        || estSeconds <= 0 || estSeconds > Duration.ofDays(10).getSeconds()) {
+      throw new RuntimeException("Unable to calculate time to balance");
+    }
+    return Duration.ofSeconds((long) Math.ceil(estSeconds));
+  }
+}
diff --git a/managed/src/main/resources/metric/Dashboard.json b/managed/src/main/resources/metric/Dashboard.json
index b915290694db..bee7f334d5ac 100644
--- a/managed/src/main/resources/metric/Dashboard.json
+++ b/managed/src/main/resources/metric/Dashboard.json
@@ -163,7 +163,7 @@
"x" : 0,
"y" : 1
},
- "id" : 186,
+ "id" : 187,
"interval" : null,
"links" : [ ],
"mappingType" : 2,
@@ -310,7 +310,7 @@
"value" : "null"
} ],
"valueName" : "current",
- "id" : 187
+ "id" : 188
}, {
"collapsed" : false,
"datasource" : "$datasource",
@@ -323,7 +323,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Container",
- "id" : 188
+ "id" : 189
}, {
"aliasColors" : { },
"bars" : false,
@@ -796,7 +796,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Master",
- "id" : 189
+ "id" : 190
}, {
"aliasColors" : { },
"bars" : false,
@@ -1267,6 +1267,108 @@
"x" : 12,
"y" : 74
}
+ }, {
+ "aliasColors" : { },
+ "bars" : false,
+ "collapsed" : false,
+ "dashLength" : 10,
+ "dashes" : false,
+ "datasource" : "$datasource",
+ "fieldConfig" : {
+ "defaults" : {
+ "custom" : { }
+ },
+ "overrides" : [ ]
+ },
+ "fill" : 1,
+ "fillGradient" : 0,
+ "hiddenSeries" : false,
+ "legend" : {
+ "alignAsTable" : true,
+ "avg" : false,
+ "current" : true,
+ "max" : true,
+ "min" : false,
+ "show" : true,
+ "total" : false,
+ "values" : true
+ },
+ "lines" : true,
+ "linewidth" : 1,
+ "nullPointMode" : "null",
+ "options" : {
+ "dataLinks" : [ ]
+ },
+ "percentage" : false,
+ "pointradius" : 2,
+ "points" : false,
+ "renderer" : "flot",
+ "repeat" : null,
+ "repeatDirection" : "v",
+ "seriesOverrides" : [ ],
+ "spaceLength" : 10,
+ "stack" : false,
+ "steppedLine" : false,
+ "thresholds" : [ ],
+ "timeFrom" : null,
+ "timeRegions" : [ ],
+ "timeShift" : null,
+ "tooltip" : {
+ "shared" : true,
+ "sort" : 0,
+ "value_type" : "individual"
+ },
+ "type" : "graph",
+ "xaxis" : {
+ "buckets" : null,
+ "mode" : "time",
+ "name" : null,
+ "show" : true,
+ "values" : [ ]
+ },
+ "yaxes" : [ {
+ "label" : null,
+ "logBase" : 1,
+ "max" : null,
+ "min" : null,
+ "show" : true,
+ "format" : ""
+ }, {
+ "format" : "short",
+ "label" : null,
+ "logBase" : 1,
+ "max" : null,
+ "min" : null,
+ "show" : false
+ } ],
+ "yaxis" : {
+ "align" : false,
+ "alignLevel" : null
+ },
+ "title" : "Master load balancer statistics",
+ "id" : 69,
+ "targets" : [ {
+ "hide" : false,
+ "legendFormat" : "Tablets in wrong placement",
+ "expr" : "max(max_over_time(tablets_in_wrong_placement{node_prefix=~\"$dbcluster\", export_type=\"master_export\"}[300s])) by (saved_name)",
+ "refId" : "A"
+ }, {
+ "hide" : false,
+ "legendFormat" : "Total table load difference",
+ "expr" : "max(max_over_time(total_table_load_difference{node_prefix=~\"$dbcluster\", export_type=\"master_export\"}[300s])) by (saved_name)",
+ "refId" : "B"
+ }, {
+ "hide" : false,
+ "legendFormat" : "Tablets with blacklisted leaders",
+ "expr" : "max(max_over_time(blacklisted_leaders{node_prefix=~\"$dbcluster\", export_type=\"master_export\"}[300s])) by (saved_name)",
+ "refId" : "C"
+ } ],
+ "gridPos" : {
+ "w" : 12,
+ "h" : 9,
+ "x" : 0,
+ "y" : 83
+ }
}, {
"aliasColors" : { },
"bars" : false,
@@ -1346,7 +1448,7 @@
"alignLevel" : null
},
"title" : "Max Follower Lag (ms)",
- "id" : 83,
+ "id" : 84,
"targets" : [ {
"hide" : false,
"legendFormat" : "Max Follower Lag (ms)",
@@ -1356,7 +1458,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 83
}
}, {
@@ -1438,7 +1540,7 @@
"alignLevel" : null
},
"title" : "MultiRaftUpdateConsensus / sec",
- "id" : 84,
+ "id" : 85,
"targets" : [ {
"hide" : false,
"legendFormat" : "MultiRaftUpdateConsensus",
@@ -1448,8 +1550,8 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 12,
- "y" : 83
+ "x" : 0,
+ "y" : 92
}
}, {
"aliasColors" : { },
@@ -1530,7 +1632,7 @@
"alignLevel" : null
},
"title" : "MultiRaftUpdateConsensus Latency",
- "id" : 85,
+ "id" : 86,
"targets" : [ {
"hide" : false,
"legendFormat" : "MultiRaftUpdateConsensus",
@@ -1540,7 +1642,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 92
}
}, {
@@ -1622,7 +1724,7 @@
"alignLevel" : null
},
"title" : "Overall RPCs / sec",
- "id" : 86,
+ "id" : 87,
"targets" : [ {
"hide" : false,
"legendFormat" : "RPCs",
@@ -1632,8 +1734,8 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 12,
- "y" : 92
+ "x" : 0,
+ "y" : 101
}
}, {
"aliasColors" : { },
@@ -1714,7 +1816,7 @@
"alignLevel" : null
},
"title" : "Create/Delete Table RPCs",
- "id" : 87,
+ "id" : 88,
"targets" : [ {
"hide" : false,
"legendFormat" : "Create Table",
@@ -1729,7 +1831,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 101
}
}, {
@@ -1811,7 +1913,7 @@
"alignLevel" : null
},
"title" : "TS Heartbeats / sec",
- "id" : 90,
+ "id" : 91,
"targets" : [ {
"hide" : false,
"legendFormat" : "TS Heartbeats",
@@ -1821,8 +1923,8 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 12,
- "y" : 101
+ "x" : 0,
+ "y" : 110
}
}, {
"aliasColors" : { },
@@ -1903,7 +2005,7 @@
"alignLevel" : null
},
"title" : "Master TSService Reads / sec",
- "id" : 91,
+ "id" : 92,
"targets" : [ {
"hide" : false,
"legendFormat" : "TS Reads",
@@ -1913,7 +2015,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 110
}
}, {
@@ -1995,7 +2097,7 @@
"alignLevel" : null
},
"title" : "Master TS Read Latency",
- "id" : 92,
+ "id" : 93,
"targets" : [ {
"hide" : false,
"legendFormat" : "TSService Read Latency",
@@ -2005,8 +2107,8 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 12,
- "y" : 110
+ "x" : 0,
+ "y" : 119
}
}, {
"aliasColors" : { },
@@ -2087,7 +2189,7 @@
"alignLevel" : null
},
"title" : "Master TSService Writes / sec",
- "id" : 93,
+ "id" : 94,
"targets" : [ {
"hide" : false,
"legendFormat" : "TS Writes",
@@ -2097,7 +2199,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 119
}
}, {
@@ -2179,7 +2281,7 @@
"alignLevel" : null
},
"title" : "Master TS Write Latency",
- "id" : 94,
+ "id" : 95,
"targets" : [ {
"hide" : false,
"legendFormat" : "TSService Write Latency",
@@ -2189,8 +2291,8 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 12,
- "y" : 119
+ "x" : 0,
+ "y" : 128
}
}, {
"aliasColors" : { },
@@ -2271,7 +2373,7 @@
"alignLevel" : null
},
"title" : "Uptime",
- "id" : 95,
+ "id" : 96,
"targets" : [ {
"hide" : false,
"legendFormat" : "Uptime",
@@ -2281,7 +2383,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 128
}
}, {
@@ -2363,7 +2465,7 @@
"alignLevel" : null
},
"title" : "Inbound RPC Connections Alive",
- "id" : 96,
+ "id" : 97,
"targets" : [ {
"hide" : false,
"legendFormat" : "Connections",
@@ -2373,8 +2475,8 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 12,
- "y" : 128
+ "x" : 0,
+ "y" : 137
}
}, {
"aliasColors" : { },
@@ -2455,7 +2557,7 @@
"alignLevel" : null
},
"title" : "RPC Queue Size",
- "id" : 168,
+ "id" : 169,
"targets" : [ {
"hide" : false,
"legendFormat" : "rpcs_in_queue_.*",
@@ -2465,7 +2567,7 @@
"gridPos" : {
"w" : 12,
"h" : 9,
- "x" : 0,
+ "x" : 12,
"y" : 137
}
}, {
@@ -2480,7 +2582,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Master Advanced",
- "id" : 190
+ "id" : 191
}, {
"aliasColors" : { },
"bars" : false,
@@ -2754,7 +2856,7 @@
"alignLevel" : null
},
"title" : "WAL Bytes Read / Sec / Node",
- "id" : 69,
+ "id" : 70,
"targets" : [ {
"hide" : false,
"legendFormat" : "Read",
@@ -2846,7 +2948,7 @@
"alignLevel" : null
},
"title" : "WAL Bytes Written / Sec / Node",
- "id" : 70,
+ "id" : 71,
"targets" : [ {
"hide" : false,
"legendFormat" : "Written",
@@ -2938,7 +3040,7 @@
"alignLevel" : null
},
"title" : "WAL Latency",
- "id" : 71,
+ "id" : 72,
"targets" : [ {
"hide" : false,
"legendFormat" : "Sync",
@@ -3040,7 +3142,7 @@
"alignLevel" : null
},
"title" : "Average SSTables",
- "id" : 72,
+ "id" : 73,
"targets" : [ {
"hide" : false,
"legendFormat" : "SST Files",
@@ -3132,7 +3234,7 @@
"alignLevel" : null
},
"title" : "Cache Hit & Miss",
- "id" : 73,
+ "id" : 74,
"targets" : [ {
"hide" : false,
"legendFormat" : "Hit",
@@ -3229,7 +3331,7 @@
"alignLevel" : null
},
"title" : "Block cache usage",
- "id" : 74,
+ "id" : 75,
"targets" : [ {
"hide" : false,
"legendFormat" : "Multi Touch",
@@ -3326,7 +3428,7 @@
"alignLevel" : null
},
"title" : "LSM-DB Blooms usefulness",
- "id" : 75,
+ "id" : 76,
"targets" : [ {
"hide" : false,
"legendFormat" : "Blooms Useful",
@@ -3423,7 +3525,7 @@
"alignLevel" : null
},
"title" : "Compaction",
- "id" : 76,
+ "id" : 77,
"targets" : [ {
"hide" : false,
"legendFormat" : "Written",
@@ -3520,7 +3622,7 @@
"alignLevel" : null
},
"title" : "Compaction num files",
- "id" : 77,
+ "id" : 78,
"targets" : [ {
"hide" : false,
"legendFormat" : "Num Files",
@@ -3612,7 +3714,7 @@
"alignLevel" : null
},
"title" : "Compaction time",
- "id" : 78,
+ "id" : 79,
"targets" : [ {
"hide" : false,
"legendFormat" : "Avg",
@@ -3704,7 +3806,7 @@
"alignLevel" : null
},
"title" : "Flush write",
- "id" : 79,
+ "id" : 80,
"targets" : [ {
"hide" : false,
"legendFormat" : "Written",
@@ -3796,7 +3898,7 @@
"alignLevel" : null
},
"title" : "LSM-DB Seeks / Sec / Node",
- "id" : 80,
+ "id" : 81,
"targets" : [ {
"hide" : false,
"legendFormat" : "Seek",
@@ -3888,7 +3990,7 @@
"alignLevel" : null
},
"title" : "LSM-DB Seek/Next/Prev Ops / Sec",
- "id" : 81,
+ "id" : 82,
"targets" : [ {
"hide" : false,
"legendFormat" : "Seek",
@@ -3990,7 +4092,7 @@
"alignLevel" : null
},
"title" : "SSTable size",
- "id" : 82,
+ "id" : 83,
"targets" : [ {
"hide" : false,
"legendFormat" : "Size",
@@ -4082,7 +4184,7 @@
"alignLevel" : null
},
"title" : "TCMalloc Stats",
- "id" : 88,
+ "id" : 89,
"targets" : [ {
"hide" : false,
"legendFormat" : "In Use",
@@ -4179,7 +4281,7 @@
"alignLevel" : null
},
"title" : "Threads Running",
- "id" : 89,
+ "id" : 90,
"targets" : [ {
"hide" : false,
"legendFormat" : "Running",
@@ -4204,7 +4306,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Misc",
- "id" : 191
+ "id" : 192
}, {
"aliasColors" : { },
"bars" : false,
@@ -4938,7 +5040,7 @@
"alignLevel" : null
},
"title" : "Cache Hit & Miss",
- "id" : 131,
+ "id" : 132,
"targets" : [ {
"hide" : false,
"legendFormat" : "Hit",
@@ -5035,7 +5137,7 @@
"alignLevel" : null
},
"title" : "Compaction",
- "id" : 132,
+ "id" : 133,
"targets" : [ {
"hide" : false,
"legendFormat" : "Written",
@@ -5132,7 +5234,7 @@
"alignLevel" : null
},
"title" : "WAL Bytes Written / Sec",
- "id" : 133,
+ "id" : 134,
"targets" : [ {
"hide" : false,
"legendFormat" : "Written",
@@ -5224,7 +5326,7 @@
"alignLevel" : null
},
"title" : "WAL Latency",
- "id" : 134,
+ "id" : 135,
"targets" : [ {
"hide" : false,
"legendFormat" : "Sync",
@@ -5326,7 +5428,7 @@
"alignLevel" : null
},
"title" : "WAL Ops / Sec",
- "id" : 135,
+ "id" : 136,
"targets" : [ {
"hide" : false,
"legendFormat" : "Sync",
@@ -5428,7 +5530,7 @@
"alignLevel" : null
},
"title" : "Memtable Size",
- "id" : 136,
+ "id" : 137,
"targets" : [ {
"hide" : false,
"legendFormat" : "IntentsDB",
@@ -5545,7 +5647,7 @@
"alignLevel" : null
},
"title" : "Memory Pressure Rejections",
- "id" : 137,
+ "id" : 138,
"targets" : [ {
"hide" : false,
"legendFormat" : "Leader",
@@ -5647,7 +5749,7 @@
"alignLevel" : null
},
"title" : "Operations Inflight",
- "id" : 138,
+ "id" : 139,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write",
@@ -5744,7 +5846,7 @@
"alignLevel" : null
},
"title" : "Read Op Latency (Avg)",
- "id" : 139,
+ "id" : 140,
"targets" : [ {
"hide" : false,
"legendFormat" : "Read",
@@ -5836,7 +5938,7 @@
"alignLevel" : null
},
"title" : "Read Ops / Sec",
- "id" : 140,
+ "id" : 141,
"targets" : [ {
"hide" : false,
"legendFormat" : "Read",
@@ -5928,7 +6030,7 @@
"alignLevel" : null
},
"title" : "LSM-DB Seek/Next/Prev Ops / Sec",
- "id" : 141,
+ "id" : 142,
"targets" : [ {
"hide" : false,
"legendFormat" : "Seek",
@@ -6030,7 +6132,7 @@
"alignLevel" : null
},
"title" : "Write Op Latency (Avg)",
- "id" : 142,
+ "id" : 143,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write",
@@ -6122,7 +6224,7 @@
"alignLevel" : null
},
"title" : "Write Lock Latency",
- "id" : 143,
+ "id" : 144,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write Lock",
@@ -6214,7 +6316,7 @@
"alignLevel" : null
},
"title" : "Write Rejections",
- "id" : 144,
+ "id" : 145,
"targets" : [ {
"hide" : false,
"legendFormat" : "Rejections",
@@ -6306,7 +6408,7 @@
"alignLevel" : null
},
"title" : "Write Ops / Sec",
- "id" : 145,
+ "id" : 146,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write",
@@ -6331,7 +6433,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Node Metrics",
- "id" : 192
+ "id" : 193
}, {
"aliasColors" : { },
"bars" : false,
@@ -7545,7 +7647,7 @@
"alignLevel" : null
},
"title" : "Memory Usage",
- "id" : 97,
+ "id" : 98,
"targets" : [ {
"hide" : false,
"legendFormat" : "Total",
@@ -7652,7 +7754,7 @@
"alignLevel" : null
},
"title" : "Network Bytes / Sec / Node",
- "id" : 98,
+ "id" : 99,
"targets" : [ {
"hide" : false,
"legendFormat" : "TX",
@@ -7749,7 +7851,7 @@
"alignLevel" : null
},
"title" : "Network Errors / Sec / Node",
- "id" : 99,
+ "id" : 100,
"targets" : [ {
"hide" : false,
"legendFormat" : "RX",
@@ -7846,7 +7948,7 @@
"alignLevel" : null
},
"title" : "Network Packets / Sec / Node",
- "id" : 100,
+ "id" : 101,
"targets" : [ {
"hide" : false,
"legendFormat" : "TX",
@@ -7943,7 +8045,7 @@
"alignLevel" : null
},
"title" : "Clock Skew",
- "id" : 101,
+ "id" : 102,
"targets" : [ {
"hide" : false,
"legendFormat" : "Max",
@@ -8035,7 +8137,7 @@
"alignLevel" : null
},
"title" : "node_up",
- "id" : 102,
+ "id" : 103,
"targets" : [ {
"hide" : false,
"legendFormat" : "",
@@ -8127,7 +8229,7 @@
"alignLevel" : null
},
"title" : "System Load Over Time",
- "id" : 130,
+ "id" : 131,
"targets" : [ {
"hide" : false,
"legendFormat" : "5 minutes",
@@ -8162,7 +8264,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Otel Collector",
- "id" : 193
+ "id" : 194
}, {
"aliasColors" : { },
"bars" : false,
@@ -8242,7 +8344,7 @@
"alignLevel" : null
},
"title" : "Audit Log Send Queue Size",
- "id" : 103,
+ "id" : 104,
"targets" : [ {
"hide" : false,
"legendFormat" : "",
@@ -8334,7 +8436,7 @@
"alignLevel" : null
},
"title" : "Audit Log Records Read / Sec",
- "id" : 104,
+ "id" : 105,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -8426,7 +8528,7 @@
"alignLevel" : null
},
"title" : "Audit Log Records Refused / Sec",
- "id" : 105,
+ "id" : 106,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -8518,7 +8620,7 @@
"alignLevel" : null
},
"title" : "Audit Log Records Send Failed / Sec",
- "id" : 106,
+ "id" : 107,
"targets" : [ {
"hide" : false,
"legendFormat" : "",
@@ -8610,7 +8712,7 @@
"alignLevel" : null
},
"title" : "Audit Log Records Sent / Sec",
- "id" : 107,
+ "id" : 108,
"targets" : [ {
"hide" : false,
"legendFormat" : "",
@@ -8635,7 +8737,7 @@
"panels" : [ ],
"type" : "row",
"title" : "RocksDB",
- "id" : 194
+ "id" : 195
}, {
"aliasColors" : { },
"bars" : false,
@@ -10814,7 +10916,7 @@
"alignLevel" : null
},
"title" : "Tablet Splitting Operations",
- "id" : 146,
+ "id" : 147,
"targets" : [ {
"hide" : false,
"legendFormat" : "Apply",
@@ -10854,7 +10956,7 @@
"panels" : [ ],
"type" : "row",
"title" : "Tablet Server",
- "id" : 195
+ "id" : 196
}, {
"aliasColors" : { },
"bars" : false,
@@ -11399,7 +11501,7 @@
"alignLevel" : null
},
"title" : "Raft Leader",
- "id" : 108,
+ "id" : 109,
"targets" : [ {
"hide" : false,
"legendFormat" : "Raft Leader",
@@ -11491,7 +11593,7 @@
"alignLevel" : null
},
"title" : "Total Ops / Sec",
- "id" : 147,
+ "id" : 148,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write",
@@ -11593,7 +11695,7 @@
"alignLevel" : null
},
"title" : "Async Replication Lag",
- "id" : 148,
+ "id" : 149,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -11685,7 +11787,7 @@
"alignLevel" : null
},
"title" : "WAL Cache Num Ops / Node",
- "id" : 149,
+ "id" : 150,
"targets" : [ {
"hide" : false,
"legendFormat" : "Num Ops",
@@ -11777,7 +11879,7 @@
"alignLevel" : null
},
"title" : "Total Consensus Change Config",
- "id" : 150,
+ "id" : 151,
"targets" : [ {
"hide" : false,
"legendFormat" : "ChangeConfig",
@@ -11884,7 +11986,7 @@
"alignLevel" : null
},
"title" : "Change Config Latency",
- "id" : 151,
+ "id" : 152,
"targets" : [ {
"hide" : false,
"legendFormat" : "ChangeConfig",
@@ -11976,7 +12078,7 @@
"alignLevel" : null
},
"title" : "Consensus Rpc Latencies",
- "id" : 152,
+ "id" : 153,
"targets" : [ {
"hide" : false,
"legendFormat" : "MultiRaftUpdateConsensus",
@@ -12078,7 +12180,7 @@
"alignLevel" : null
},
"title" : "Consensus Ops / Sec",
- "id" : 153,
+ "id" : 154,
"targets" : [ {
"hide" : false,
"legendFormat" : "MultiRaftUpdateConsensus",
@@ -12180,7 +12282,7 @@
"alignLevel" : null
},
"title" : "Context Switches",
- "id" : 154,
+ "id" : 155,
"targets" : [ {
"hide" : false,
"legendFormat" : "Voluntary",
@@ -12277,7 +12379,7 @@
"alignLevel" : null
},
"title" : "CPU Util Secs / Sec",
- "id" : 155,
+ "id" : 156,
"targets" : [ {
"hide" : false,
"legendFormat" : "System",
@@ -12374,7 +12476,7 @@
"alignLevel" : null
},
"title" : "Glog messages",
- "id" : 156,
+ "id" : 157,
"targets" : [ {
"hide" : false,
"legendFormat" : "Warning",
@@ -12476,7 +12578,7 @@
"alignLevel" : null
},
"title" : "Reactor Delays",
- "id" : 157,
+ "id" : 158,
"targets" : [ {
"hide" : false,
"legendFormat" : "Incoming Queue",
@@ -12578,7 +12680,7 @@
"alignLevel" : null
},
"title" : "Live Tablet Peers",
- "id" : 158,
+ "id" : 159,
"targets" : [ {
"hide" : false,
"legendFormat" : "Live Tablet Peers",
@@ -12670,7 +12772,7 @@
"alignLevel" : null
},
"title" : "WAL Bytes Read / Sec / Node",
- "id" : 159,
+ "id" : 160,
"targets" : [ {
"hide" : false,
"legendFormat" : "Read",
@@ -12762,7 +12864,7 @@
"alignLevel" : null
},
"title" : "WAL Bytes Written / Sec / Node",
- "id" : 160,
+ "id" : 161,
"targets" : [ {
"hide" : false,
"legendFormat" : "Written",
@@ -12854,7 +12956,7 @@
"alignLevel" : null
},
"title" : "WAL Latency",
- "id" : 161,
+ "id" : 162,
"targets" : [ {
"hide" : false,
"legendFormat" : "Sync",
@@ -12956,7 +13058,7 @@
"alignLevel" : null
},
"title" : "WAL Ops / Sec / Node",
- "id" : 162,
+ "id" : 163,
"targets" : [ {
"hide" : false,
"legendFormat" : "Sync",
@@ -13058,7 +13160,7 @@
"alignLevel" : null
},
"title" : "WAL Stats / Node",
- "id" : 163,
+ "id" : 164,
"targets" : [ {
"hide" : false,
"legendFormat" : "Bytes Read",
@@ -13155,7 +13257,7 @@
"alignLevel" : null
},
"title" : "Max Follower Lag (ms)",
- "id" : 164,
+ "id" : 165,
"targets" : [ {
"hide" : false,
"legendFormat" : "Max Follower Lag (ms)",
@@ -13247,7 +13349,7 @@
"alignLevel" : null
},
"title" : "Average Latency",
- "id" : 165,
+ "id" : 166,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write",
@@ -13349,7 +13451,7 @@
"alignLevel" : null
},
"title" : "Remote Bootstraps",
- "id" : 166,
+ "id" : 167,
"targets" : [ {
"hide" : false,
"legendFormat" : "Remote Bootstraps",
@@ -13441,7 +13543,7 @@
"alignLevel" : null
},
"title" : "RPC Queue Size",
- "id" : 167,
+ "id" : 168,
"targets" : [ {
"hide" : false,
"legendFormat" : "YCQL RPC",
@@ -13533,7 +13635,7 @@
"alignLevel" : null
},
"title" : "RPC Queue Size",
- "id" : 170,
+ "id" : 171,
"targets" : [ {
"hide" : false,
"legendFormat" : "rpcs_in_queue_.*",
@@ -13625,7 +13727,7 @@
"alignLevel" : null
},
"title" : "tserver_rpcs_per_sec_by_universe",
- "id" : 171,
+ "id" : 172,
"targets" : [ {
"hide" : false,
"legendFormat" : "",
@@ -13717,7 +13819,7 @@
"alignLevel" : null
},
"title" : "Ops / Sec / Node",
- "id" : 172,
+ "id" : 173,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write",
@@ -13819,7 +13921,7 @@
"alignLevel" : null
},
"title" : "SpinLock Time/Server",
- "id" : 173,
+ "id" : 174,
"targets" : [ {
"hide" : false,
"legendFormat" : "SpinLock",
@@ -13911,7 +14013,7 @@
"alignLevel" : null
},
"title" : "TCMalloc Stats",
- "id" : 174,
+ "id" : 175,
"targets" : [ {
"hide" : false,
"legendFormat" : "In Use",
@@ -14008,7 +14110,7 @@
"alignLevel" : null
},
"title" : "Threads Running",
- "id" : 175,
+ "id" : 176,
"targets" : [ {
"hide" : false,
"legendFormat" : "Running",
@@ -14100,7 +14202,7 @@
"alignLevel" : null
},
"title" : "Threads Started",
- "id" : 176,
+ "id" : 177,
"targets" : [ {
"hide" : false,
"legendFormat" : "Started",
@@ -14192,7 +14294,7 @@
"alignLevel" : null
},
"title" : "Uptime",
- "id" : 177,
+ "id" : 178,
"targets" : [ {
"hide" : false,
"legendFormat" : "Uptime",
@@ -14284,7 +14386,7 @@
"alignLevel" : null
},
"title" : "Write Lock Latency",
- "id" : 178,
+ "id" : 179,
"targets" : [ {
"hide" : false,
"legendFormat" : "Write Lock",
@@ -14376,7 +14478,7 @@
"alignLevel" : null
},
"title" : "Inbound RPC Connections Alive",
- "id" : 179,
+ "id" : 180,
"targets" : [ {
"hide" : false,
"legendFormat" : "Connections",
@@ -14401,7 +14503,7 @@
"panels" : [ ],
"type" : "row",
"title" : "YCQL Ops & Latency",
- "id" : 196
+ "id" : 197
}, {
"aliasColors" : { },
"bars" : false,
@@ -15526,7 +15628,7 @@
"alignLevel" : null
},
"title" : "Response Size",
- "id" : 129,
+ "id" : 130,
"targets" : [ {
"hide" : false,
"legendFormat" : "Response Size",
@@ -15551,7 +15653,7 @@
"panels" : [ ],
"type" : "row",
"title" : "YEDIS Ops & Latency",
- "id" : 197
+ "id" : 198
}, {
"aliasColors" : { },
"bars" : false,
@@ -15631,7 +15733,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency (Avg)",
- "id" : 109,
+ "id" : 110,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -15723,7 +15825,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency -- Hash",
- "id" : 110,
+ "id" : 111,
"targets" : [ {
"hide" : false,
"legendFormat" : "HIncrBy",
@@ -15870,7 +15972,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency -- Others",
- "id" : 111,
+ "id" : 112,
"targets" : [ {
"hide" : false,
"legendFormat" : "Auth",
@@ -16002,7 +16104,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency -- Set",
- "id" : 112,
+ "id" : 113,
"targets" : [ {
"hide" : false,
"legendFormat" : "SCard",
@@ -16119,7 +16221,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency -- Sorted Set",
- "id" : 113,
+ "id" : 114,
"targets" : [ {
"hide" : false,
"legendFormat" : "ZRem",
@@ -16231,7 +16333,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency -- Str",
- "id" : 114,
+ "id" : 115,
"targets" : [ {
"hide" : false,
"legendFormat" : "GetRange",
@@ -16378,7 +16480,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency -- TS",
- "id" : 115,
+ "id" : 116,
"targets" : [ {
"hide" : false,
"legendFormat" : "TsGet",
@@ -16485,7 +16587,7 @@
"alignLevel" : null
},
"title" : "YBClient Reactor Delays",
- "id" : 116,
+ "id" : 117,
"targets" : [ {
"hide" : false,
"legendFormat" : "Incoming Queue",
@@ -16587,7 +16689,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec",
- "id" : 117,
+ "id" : 118,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -16679,7 +16781,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec -- Hash",
- "id" : 118,
+ "id" : 119,
"targets" : [ {
"hide" : false,
"legendFormat" : "HGetAll",
@@ -16826,7 +16928,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec -- Others",
- "id" : 119,
+ "id" : 120,
"targets" : [ {
"hide" : false,
"legendFormat" : "Echo",
@@ -16958,7 +17060,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec -- Set",
- "id" : 120,
+ "id" : 121,
"targets" : [ {
"hide" : false,
"legendFormat" : "SCard",
@@ -17075,7 +17177,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec -- Sorted Set",
- "id" : 121,
+ "id" : 122,
"targets" : [ {
"hide" : false,
"legendFormat" : "ZRangeByScore",
@@ -17187,7 +17289,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec -- Str",
- "id" : 122,
+ "id" : 123,
"targets" : [ {
"hide" : false,
"legendFormat" : "MGet",
@@ -17334,7 +17436,7 @@
"alignLevel" : null
},
"title" : "Total YEDIS Ops / Sec -- TS",
- "id" : 123,
+ "id" : 124,
"targets" : [ {
"hide" : false,
"legendFormat" : "TsGet",
@@ -17441,7 +17543,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency (P95)",
- "id" : 124,
+ "id" : 125,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -17533,7 +17635,7 @@
"alignLevel" : null
},
"title" : "YEDIS Op Latency (P99)",
- "id" : 125,
+ "id" : 126,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -17625,7 +17727,7 @@
"alignLevel" : null
},
"title" : "YBClient Latency Local vs Remote",
- "id" : 126,
+ "id" : 127,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -17717,7 +17819,7 @@
"alignLevel" : null
},
"title" : "YBClient Ops Local vs Remote",
- "id" : 127,
+ "id" : 128,
"targets" : [ {
"hide" : false,
"legendFormat" : null,
@@ -17809,7 +17911,7 @@
"alignLevel" : null
},
"title" : "Inbound RPC Connections Alive",
- "id" : 128,
+ "id" : 129,
"targets" : [ {
"hide" : false,
"legendFormat" : "Connections",
@@ -17901,7 +18003,7 @@
"alignLevel" : null
},
"title" : "RPC Queue Size",
- "id" : 169,
+ "id" : 170,
"targets" : [ {
"hide" : false,
"legendFormat" : "YEDIS RPC",
@@ -17926,7 +18028,7 @@
"panels" : [ ],
"type" : "row",
"title" : "YSQL Ops & Latency",
- "id" : 198
+ "id" : 199
}, {
"aliasColors" : { },
"bars" : false,
@@ -18006,7 +18108,7 @@
"alignLevel" : null
},
"title" : "Total YSQL Connections",
- "id" : 180,
+ "id" : 181,
"targets" : [ {
"hide" : false,
"legendFormat" : "Total",
@@ -18103,7 +18205,7 @@
"alignLevel" : null
},
"title" : "Total YSQL Connections / Sec",
- "id" : 181,
+ "id" : 182,
"targets" : [ {
"hide" : false,
"legendFormat" : "Rejected",
@@ -18200,7 +18302,7 @@
"alignLevel" : null
},
"title" : "Total YSQL Ops / Sec",
- "id" : 182,
+ "id" : 183,
"targets" : [ {
"hide" : false,
"legendFormat" : "Begin",
@@ -18312,7 +18414,7 @@
"alignLevel" : null
},
"title" : "Total YSQL Ops / Sec",
- "id" : 183,
+ "id" : 184,
"targets" : [ {
"hide" : false,
"legendFormat" : "Insert",
@@ -18419,7 +18521,7 @@
"alignLevel" : null
},
"title" : "YSQL Advanced Op Latency (Avg)",
- "id" : 184,
+ "id" : 185,
"targets" : [ {
"hide" : false,
"legendFormat" : "Rollback",
@@ -18531,7 +18633,7 @@
"alignLevel" : null
},
"title" : "YSQL Op Latency (Avg)",
- "id" : 185,
+ "id" : 186,
"targets" : [ {
"hide" : false,
"legendFormat" : "Delete",
diff --git a/managed/src/main/resources/metric/metrics.yml b/managed/src/main/resources/metric/metrics.yml
index 0760d7f0f775..00a044a76f70 100644
--- a/managed/src/main/resources/metric/metrics.yml
+++ b/managed/src/main/resources/metric/metrics.yml
@@ -2776,6 +2776,25 @@ master_max_follower_lag:
alias:
"follower_lag_ms": "Max Follower Lag (ms)"
+
+master_load_balancer_stats:
+ metric: "tablets_in_wrong_placement|total_table_load_difference|blacklisted_leaders"
+ filters:
+ export_type: "master_export"
+ range: true
+ function: "max_over_time|max"
+ panel_group: "Master"
+ group_by: "saved_name"
+ layout:
+ title: "Master load balancer statistics"
+ xaxis:
+ type: "date"
+ yaxis:
+ alias:
+ "tablets_in_wrong_placement": "Tablets in wrong placement"
+ "total_table_load_difference": "Total table load difference"
+ "blacklisted_leaders": "Tablets with blacklisted leaders"
+
tserver_max_follower_lag:
metric: "follower_lag_ms"
filters:
diff --git a/managed/src/main/resources/swagger-strict.json b/managed/src/main/resources/swagger-strict.json
index 9a477d100cc5..17389605bfac 100644
--- a/managed/src/main/resources/swagger-strict.json
+++ b/managed/src/main/resources/swagger-strict.json
@@ -7138,6 +7138,11 @@
"required" : [ "instanceUUID", "peerRole", "port", "privateIp", "uptimeSeconds" ],
"type" : "object"
},
+ "MasterLBStateResponse" : {
+ "description" : "Master tablet load balancer status",
+ "properties" : { },
+ "type" : "object"
+ },
"MasterNode" : {
"properties" : {
"cloudInfo" : {
@@ -24763,6 +24768,46 @@
"tags" : [ "Universe information" ]
}
},
+ "/api/v1/customers/{cUUID}/universes/{uniUUID}/master_lb_state" : {
+ "get" : {
+ "description" : "Available since YBA version 2024.2.0",
+ "operationId" : "getMasterLBState",
+ "parameters" : [ {
+ "format" : "uuid",
+ "in" : "path",
+ "name" : "cUUID",
+ "required" : true,
+ "type" : "string"
+ }, {
+ "format" : "uuid",
+ "in" : "path",
+ "name" : "uniUUID",
+ "required" : true,
+ "type" : "string"
+ } ],
+ "responses" : {
+ "200" : {
+ "description" : "successful operation",
+ "schema" : {
+ "$ref" : "#/definitions/MasterLBStateResponse"
+ }
+ }
+ },
+ "responsesObject" : {
+ "200" : {
+ "description" : "successful operation",
+ "schema" : {
+ "$ref" : "#/definitions/MasterLBStateResponse"
+ }
+ }
+ },
+ "security" : [ {
+ "apiKeyAuth" : [ ]
+ } ],
+ "summary" : "Get the state of master load balancing ops",
+ "tags" : [ "Universe node metadata (metamaster)" ]
+ }
+ },
"/api/v1/customers/{cUUID}/universes/{uniUUID}/masters" : {
"get" : {
"description" : "Available since YBA version 2.2.0.0.",
diff --git a/managed/src/main/resources/swagger.json b/managed/src/main/resources/swagger.json
index 44843ad05da9..55cca7c6b4cc 100644
--- a/managed/src/main/resources/swagger.json
+++ b/managed/src/main/resources/swagger.json
@@ -7185,6 +7185,11 @@
"required" : [ "instanceUUID", "peerRole", "port", "privateIp", "uptimeSeconds" ],
"type" : "object"
},
+ "MasterLBStateResponse" : {
+ "description" : "Master tablet load balancer status",
+ "properties" : { },
+ "type" : "object"
+ },
"MasterNode" : {
"properties" : {
"cloudInfo" : {
@@ -26306,6 +26311,46 @@
"tags" : [ "Universe management" ]
}
},
+ "/api/v1/customers/{cUUID}/universes/{uniUUID}/master_lb_state" : {
+ "get" : {
+ "description" : "Available since YBA version 2024.2.0",
+ "operationId" : "getMasterLBState",
+ "parameters" : [ {
+ "format" : "uuid",
+ "in" : "path",
+ "name" : "cUUID",
+ "required" : true,
+ "type" : "string"
+ }, {
+ "format" : "uuid",
+ "in" : "path",
+ "name" : "uniUUID",
+ "required" : true,
+ "type" : "string"
+ } ],
+ "responses" : {
+ "200" : {
+ "description" : "successful operation",
+ "schema" : {
+ "$ref" : "#/definitions/MasterLBStateResponse"
+ }
+ }
+ },
+ "responsesObject" : {
+ "200" : {
+ "description" : "successful operation",
+ "schema" : {
+ "$ref" : "#/definitions/MasterLBStateResponse"
+ }
+ }
+ },
+ "security" : [ {
+ "apiKeyAuth" : [ ]
+ } ],
+ "summary" : "Get the state of master load balancing ops",
+ "tags" : [ "Universe node metadata (metamaster)" ]
+ }
+ },
"/api/v1/customers/{cUUID}/universes/{uniUUID}/masters" : {
"get" : {
"description" : "Available since YBA version 2.2.0.0.",
diff --git a/managed/src/main/resources/v1.routes b/managed/src/main/resources/v1.routes
index 3b4dad36e7e4..a57cb6a2f84c 100644
--- a/managed/src/main/resources/v1.routes
+++ b/managed/src/main/resources/v1.routes
@@ -333,6 +333,7 @@ GET /customers/:cUUID/universes/:uniUUID/masters c
GET /customers/:cUUID/universes/:uniUUID/yqlservers com.yugabyte.yw.controllers.MetaMasterController.getYQLServerAddresses(cUUID: java.util.UUID, uniUUID: java.util.UUID)
GET /customers/:cUUID/universes/:uniUUID/ysqlservers com.yugabyte.yw.controllers.MetaMasterController.getYSQLServerAddresses(cUUID: java.util.UUID, uniUUID: java.util.UUID)
GET /customers/:cUUID/universes/:uniUUID/redisservers com.yugabyte.yw.controllers.MetaMasterController.getRedisServerAddresses(cUUID: java.util.UUID, uniUUID: java.util.UUID)
+GET /customers/:cUUID/universes/:uniUUID/master_lb_state com.yugabyte.yw.controllers.MetaMasterController.getMasterLBState(cUUID: java.util.UUID, uniUUID: java.util.UUID)
# Universe - Yugabyte DB management
POST /customers/:cUUID/universes/:uniUUID/run_query com.yugabyte.yw.controllers.UniverseYbDbAdminController.runQuery(cUUID: java.util.UUID, uniUUID: java.util.UUID, request: Request)
diff --git a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/EditUniverseLocalTest.java b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/EditUniverseLocalTest.java
index 49bf0cdbaae4..a4dff7abc1c5 100644
--- a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/EditUniverseLocalTest.java
+++ b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/EditUniverseLocalTest.java
@@ -46,6 +46,8 @@ public void testExpand() throws InterruptedException {
Universe universe = createUniverse(userIntent);
initYSQL(universe);
initAndStartPayload(universe);
+ verifyMasterLBStatus(customer, universe, true /*enabled*/, true /*idle*/);
+
changeNumberOfNodesInPrimary(universe, 2);
UUID taskID =
universeCRUDHandler.update(
@@ -53,6 +55,7 @@ public void testExpand() throws InterruptedException {
Universe.getOrBadRequest(universe.getUniverseUUID()),
universe.getUniverseDetails());
TaskInfo taskInfo = waitForTask(taskID, universe);
+
verifyUniverseTaskSuccess(taskInfo);
verifyUniverseState(Universe.getOrBadRequest(universe.getUniverseUUID()));
verifyYSQL(universe);
diff --git a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java
index 0a9ffd449ddc..dc4c8e21169d 100644
--- a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java
+++ b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java
@@ -45,6 +45,8 @@
import com.yugabyte.yw.common.gflags.SpecificGFlags;
import com.yugabyte.yw.common.services.YBClientService;
import com.yugabyte.yw.common.utils.Pair;
+import com.yugabyte.yw.controllers.apiModels.MasterLBStateResponse;
+import com.yugabyte.yw.controllers.handlers.MetaMasterHandler;
import com.yugabyte.yw.controllers.handlers.UniverseCRUDHandler;
import com.yugabyte.yw.controllers.handlers.UniverseTableHandler;
import com.yugabyte.yw.controllers.handlers.UpgradeUniverseHandler;
@@ -1114,4 +1116,21 @@ protected void verifyNodeModifications(Universe universe, int added, int removed
.filter(n -> n.state == NodeDetails.NodeState.ToBeRemoved)
.count());
}
+
+ /**
+ * Fetches the master load balancer state through {@link MetaMasterHandler} and asserts it.
+ *
+ * @param customer customer whose UUID is passed to the handler
+ * @param universe universe whose UUID is passed to the handler
+ * @param isEnabled expected value of the response's {@code isEnabled} field
+ * @param isLoadBalancerIdle expected value of the response's {@code isIdle} field
+ */
+ protected void verifyMasterLBStatus(
+ Customer customer, Universe universe, boolean isEnabled, boolean isLoadBalancerIdle) {
+ MetaMasterHandler metaMasterHandler = app.injector().instanceOf(MetaMasterHandler.class);
+ MasterLBStateResponse resp =
+ metaMasterHandler.getMasterLBState(customer.getUuid(), universe.getUniverseUUID());
+ // Expected value goes first so a failure reports "expected" and "was" the right way round.
+ assertEquals(isEnabled, resp.isEnabled);
+ assertEquals(isLoadBalancerIdle, resp.isIdle);
+ }
}
diff --git a/managed/ui/src/components/metrics/constants.ts b/managed/ui/src/components/metrics/constants.ts
index 6231a8244d6f..bed8632bdc2e 100644
--- a/managed/ui/src/components/metrics/constants.ts
+++ b/managed/ui/src/components/metrics/constants.ts
@@ -167,7 +167,8 @@ export const MetricTypesWithOperations = {
'master_cpu_util_secs',
'master_yb_rpc_connections',
'master_leaderless_and_underreplicated_tablets',
- 'master_max_follower_lag'
+ 'master_max_follower_lag',
+ 'master_load_balancer_stats'
]
},
master_advanced: {