Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better alerting for task lag #1876

Merged
merged 5 commits into from
Nov 15, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
package com.hubspot.singularity;

import java.util.ArrayList;
import java.util.List;

public class SingularityScheduledTasksInfo {

private final int numLateTasks;
private final int numFutureTasks;
private final long maxTaskLag;
private final long timestamp;
private final List<SingularityPendingTaskId> lateTasks;

private SingularityScheduledTasksInfo(int numLateTasks, int numFutureTasks, long maxTaskLag, long timestamp) {
this.numLateTasks = numLateTasks;
private SingularityScheduledTasksInfo(List<SingularityPendingTaskId> lateTasks, int numFutureTasks, long maxTaskLag, long timestamp) {
this.lateTasks = lateTasks;
this.numFutureTasks = numFutureTasks;
this.maxTaskLag = maxTaskLag;
this.timestamp = timestamp;
}

public int getNumLateTasks() {
return numLateTasks;
public List<SingularityPendingTaskId> getLateTasks() {
return lateTasks;
}

/**
 * Returns how many tasks are currently considered late.
 *
 * <p>The count is derived from the size of the list returned by
 * {@link #getLateTasks()} rather than being stored separately.
 *
 * @return the number of entries in the late-task list
 */
public int getNumLateTasks() {
  final List<SingularityPendingTaskId> late = getLateTasks();
  return late.size();
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formatting nit: we usually still put the method body on its own lines in cases like this:

public int getNumLateTasks() {
  return getLateTasks().size();
}


/**
 * Returns the stored count of future tasks.
 *
 * @return the value of the {@code numFutureTasks} field set at construction
 */
public int getNumFutureTasks() {
  return this.numFutureTasks;
}
Expand All @@ -35,15 +38,15 @@ public long getTimestamp() {
public static SingularityScheduledTasksInfo getInfo(List<SingularityPendingTask> pendingTasks, long millisDeltaForLateTasks) {
final long now = System.currentTimeMillis();

int numLateTasks = 0;
int numFutureTasks = 0;
long maxTaskLag = 0;
List<SingularityPendingTaskId> lateTasks = new ArrayList<>();

for (SingularityPendingTask pendingTask : pendingTasks) {
long delta = now - pendingTask.getPendingTaskId().getNextRunAt();

if (delta > millisDeltaForLateTasks) {
numLateTasks++;
lateTasks.add(pendingTask.getPendingTaskId());
} else {
numFutureTasks++;
}
Expand All @@ -53,6 +56,6 @@ public static SingularityScheduledTasksInfo getInfo(List<SingularityPendingTask>
}
}

return new SingularityScheduledTasksInfo(numLateTasks, numFutureTasks, maxTaskLag, now);
return new SingularityScheduledTasksInfo(lateTasks, numFutureTasks, maxTaskLag, now);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public class SingularityState {
private final int cooldownRequests;
private final int scheduledTasks;
private final int lateTasks;
private final List<SingularityPendingTaskId> listLateTasks;
private final int futureTasks;
private final int cleaningTasks;
private final int lbCleanupTasks;
Expand Down Expand Up @@ -85,6 +86,7 @@ public SingularityState(@JsonProperty("activeTasks") int activeTasks,
@JsonProperty("oldestDeployStep") long oldestDeployStep,
@JsonProperty("activeDeploys") List<SingularityDeployMarker> activeDeploys,
@JsonProperty("lateTasks") int lateTasks,
@JsonProperty("listLateTasks") List<SingularityPendingTaskId> listLateTasks,
@JsonProperty("futureTasks") int futureTasks,
@JsonProperty("maxTaskLag") long maxTaskLag,
@JsonProperty("generatedAt") long generatedAt,
Expand Down Expand Up @@ -119,6 +121,7 @@ public SingularityState(@JsonProperty("activeTasks") int activeTasks,
this.cleaningTasks = cleaningTasks;
this.hostStates = hostStates;
this.lateTasks = lateTasks;
this.listLateTasks = listLateTasks;
this.finishedRequests = finishedRequests;
this.futureTasks = futureTasks;
this.maxTaskLag = maxTaskLag;
Expand Down Expand Up @@ -276,6 +279,11 @@ public int getLateTasks() {
return lateTasks;
}

/**
 * Returns the ids of the late tasks held by this state object.
 *
 * <p>The list is handed back exactly as stored; no defensive copy is made here.
 *
 * @return the {@code listLateTasks} field as supplied to the constructor
 */
@Schema(description = "The list of all late tasks that have not been launched in time")
public List<SingularityPendingTaskId> getListLateTasks() {
  return this.listLateTasks;
}

@Schema(description = "The count of pending tasks that will be launched at a future time")
public int getFutureTasks() {
return futureTasks;
Expand Down Expand Up @@ -356,6 +364,7 @@ public String toString() {
", cooldownRequests=" + cooldownRequests +
", scheduledTasks=" + scheduledTasks +
", lateTasks=" + lateTasks +
", listLateTasks=" + listLateTasks +
", futureTasks=" + futureTasks +
", cleaningTasks=" + cleaningTasks +
", lbCleanupTasks=" + lbCleanupTasks +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ public SingularityState generateState(boolean includeRequestIds) {
final Optional<Double> minimumPriorityLevel = getMinimumPriorityLevel();

return new SingularityState(activeTasks, launchingTasks, numActiveRequests, cooldownRequests, numPausedRequests, scheduledTasks, pendingRequests, lbCleanupTasks, lbCleanupRequests, cleaningRequests, activeSlaves,
deadSlaves, decommissioningSlaves, activeRacks, deadRacks, decommissioningRacks, cleaningTasks, states, oldestDeploy, numDeploys, oldestDeployStep, activeDeploys, scheduledTasksInfo.getNumLateTasks(),
deadSlaves, decommissioningSlaves, activeRacks, deadRacks, decommissioningRacks, cleaningTasks, states, oldestDeploy, numDeploys, oldestDeployStep, activeDeploys, scheduledTasksInfo.getNumLateTasks(), scheduledTasksInfo.getLateTasks(),
scheduledTasksInfo.getNumFutureTasks(), scheduledTasksInfo.getMaxTaskLag(), System.currentTimeMillis(), includeRequestIds ? overProvisionedRequestIds : null,
includeRequestIds ? underProvisionedRequestIds : null, overProvisionedRequestIds.size(), underProvisionedRequestIds.size(), numFinishedRequests, unknownRacks, unknownSlaves, authDatastoreHealthy, minimumPriorityLevel,
statusUpdateDeltaAvg.get(), lastHeartbeatTime.get());
Expand Down