Skip to content

Commit

Permalink
Merge pull request #2230 from xiaoanyunfei/bugfix/metrics
Browse files Browse the repository at this point in the history
clean up metrics of deleted objects
  • Loading branch information
volcano-sh-bot authored May 13, 2022
2 parents ff4cde9 + 62e813c commit 87148e0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pkg/scheduler/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ func (sc *SchedulerCache) processCleanupJob() {

if schedulingapi.JobTerminated(job) {
delete(sc.Jobs, job.UID)
metrics.DeleteJobShare(job.Namespace, job.Name)
metrics.DeleteJobMetrics(job.Name, string(job.Queue), job.Namespace)
klog.V(3).Infof("Job <%v:%v/%v> was deleted.", job.UID, job.Namespace, job.Name)
} else {
// Retry
Expand Down
6 changes: 5 additions & 1 deletion pkg/scheduler/cache/event_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
"volcano.sh/apis/pkg/apis/utils"
schedulingapi "volcano.sh/volcano/pkg/scheduler/api"
"volcano.sh/volcano/pkg/scheduler/metrics"
)

func isTerminated(status schedulingapi.TaskStatus) bool {
Expand Down Expand Up @@ -613,7 +614,10 @@ func (sc *SchedulerCache) updateQueue(queue *scheduling.Queue) {
}

// deleteQueue removes the queue identified by id from the scheduler cache and
// then deletes the metrics published under that queue's name. It is a no-op
// when id is not present in sc.Queues.
func (sc *SchedulerCache) deleteQueue(id schedulingapi.QueueID) {
	// Look the queue up before deleting it: the cached entry is the only
	// source of the name that DeleteQueueMetrics needs, and deleting first
	// would make this lookup always miss.
	queue, ok := sc.Queues[id]
	if !ok {
		return
	}

	delete(sc.Queues, id)
	metrics.DeleteQueueMetrics(queue.Name)
}

// DeletePriorityClass deletes the priority class from the scheduler cache.
Expand Down
13 changes: 8 additions & 5 deletions pkg/scheduler/metrics/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,15 @@ func UpdateJobShare(jobNs, jobID string, share float64) {
jobShare.WithLabelValues(jobNs, jobID).Set(share)
}

// DeleteJobShare removes the jobShare series recorded for one job.
//
// The label values are passed in the same order that UpdateJobShare uses when
// it sets the value: namespace first, then job ID.
func DeleteJobShare(jobNs, jobID string) {
jobShare.DeleteLabelValues(jobNs, jobID)
}

// RegisterJobRetries increments the jobRetryCount series labelled with jobID
// by one; call it once per retry of that job.
func RegisterJobRetries(jobID string) {
jobRetryCount.WithLabelValues(jobID).Inc()
}

// DeleteJobMetrics deletes the series recorded for a single job from each
// job-level collector referenced here: e2eJobSchedulingDuration,
// unscheduleTaskCount, jobShare and jobRetryCount.
//
// Label values are positional, so every call passes them in the order its
// collector was declared with.
// NOTE(review): the (jobName, queue, namespace) order used for
// e2eJobSchedulingDuration is not visible from here - confirm against its
// declaration.
func DeleteJobMetrics(jobName, queue, namespace string) {
e2eJobSchedulingDuration.DeleteLabelValues(jobName, queue, namespace)
unscheduleTaskCount.DeleteLabelValues(jobName)
// jobShare takes the namespace first, mirroring UpdateJobShare(jobNs, jobID).
// NOTE(review): presumably jobID there is the job name - verify against callers.
jobShare.DeleteLabelValues(namespace, jobName)
jobRetryCount.DeleteLabelValues(jobName)
}
17 changes: 17 additions & 0 deletions pkg/scheduler/metrics/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,20 @@ func UpdateQueuePodGroupRunningCount(queueName string, count int32) {
// UpdateQueuePodGroupUnknownCount sets the queuePodGroupUnknown value recorded
// for queueName to count.
func UpdateQueuePodGroupUnknownCount(queueName string, count int32) {
	// Set takes a float64, so convert the count up front.
	value := float64(count)
	queuePodGroupUnknown.WithLabelValues(queueName).Set(value)
}

// DeleteQueueMetrics deletes the series labelled with queueName from every
// per-queue collector listed below.
func DeleteQueueMetrics(queueName string) {
	// Each collector is addressed by the queue name alone, so one loop covers
	// them all. The order is the order in which the series are removed.
	perQueue := []interface {
		DeleteLabelValues(lvs ...string) bool
	}{
		queueAllocatedMilliCPU,
		queueAllocatedMemory,
		queueRequestMilliCPU,
		queueRequestMemory,
		queueDeservedMilliCPU,
		queueDeservedMemory,
		queueShare,
		queueWeight,
		queueOverused,
		queuePodGroupInqueue,
		queuePodGroupPending,
		queuePodGroupRunning,
		queuePodGroupUnknown,
	}

	for _, vec := range perQueue {
		vec.DeleteLabelValues(queueName)
	}
}

0 comments on commit 87148e0

Please sign in to comment.