diff --git a/pkg/scheduler/actions/allocate/allocate.go b/pkg/scheduler/actions/allocate/allocate.go index 0926f78694..16fde70b23 100644 --- a/pkg/scheduler/actions/allocate/allocate.go +++ b/pkg/scheduler/actions/allocate/allocate.go @@ -17,6 +17,8 @@ package allocate import ( + "time" + "k8s.io/klog" "volcano.sh/volcano/pkg/scheduler/api" @@ -225,6 +227,7 @@ func (alloc *Action) Execute(ssn *framework.Session) { task.UID, node.Name, ssn.UID, err) } else { metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) + metrics.UpdateE2eSchedulingLastTimeByJob(job.Name, string(job.Queue), job.Namespace, time.Now()) } } else { klog.V(3).Infof("Predicates failed for task <%s/%s> on node <%s> with limited resources", @@ -239,6 +242,7 @@ func (alloc *Action) Execute(ssn *framework.Session) { task.UID, node.Name, ssn.UID, err) } else { metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) + metrics.UpdateE2eSchedulingLastTimeByJob(job.Name, string(job.Queue), job.Namespace, time.Now()) } } } diff --git a/pkg/scheduler/actions/backfill/backfill.go b/pkg/scheduler/actions/backfill/backfill.go index 0e6bbca3de..eded71e03b 100644 --- a/pkg/scheduler/actions/backfill/backfill.go +++ b/pkg/scheduler/actions/backfill/backfill.go @@ -17,6 +17,8 @@ limitations under the License. package backfill import ( + "time" + "k8s.io/klog" "volcano.sh/volcano/pkg/scheduler/api" @@ -76,6 +78,7 @@ func (backfill *Action) Execute(ssn *framework.Session) { } metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) + metrics.UpdateE2eSchedulingLastTimeByJob(job.Name, string(job.Queue), job.Namespace, time.Now()) allocated = true break } diff --git a/pkg/scheduler/cache/event_handlers.go b/pkg/scheduler/cache/event_handlers.go index f29355d6d2..07ed8cafc8 100644 --- a/pkg/scheduler/cache/event_handlers.go +++ b/pkg/scheduler/cache/event_handlers.go @@ -420,6 +420,8 @@ func (sc *SchedulerCache) setPodGroup(ss *schedulingapi.PodGroup) error { sc.Jobs[job].Queue = schedulingapi.QueueID(sc.defaultQueue) } + metrics.UpdateE2eSchedulingStartTimeByJob(sc.Jobs[job].Name, string(sc.Jobs[job].Queue), sc.Jobs[job].Namespace, + sc.Jobs[job].CreationTimestamp.Time) return nil } diff --git a/pkg/scheduler/metrics/job.go b/pkg/scheduler/metrics/job.go index ace0008280..47ef091a69 100644 --- a/pkg/scheduler/metrics/job.go +++ b/pkg/scheduler/metrics/job.go @@ -52,6 +52,8 @@ func RegisterJobRetries(jobID string) { // DeleteJobMetrics delete all metrics related to the job func DeleteJobMetrics(jobName, queue, namespace string) { e2eJobSchedulingDuration.DeleteLabelValues(jobName, queue, namespace) + e2eJobSchedulingStartTime.DeleteLabelValues(jobName, queue, namespace) + e2eJobSchedulingLastTime.DeleteLabelValues(jobName, queue, namespace) unscheduleTaskCount.DeleteLabelValues(jobName) jobShare.DeleteLabelValues(namespace, jobName) jobRetryCount.DeleteLabelValues(jobName) diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index 6a39e16517..f4acc7593b 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -62,6 +62,24 @@ var ( []string{"job_name", "queue", "job_namespace"}, ) + e2eJobSchedulingStartTime = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: VolcanoNamespace, + Name: "e2e_job_scheduling_start_time", + Help: "E2E job scheduling start time", + }, + []string{"job_name", "queue", "job_namespace"}, + ) + + e2eJobSchedulingLastTime = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: VolcanoNamespace, + Name: "e2e_job_scheduling_last_time", + Help: "E2E job scheduling last time", + }, + []string{"job_name", "queue", "job_namespace"}, + ) + pluginSchedulingLatency = promauto.NewHistogramVec( prometheus.HistogramOpts{ Subsystem: VolcanoNamespace, @@ -151,6 +169,16 @@ func UpdateE2eSchedulingDurationByJob(jobName string, queue string, namespace st e2eJobSchedulingLatency.Observe(DurationInMilliseconds(duration)) } +// UpdateE2eSchedulingStartTimeByJob updates the start time of scheduling +func UpdateE2eSchedulingStartTimeByJob(jobName string, queue string, namespace string, t time.Time) { + e2eJobSchedulingStartTime.WithLabelValues(jobName, queue, namespace).Set(ConvertToUnix(t)) +} + +// UpdateE2eSchedulingLastTimeByJob updates the last time of scheduling +func UpdateE2eSchedulingLastTimeByJob(jobName string, queue string, namespace string, t time.Time) { + e2eJobSchedulingLastTime.WithLabelValues(jobName, queue, namespace).Set(ConvertToUnix(t)) +} + // UpdateTaskScheduleDuration updates single task scheduling latency func UpdateTaskScheduleDuration(duration time.Duration) { taskSchedulingLatency.Observe(DurationInMilliseconds(duration)) @@ -200,3 +228,8 @@ func DurationInSeconds(duration time.Duration) float64 { func Duration(start time.Time) time.Duration { return time.Since(start) } + +// ConvertToUnix convert the time to Unix time +func ConvertToUnix(t time.Time) float64 { + return float64(t.Unix()) +}