From 90e3dc5bee6da629e603e9efc7a55199a1ac20de Mon Sep 17 00:00:00 2001 From: Nicholas Weaver Date: Thu, 3 Mar 2016 17:31:01 +0100 Subject: [PATCH] Ensures scheduler jobs submissions are sent concurrently for a workflow. Refactors scheduler.workJobs() to a package unexported function workJobs(). --- scheduler/workflow.go | 79 ++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/scheduler/workflow.go b/scheduler/workflow.go index bbb4fd972..330ed7217 100644 --- a/scheduler/workflow.go +++ b/scheduler/workflow.go @@ -22,6 +22,7 @@ package scheduler import ( "errors" "fmt" + "sync" "github.com/intelsdi-x/gomit" @@ -323,7 +324,7 @@ func (s *schedulerWorkflow) Start(t *task) { defer s.eventEmitter.Emit(event) // walk through the tree and dispatch work - s.workJobs(s.processNodes, s.publishNodes, t, j) + workJobs(s.processNodes, s.publishNodes, t, j) } func (s *schedulerWorkflow) State() WorkflowState { @@ -334,27 +335,65 @@ func (s *schedulerWorkflow) StateString() string { return WorkflowStateLookup[s.state] } -func (s *schedulerWorkflow) workJobs(prs []*processNode, pus []*publishNode, t *task, pj job) { +// workJobs takes a slice of proccess and publish nodes and submits jobs for each for a task. +// It then iterates down any process nodes to submit their child node jobs for the task +func workJobs(prs []*processNode, pus []*publishNode, t *task, pj job) { + // Create waitgroup to block until all jobs are submitted + wg := &sync.WaitGroup{} + // range over the process jobs and call submitProcessJob for _, pr := range prs { - j := newProcessJob(pj, pr.Name(), pr.Version(), pr.InboundContentType, pr.config.Table(), t.metricsManager, t.id) - errors := t.manager.Work(j).Promise().Await() - if len(errors) != 0 { - t.failedRuns++ - t.lastFailureTime = t.lastFireTime - t.lastFailureMessage = errors[len(errors)-1].Error() - return - } - - s.workJobs(pr.ProcessNodes, pr.PublishNodes, t, j) + // increment the wait group (before starting goroutine to prevent a race condition) + wg.Add(1) + // Start goroutine to submit the process job + go submitProcessJob(pj, t, wg, pr) } + // range over the publish jobs and call submitPublishJob for _, pu := range pus { - j := newPublishJob(pj, pu.Name(), pu.Version(), pu.InboundContentType, pu.config.Table(), t.metricsManager, t.id) - errors := t.manager.Work(j).Promise().Await() - if len(errors) != 0 { - t.failedRuns++ - t.lastFailureTime = t.lastFireTime - t.lastFailureMessage = errors[len(errors)-1].Error() - return - } + // increment the wait group (before starting goroutine to prevent a race condition) + wg.Add(1) + // Start goroutine to submit the process job + go submitPublishJob(pj, t, wg, pu) + } + // Wait until all job submisson goroutines are done + wg.Wait() +} + +func submitProcessJob(pj job, t *task, wg *sync.WaitGroup, pr *processNode) { + // Decrement the waitgroup + defer wg.Done() + // Create a new process job + j := newProcessJob(pj, pr.Name(), pr.Version(), pr.InboundContentType, pr.config.Table(), t.metricsManager, t.id) + // Submit the job against the task.managesWork + errors := t.manager.Work(j).Promise().Await() + // Check for errors and update the task + if len(errors) != 0 { + // Note: we just update the error tracking, the change of state for a task is handled before this is reached. + t.failedRuns++ + t.lastFailureTime = t.lastFireTime + t.lastFailureMessage = errors[len(errors)-1].Error() + // Return and do not proceed further down the workflow + return + } + // Iterate into any child process or publish nodes + workJobs(pr.ProcessNodes, pr.PublishNodes, t, j) +} + +func submitPublishJob(pj job, t *task, wg *sync.WaitGroup, pu *publishNode) { + // Decrement the waitgroup + defer wg.Done() + // Create a new process job + j := newPublishJob(pj, pu.Name(), pu.Version(), pu.InboundContentType, pu.config.Table(), t.metricsManager, t.id) + // Submit the job against the task.managesWork + errors := t.manager.Work(j).Promise().Await() + // Check for errors and update the task + if len(errors) != 0 { + // Note: we just update the error tracking, the change of state for a task is handled before this is reached. + t.failedRuns++ + t.lastFailureTime = t.lastFireTime + t.lastFailureMessage = errors[len(errors)-1].Error() + // Return and do not proceed further down the workflow + return } + // Publish nodes cannot contain child nodes (publish is a terminal node) + // so unlike process nodes there is not a call to workJobs here for child nodes. }