Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Custom metrics are not recorded for DAG tasks Fixes #3872 #3886

Merged
merged 23 commits into from
Sep 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e663374
fix: DAG level Output Artifacts on K8S and Kubelet executor
sarabala1979 Jul 28, 2020
e5d78d4
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Jul 28, 2020
e784e9d
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Jul 29, 2020
a92c98d
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Jul 31, 2020
d6456f6
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Jul 31, 2020
8020986
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 3, 2020
3a21a15
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 4, 2020
81149df
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 6, 2020
886e68b
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 11, 2020
e860842
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 12, 2020
461d03c
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 12, 2020
cbaa8ac
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 13, 2020
fc86b40
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 17, 2020
2846f55
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 18, 2020
227fd7b
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 19, 2020
4eb6392
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 21, 2020
05c22ed
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 24, 2020
909b55f
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 26, 2020
ac8ca20
Merge branch 'master' of https://github.com/argoproj/argo
sarabala1979 Aug 27, 2020
b17c199
fix: Custom metrics are not recorded for DAG tasks
sarabala1979 Aug 28, 2020
7c6f4f3
Added test
sarabala1979 Aug 28, 2020
d60e5fc
Merge remote-tracking branch 'upstream/master' into issue-3872
sarabala1979 Aug 31, 2020
c24ef12
Update dag.go
sarabala1979 Sep 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions workflow/controller/dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,14 @@ func (woc *wfOperationCtx) executeDAGTask(dagCtx *dagContext, taskName string) {
node := dagCtx.getTaskNode(taskName)
task := dagCtx.GetTask(taskName)
if node != nil && node.Fulfilled() {
// Collect the completed task metrics
_, tmpl, _, _ := dagCtx.tmplCtx.ResolveTemplate(task)
if tmpl != nil && tmpl.Metrics != nil {
sarabala1979 marked this conversation as resolved.
Show resolved Hide resolved
if prevNodeStatus, ok := woc.preExecutionNodePhases[node.ID]; ok && !prevNodeStatus.Fulfilled() {
localScope, realTimeScope := woc.prepareMetricScope(node)
woc.computeMetrics(tmpl.Metrics.Prometheus, localScope, realTimeScope, false)
}
}
if node.Completed() {
// Run the node's onExit node, if any.
hasOnExitNode, onExitNode, err := woc.runOnExitNode(task.OnExit, task.Name, node.Name, dagCtx.boundaryID, dagCtx.tmplCtx)
Expand Down
118 changes: 118 additions & 0 deletions workflow/controller/operator_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,121 @@ func TestRetryStrategyMetric(t *testing.T) {
assert.Contains(t, metricErrorCounterString, `counter:<value:1 > `)

}

var dagTmplMetrics = `
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
name: hello-world-nl9bj
spec:
arguments: {}
entrypoint: steps
templates:
- arguments: {}
dag:
tasks:
- arguments: {}
name: random-int-dag
template: random-int
- arguments: {}
name: flakey-dag
template: flakey

name: steps
outputs: {}
- arguments: {}
container:
args:
- RAND_INT=$((1 + RANDOM % 10)); echo $RAND_INT; echo $RAND_INT > /tmp/rand_int.txt
command:
- sh
- -c
image: alpine:latest
name: ""
resources: {}
inputs: {}
metadata: {}
metrics:
prometheus:
- help: Value of the int emitted by random-int at step level
histogram:
buckets:
- 2.01
- 4.01
- 6.01
- 8.01
- 10.01
value: 5
name: random_int_step_histogram_dag
- gauge:
realtime: true
value: '{{duration}}'
help: Duration gauge by name
labels:
- key: name
value: random-int
name: duration_gauge_dag
name: random-int
outputs:
parameters:
- globalName: rand-int-value
name: rand-int-value
valueFrom:
path: /tmp/rand_int.txt
- arguments: {}
container:
args:
- import random; import sys; exit_code = random.choice([0, 1, 1]); sys.exit(exit_code)
command:
- python
- -c
image: python:alpine3.6
name: ""
resources: {}
inputs: {}
metadata: {}
metrics:
prometheus:
- counter:
value: "1"
help: Count of step execution by result status
labels:
- key: name
value: flakey
- key: status
value: Failed
name: result_counter_dag
name: flakey
outputs: {}
`

func TestDAGTmplMetrics(t *testing.T) {
cancel, controller := newController()
defer cancel()
wfcset := controller.wfclientset.ArgoprojV1alpha1().Workflows("")
wf := unmarshalWF(dagTmplMetrics)
_, err := wfcset.Create(wf)
assert.NoError(t, err)
woc := newWorkflowOperationCtx(wf, controller)

woc.operate()
makePodsPhase(t, apiv1.PodSucceeded, controller.kubeclientset, wf.Namespace)
woc.operate()
tmpl := woc.wf.GetTemplateByName("random-int")
assert.NotNil(t, tmpl)
metricDesc := tmpl.Metrics.Prometheus[0].GetDesc()
assert.NotNil(t, controller.metrics.GetCustomMetric(metricDesc))
metricHistogram := controller.metrics.GetCustomMetric(metricDesc).(prometheus.Histogram)
metricHistogramString, err := getMetricStringValue(metricHistogram)
assert.NoError(t, err)
assert.Contains(t, metricHistogramString, `histogram:<sample_count:1 sample_sum:5`)

tmpl = woc.wf.GetTemplateByName("flakey")
assert.NotNil(t, tmpl)
metricDesc = tmpl.Metrics.Prometheus[0].GetDesc()
assert.NotNil(t, controller.metrics.GetCustomMetric(metricDesc))
metricCounter := controller.metrics.GetCustomMetric(metricDesc).(prometheus.Counter)
metricCounterString, err := getMetricStringValue(metricCounter)
assert.NoError(t, err)
assert.Contains(t, metricCounterString, `counter:<value:1 > `)
}