Skip to content

Commit

Permalink
Panic in controller when step fails before image digest exporter
Browse files Browse the repository at this point in the history
The image digest exporter (part of the Image Output Resource) is
configured with `"terminationMessagePolicy": "FallbackToLogsOnError",`.

When a previous step has failed in a Task our entrypoint wrapping the
exporter emits a log line like `2020/03/13 12:03:26 Skipping
step because a previous step failed`. Because the image digest exporter
is set to FallbackToLogsOnError Kubernetes slurps up this log line as
the termination message.

That line gets read by the Tekton controller, which is looking for JSON
in the termination message. It fails to parse and stops trying to read
step statuses.

That in turn results in a mismatch in the length of the list of steps and
the length of the list of step statuses. Finally we attempt to sort
the list of step statuses alongside the list of steps. This method
panics with an out of bounds error because it assumes the lengths of the two
lists are the same.

So, this PR does the following things:

1. The image digest exporter has the FallbackToLogsOnError policy
removed. I can't think of a reason that we need this anymore.
2. The Tekton controller no longer breaks out of the loop while it's
parsing step statuses and instead simply ignores non-JSON termination
messages.
  • Loading branch information
Scott authored and tekton-robot committed Mar 17, 2020
1 parent 3588a14 commit 153f1d1
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 56 deletions.
65 changes: 34 additions & 31 deletions pkg/pod/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
"time"

"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1"
"github.com/tektoncd/pipeline/pkg/logging"
"github.com/tektoncd/pipeline/pkg/termination"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/apis"
Expand Down Expand Up @@ -99,7 +99,7 @@ func SidecarsReady(podStatus corev1.PodStatus) bool {
}

// MakeTaskRunStatus returns a TaskRunStatus based on the Pod's status.
func MakeTaskRunStatus(tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.TaskSpec) v1alpha1.TaskRunStatus {
func MakeTaskRunStatus(logger *zap.SugaredLogger, tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.TaskSpec) v1alpha1.TaskRunStatus {
trs := &tr.Status
if trs.GetCondition(apis.ConditionSucceeded) == nil || trs.GetCondition(apis.ConditionSucceeded).Status == corev1.ConditionUnknown {
// If the taskRunStatus doesn't exist yet, it's because we just started running
Expand All @@ -114,39 +114,12 @@ func MakeTaskRunStatus(tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.T
trs.PodName = pod.Name
trs.Steps = []v1alpha1.StepState{}
trs.Sidecars = []v1alpha1.SidecarState{}
logger, _ := logging.NewLogger("", "status")
defer func() {
_ = logger.Sync()
}()

for _, s := range pod.Status.ContainerStatuses {
if IsContainerStep(s.Name) {
if s.State.Terminated != nil && len(s.State.Terminated.Message) != 0 {
msg := s.State.Terminated.Message
r, err := termination.ParseMessage(msg)
if err != nil {
logger.Errorf("Could not parse json message %q because of %w", msg, err)
break
}
for index, result := range r {
if result.Key == "StartedAt" {
t, err := time.Parse(time.RFC3339, result.Value)
if err != nil {
logger.Errorf("Could not parse time: %q: %w", result.Value, err)
break
}
s.State.Terminated.StartedAt = metav1.NewTime(t)
// remove the entry for the starting time
r = append(r[:index], r[index+1:]...)
if len(r) == 0 {
s.State.Terminated.Message = ""
} else if bytes, err := json.Marshal(r); err != nil {
logger.Errorf("Error marshalling remaining results: %w", err)
} else {
s.State.Terminated.Message = string(bytes)
}
break
}
if err := updateStatusStartTime(&s); err != nil {
logger.Errorf("error setting the start time of step %q in taskrun %q: %w", s.Name, tr.Name, err)
}
}
trs.Steps = append(trs.Steps, v1alpha1.StepState{
Expand Down Expand Up @@ -180,6 +153,36 @@ func MakeTaskRunStatus(tr v1alpha1.TaskRun, pod *corev1.Pod, taskSpec v1alpha1.T
return *trs
}

// updateStatusStartTime searches for a result called "StartedAt" in the JSON-formatted termination message
// of a step and sets the State.Terminated.StartedAt field to this time if it's found. The "StartedAt" result
// is also removed from the list of results in the container status.
func updateStatusStartTime(s *corev1.ContainerStatus) error {
r, err := termination.ParseMessage(s.State.Terminated.Message)
if err != nil {
return fmt.Errorf("termination message could not be parsed as JSON: %w", err)
}
for index, result := range r {
if result.Key == "StartedAt" {
t, err := time.Parse(time.RFC3339, result.Value)
if err != nil {
return fmt.Errorf("could not parse time value %q in StartedAt field: %w", result.Value, err)
}
s.State.Terminated.StartedAt = metav1.NewTime(t)
// remove the entry for the starting time
r = append(r[:index], r[index+1:]...)
if len(r) == 0 {
s.State.Terminated.Message = ""
} else if bytes, err := json.Marshal(r); err != nil {
return fmt.Errorf("error marshalling remaining results back into termination message: %w", err)
} else {
s.State.Terminated.Message = string(bytes)
}
break
}
}
return nil
}

func updateCompletedTaskRun(trs *v1alpha1.TaskRunStatus, pod *corev1.Pod) {
if DidTaskRunFail(pod) {
msg := getFailureMessage(pod)
Expand Down
94 changes: 93 additions & 1 deletion pkg/pod/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (

"github.com/google/go-cmp/cmp"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
"github.com/tektoncd/pipeline/pkg/logging"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/apis"
Expand All @@ -40,6 +42,7 @@ func TestMakeTaskRunStatus(t *testing.T) {
for _, c := range []struct {
desc string
podStatus corev1.PodStatus
taskSpec v1alpha1.TaskSpec
want v1alpha1.TaskRunStatus
}{{
desc: "empty",
Expand Down Expand Up @@ -600,6 +603,94 @@ func TestMakeTaskRunStatus(t *testing.T) {
}},
},
},
}, {
desc: "non-json-termination-message-with-steps-afterwards-shouldnt-panic",
taskSpec: v1alpha1.TaskSpec{TaskSpec: v1beta1.TaskSpec{
Steps: []v1alpha1.Step{{Container: corev1.Container{
Name: "non-json",
}}, {Container: corev1.Container{
Name: "after-non-json",
}}, {Container: corev1.Container{
Name: "this-step-might-panic",
}}, {Container: corev1.Container{
Name: "foo",
}}},
}},
podStatus: corev1.PodStatus{
Phase: corev1.PodFailed,
ContainerStatuses: []corev1.ContainerStatus{{
Name: "step-this-step-might-panic",
ImageID: "image",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}, {
Name: "step-foo",
ImageID: "image",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}, {
Name: "step-non-json",
ImageID: "image",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Message: "this is a non-json termination message. dont panic!",
},
},
}, {
Name: "step-after-non-json",
ImageID: "image",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{},
},
}},
},
want: v1alpha1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: []apis.Condition{{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: ReasonFailed,
Message: `"step-non-json" exited with code 1 (image: "image"); for logs run: kubectl -n foo logs pod -c step-non-json`,
}},
},
TaskRunStatusFields: v1alpha1.TaskRunStatusFields{
Steps: []v1alpha1.StepState{{
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 1,
Message: "this is a non-json termination message. dont panic!",
}},

Name: "non-json",
ContainerName: "step-non-json",
ImageID: "image",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "after-non-json",
ContainerName: "step-after-non-json",
ImageID: "image",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "this-step-might-panic",
ContainerName: "step-this-step-might-panic",
ImageID: "image",
}, {
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{}},
Name: "foo",
ContainerName: "step-foo",
ImageID: "image",
}},
Sidecars: []v1alpha1.SidecarState{},
// We don't actually care about the time, just that it's not nil
CompletionTime: &metav1.Time{Time: time.Now()},
},
},
}} {
t.Run(c.desc, func(t *testing.T) {
now := metav1.Now()
Expand All @@ -624,7 +715,8 @@ func TestMakeTaskRunStatus(t *testing.T) {
},
}

got := MakeTaskRunStatus(tr, pod, v1alpha1.TaskSpec{})
logger, _ := logging.NewLogger("", "status")
got := MakeTaskRunStatus(logger, tr, pod, c.taskSpec)

// Common traits, set for test case brevity.
c.want.PodName = "pod"
Expand Down
1 change: 0 additions & 1 deletion pkg/reconciler/taskrun/resources/image_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,5 @@ func imageDigestExporterStep(imageDigestExporterImage string, imagesJSON []byte)
Args: []string{
"-images", string(imagesJSON),
},
TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
}}
}
11 changes: 4 additions & 7 deletions pkg/reconciler/taskrun/resources/image_exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,10 @@ func TestAddOutputImageDigestExporter(t *testing.T) {
wantSteps: []v1alpha1.Step{{Container: corev1.Container{
Name: "step1",
}}, {Container: corev1.Container{
Name: "image-digest-exporter-9l9zj",
Image: "override-with-imagedigest-exporter-image:latest",
Command: []string{"/ko-app/imagedigestexporter"},
Args: []string{"-images", "[{\"name\":\"source-image-1\",\"type\":\"image\",\"url\":\"gcr.io/some-image-1\",\"digest\":\"\",\"OutputImageDir\":\"/workspace/output/source-image\"}]"},
TerminationMessagePolicy: "FallbackToLogsOnError",
Name: "image-digest-exporter-9l9zj",
Image: "override-with-imagedigest-exporter-image:latest",
Command: []string{"/ko-app/imagedigestexporter"},
Args: []string{"-images", "[{\"name\":\"source-image-1\",\"type\":\"image\",\"url\":\"gcr.io/some-image-1\",\"digest\":\"\",\"OutputImageDir\":\"/workspace/output/source-image\"}]"},
}}},
}, {
desc: "image resource in task with multiple steps",
Expand Down Expand Up @@ -163,8 +162,6 @@ func TestAddOutputImageDigestExporter(t *testing.T) {
Image: "override-with-imagedigest-exporter-image:latest",
Command: []string{"/ko-app/imagedigestexporter"},
Args: []string{"-images", "[{\"name\":\"source-image-1\",\"type\":\"image\",\"url\":\"gcr.io/some-image-1\",\"digest\":\"\",\"OutputImageDir\":\"/workspace/output/source-image\"}]"},

TerminationMessagePolicy: "FallbackToLogsOnError",
}}},
}} {
t.Run(c.desc, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
before := tr.Status.GetCondition(apis.ConditionSucceeded)

// Convert the Pod's status to the equivalent TaskRun Status.
tr.Status = podconvert.MakeTaskRunStatus(*tr, pod, *taskSpec)
tr.Status = podconvert.MakeTaskRunStatus(c.Logger, *tr, pod, *taskSpec)

if err := updateTaskRunResourceResult(tr, pod.Status); err != nil {
return err
Expand Down
1 change: 0 additions & 1 deletion pkg/reconciler/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,6 @@ func TestReconcile(t *testing.T) {
tb.VolumeMount("tekton-internal-workspace", workspaceDir),
tb.VolumeMount("tekton-internal-home", "/tekton/home"),
tb.VolumeMount("tekton-internal-results", "/tekton/results"),
tb.TerminationMessagePolicy(corev1.TerminationMessageFallbackToLogsOnError),
tb.TerminationMessagePath("/tekton/termination"),
),
),
Expand Down
7 changes: 0 additions & 7 deletions test/builder/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,6 @@ func EphemeralStorage(val string) ResourceListOp {
}
}

// TerminationMessagePolicy sets the policy of the termination message.
func TerminationMessagePolicy(terminationMessagePolicy corev1.TerminationMessagePolicy) ContainerOp {
return func(c *corev1.Container) {
c.TerminationMessagePolicy = terminationMessagePolicy
}
}

// TerminationMessagePath sets the termination message path.
func TerminationMessagePath(terminationMessagePath string) ContainerOp {
return func(c *corev1.Container) {
Expand Down
7 changes: 0 additions & 7 deletions test/builder/step.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,3 @@ func StepTerminationMessagePath(terminationMessagePath string) StepOp {
step.TerminationMessagePath = terminationMessagePath
}
}

// StepTerminationMessagePolicy sets the policy of the termination message.
func StepTerminationMessagePolicy(terminationMessagePolicy corev1.TerminationMessagePolicy) StepOp {
return func(step *v1alpha1.Step) {
step.TerminationMessagePolicy = terminationMessagePolicy
}
}

0 comments on commit 153f1d1

Please sign in to comment.