From 0454130ad97a61624fb0b916bf14a51dce8f199d Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 3 Jul 2024 22:18:24 +0400 Subject: [PATCH] feat: suppress controller runtime first N failures on the console As the controllers might fail with transient errors on machine startup, but errors are always retried, persisten errors will anyway show up in the console. The full `talosctl logs controller-runtime` are not suppressed. Signed-off-by: Andrey Smirnov --- .../runtime/v1alpha2/v1alpha2_controller.go | 10 ++- internal/pkg/ntp/ntp.go | 2 +- pkg/logging/error_suppress.go | 79 +++++++++++++++++++ pkg/logging/error_suppress_test.go | 67 ++++++++++++++++ pkg/logging/zap.go | 58 +++++++++----- pkg/machinery/constants/constants.go | 3 + 6 files changed, 195 insertions(+), 24 deletions(-) create mode 100644 pkg/logging/error_suppress.go create mode 100644 pkg/logging/error_suppress_test.go diff --git a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go index 057babe989..3a7c61879f 100644 --- a/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go +++ b/internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_controller.go @@ -514,7 +514,13 @@ func (ctrl *Controller) makeLogger(s string) (*zap.Logger, error) { } return logging.ZapLogger( - logging.NewLogDestination(logWriter, zapcore.DebugLevel, logging.WithColoredLevels()), - logging.NewLogDestination(logging.StdWriter, ctrl.consoleLogLevel, logging.WithoutTimestamp(), logging.WithoutLogLevels()), + logging.NewLogDestination(logWriter, zapcore.DebugLevel, + logging.WithColoredLevels(), + ), + logging.NewLogDestination(logging.StdWriter, ctrl.consoleLogLevel, + logging.WithoutTimestamp(), + logging.WithoutLogLevels(), + logging.WithControllerErrorSuppressor(constants.ConsoleLogErrorSuppressThreshold), + ), ).With(logging.Component(s)), nil } diff --git a/internal/pkg/ntp/ntp.go b/internal/pkg/ntp/ntp.go index d9145d1dd7..7c78b8e522 100644 --- a/internal/pkg/ntp/ntp.go +++ b/internal/pkg/ntp/ntp.go @@ -306,7 +306,7 @@ func (syncer *Syncer) resolveServers(ctx context.Context) ([]string, error) { } else { ips, err := net.LookupIP(server) if err != nil { - syncer.logger.Warn(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err)) + syncer.logger.Error(fmt.Sprintf("failed looking up %q, ignored", server), zap.Error(err)) } for _, ip := range ips { diff --git a/pkg/logging/error_suppress.go b/pkg/logging/error_suppress.go new file mode 100644 index 0000000000..2fb71594f5 --- /dev/null +++ b/pkg/logging/error_suppress.go @@ -0,0 +1,79 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package logging + +import ( + "sync/atomic" + + "go.uber.org/zap/zapcore" +) + +// NewControllerErrorSuppressor creates a new controller error suppressor. +// +// It suppresses error logs for a given controller unless it logs >= threshold errors. +// The idea is that all controllers reconcile errors, so if the error is not transient, +// it will be reported enough time to hit the threshold, but transient errors will be +// suppressed. +// +// The suppressor records the controller name by inspecting a log field named "controller" +// passed via `logger.With()` call. +func NewControllerErrorSuppressor(core zapcore.Core, threshold int64) zapcore.Core { + return &consoleSampler{ + Core: core, + threshold: threshold, + } +} + +type consoleSampler struct { + zapcore.Core + + hits *atomic.Int64 + threshold int64 + controller string +} + +var _ zapcore.Core = (*consoleSampler)(nil) + +func (s *consoleSampler) Level() zapcore.Level { + return zapcore.LevelOf(s.Core) +} + +func (s *consoleSampler) With(fields []zapcore.Field) zapcore.Core { + controller := s.controller + num := s.hits + + for _, field := range fields { + if field.Key == "controller" { + if field.String != controller { + controller = field.String + num = new(atomic.Int64) + } + + break + } + } + + return &consoleSampler{ + threshold: s.threshold, + controller: controller, + hits: num, + Core: s.Core.With(fields), + } +} + +func (s *consoleSampler) Check(ent zapcore.Entry, ce *zapcore.CheckedEntry) *zapcore.CheckedEntry { + if !s.Enabled(ent.Level) { + return ce + } + + if ent.Level == zapcore.ErrorLevel && s.controller != "" { + if s.hits.Add(1) <= s.threshold { + // suppress the log + return ce + } + } + + return s.Core.Check(ent, ce) +} diff --git a/pkg/logging/error_suppress_test.go b/pkg/logging/error_suppress_test.go new file mode 100644 index 0000000000..3ad7cea986 --- /dev/null +++ b/pkg/logging/error_suppress_test.go @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +package logging_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + + "github.com/siderolabs/talos/pkg/logging" +) + +func assertLogged(t *testing.T, core zapcore.Core, logs *observer.ObservedLogs, entries []zapcore.Entry, expectedCount int) { + t.Helper() + + for _, entry := range entries { + if ce := core.Check(entry, nil); ce != nil { + ce.Write() + } + } + + assert.Len(t, logs.TakeAll(), expectedCount) +} + +func TestErrorSuppressor(t *testing.T) { + t.Parallel() + + core, logs := observer.New(zapcore.InfoLevel) + + const threshold = 2 + + core = logging.NewControllerErrorSuppressor(core, threshold) + + // warn/info messages are not affected + assertLogged(t, core, logs, []zapcore.Entry{ + {Level: zapcore.InfoLevel, Message: "abc"}, + {Level: zapcore.WarnLevel, Message: "def"}, + {Level: zapcore.DebugLevel, Message: "message"}, // below level + }, 2) + + // different controllers, suppress counters are independent + controllerCore1 := core.With([]zapcore.Field{{Key: "controller", String: "c1"}}) + controllerCore2 := core.With([]zapcore.Field{{Key: "controller", String: "c2"}}) + + assertLogged(t, controllerCore1, logs, []zapcore.Entry{ + {Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed + {Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed + {Level: zapcore.ErrorLevel, Message: "controller failed"}, + }, 1) + + assertLogged(t, controllerCore2, logs, []zapcore.Entry{ + {Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed + {Level: zapcore.ErrorLevel, Message: "controller failed"}, // suppressed + }, 0) + + assertLogged(t, controllerCore1, logs, []zapcore.Entry{ + {Level: zapcore.ErrorLevel, Message: "controller failed"}, // not suppressed, over threshold + }, 1) + + assertLogged(t, controllerCore1.With([]zapcore.Field{{Key: "foo", String: "bar"}}), logs, []zapcore.Entry{ + {Level: zapcore.ErrorLevel, Message: "controller failed"}, // .With() without 'controller' field keeps the counter value from the parent + }, 1) +} diff --git a/pkg/logging/zap.go b/pkg/logging/zap.go index f742411617..d26f48ec3f 100644 --- a/pkg/logging/zap.go +++ b/pkg/logging/zap.go @@ -62,50 +62,60 @@ var StdWriter = &logWrapper{nil} // LogDestination defines logging destination Config. type LogDestination struct { // Level log level. - level zapcore.LevelEnabler - writer io.Writer - config zapcore.EncoderConfig + level zapcore.LevelEnabler + writer io.Writer + config zapcore.EncoderConfig + suppressThreshold int64 } -// EncoderOption defines a log destination encoder config setter. -type EncoderOption func(config *zapcore.EncoderConfig) +// LogDestinationOption defines a log destination encoder config setter. +type LogDestinationOption func(dest *LogDestination) // WithoutTimestamp disables timestamp. -func WithoutTimestamp() EncoderOption { - return func(config *zapcore.EncoderConfig) { - config.EncodeTime = nil +func WithoutTimestamp() LogDestinationOption { + return func(dest *LogDestination) { + dest.config.EncodeTime = nil } } // WithoutLogLevels disable log level. -func WithoutLogLevels() EncoderOption { - return func(config *zapcore.EncoderConfig) { - config.EncodeLevel = nil +func WithoutLogLevels() LogDestinationOption { + return func(dest *LogDestination) { + dest.config.EncodeLevel = nil } } // WithColoredLevels enables log level colored output. -func WithColoredLevels() EncoderOption { - return func(config *zapcore.EncoderConfig) { - config.EncodeLevel = zapcore.CapitalColorLevelEncoder +func WithColoredLevels() LogDestinationOption { + return func(dest *LogDestination) { + dest.config.EncodeLevel = zapcore.CapitalColorLevelEncoder + } +} + +// WithControllerErrorSuppressor creates a new console log controller error suppressor. +func WithControllerErrorSuppressor(threshold int64) LogDestinationOption { + return func(dest *LogDestination) { + dest.suppressThreshold = threshold } } // NewLogDestination creates new log destination. -func NewLogDestination(writer io.Writer, logLevel zapcore.LevelEnabler, options ...EncoderOption) *LogDestination { +func NewLogDestination(writer io.Writer, logLevel zapcore.LevelEnabler, options ...LogDestinationOption) *LogDestination { config := zap.NewDevelopmentEncoderConfig() config.ConsoleSeparator = " " config.StacktraceKey = "error" - for _, option := range options { - option(&config) - } - - return &LogDestination{ + dest := &LogDestination{ level: logLevel, config: config, writer: writer, } + + for _, option := range options { + option(dest) + } + + return dest } // Wrap is a simple helper to wrap io.Writer with default arguments. @@ -122,11 +132,17 @@ func ZapLogger(dests ...*LogDestination) *zap.Logger { } cores := xslices.Map(dests, func(dest *LogDestination) zapcore.Core { - return zapcore.NewCore( + core := zapcore.NewCore( zapcore.NewConsoleEncoder(dest.config), zapcore.AddSync(dest.writer), dest.level, ) + + if dest.suppressThreshold > 0 { + core = NewControllerErrorSuppressor(core, dest.suppressThreshold) + } + + return core }) return zap.New(zapcore.NewTee(cores...)) diff --git a/pkg/machinery/constants/constants.go b/pkg/machinery/constants/constants.go index 2e427ed8b6..8380996664 100644 --- a/pkg/machinery/constants/constants.go +++ b/pkg/machinery/constants/constants.go @@ -1005,6 +1005,9 @@ const ( // MinimumGOAMD64Level is the minimum x86_64 microarchitecture level required by Talos. MinimumGOAMD64Level = 2 + + // ConsoleLogErrorSuppressThreshold is the threshold for suppressing console log errors. + ConsoleLogErrorSuppressThreshold = 4 ) // See https://linux.die.net/man/3/klogctl