Skip to content

Commit

Permalink
feat: add counter to track alerts dropped outside of time_intervals
Browse files Browse the repository at this point in the history
Addresses: #3512

This adds a new counter metric `alertmanager_alerts_supressed_total`
that is incremented by `len(alerts)` whenever alerts are suppressed for
falling outside of a time interval, i.e. inside one of the route's
`mute_time_intervals` or outside of its `active_time_intervals`.

Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
  • Loading branch information
tjhop committed Oct 20, 2023
1 parent 16aa996 commit 4df1a55
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions notify/notify.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ type Metrics struct {
numTotalFailedNotifications *prometheus.CounterVec
numNotificationRequestsTotal *prometheus.CounterVec
numNotificationRequestsFailedTotal *prometheus.CounterVec
numAlertsSuppressedTotal prometheus.Counter
notificationLatencySeconds *prometheus.HistogramVec

ff featurecontrol.Flagger
Expand Down Expand Up @@ -284,6 +285,11 @@ func NewMetrics(r prometheus.Registerer, ff featurecontrol.Flagger) *Metrics {
Name: "notification_requests_failed_total",
Help: "The total number of failed notification requests.",
}, labels),
numAlertsSuppressedTotal: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "alerts_supressed_total",
Help: "The total number of alerts suppressed for being outside of active time intervals or within muted time intervals.",
}),
notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "alertmanager",
Name: "notification_latency_seconds",
Expand All @@ -296,7 +302,7 @@ func NewMetrics(r prometheus.Registerer, ff featurecontrol.Flagger) *Metrics {
r.MustRegister(
m.numNotifications, m.numTotalFailedNotifications,
m.numNotificationRequestsTotal, m.numNotificationRequestsFailedTotal,
m.notificationLatencySeconds,
m.numAlertsSuppressedTotal, m.notificationLatencySeconds,
)

return m
Expand Down Expand Up @@ -382,8 +388,8 @@ func (pb *PipelineBuilder) New(

ms := NewGossipSettleStage(peer)
is := NewMuteStage(inhibitor)
tas := NewTimeActiveStage(intervener)
tms := NewTimeMuteStage(intervener)
tas := NewTimeActiveStage(intervener, pb.metrics)
tms := NewTimeMuteStage(intervener, pb.metrics)
ss := NewMuteStage(silencer)

for name := range receivers {
Expand Down Expand Up @@ -869,13 +875,14 @@ func (n SetNotifiesStage) Exec(ctx context.Context, l log.Logger, alerts ...*typ
}

type timeStage struct {
muter types.TimeMuter
muter types.TimeMuter
metrics *Metrics
}

type TimeMuteStage timeStage

func NewTimeMuteStage(m types.TimeMuter) *TimeMuteStage {
return &TimeMuteStage{m}
func NewTimeMuteStage(m types.TimeMuter, metrics *Metrics) *TimeMuteStage {
return &TimeMuteStage{m, metrics}
}

// Exec implements the stage interface for TimeMuteStage.
Expand All @@ -902,6 +909,7 @@ func (tms TimeMuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*type

// If the current time is inside a mute time, all alerts are removed from the pipeline.
if muted {
tms.metrics.numAlertsSuppressedTotal.Add(float64(len(alerts)))
level.Debug(l).Log("msg", "Notifications not sent, route is within mute time")
return ctx, nil, nil
}
Expand All @@ -910,8 +918,8 @@ func (tms TimeMuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*type

type TimeActiveStage timeStage

func NewTimeActiveStage(m types.TimeMuter) *TimeActiveStage {
return &TimeActiveStage{m}
func NewTimeActiveStage(m types.TimeMuter, metrics *Metrics) *TimeActiveStage {
return &TimeActiveStage{m, metrics}
}

// Exec implements the stage interface for TimeActiveStage.
Expand Down Expand Up @@ -939,6 +947,7 @@ func (tas TimeActiveStage) Exec(ctx context.Context, l log.Logger, alerts ...*ty

// If the current time is not inside an active time, all alerts are removed from the pipeline
if !muted {
tas.metrics.numAlertsSuppressedTotal.Add(float64(len(alerts)))
level.Debug(l).Log("msg", "Notifications not sent, route is not within active time")
return ctx, nil, nil
}
Expand Down

0 comments on commit 4df1a55

Please sign in to comment.