Update kapacitor alert rule to have detail field #852

Merged: 5 commits, Feb 9, 2017
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
### Upcoming Bug Fixes

### Upcoming Features
1. [#838](https://github.com/influxdata/chronograf/issues/838): Add detail node to kapacitor alerts

### Upcoming UI Improvements

108 changes: 108 additions & 0 deletions bolt/alerts_test.go
@@ -0,0 +1,108 @@
package bolt_test

import (
"context"
"reflect"
"testing"

"github.com/influxdata/chronograf"
)

func setupTestClient() (*TestClient, error) {
if c, err := NewTestClient(); err != nil {
return nil, err
} else if err := c.Open(); err != nil {
return nil, err
} else {
return c, nil
}
}

// Ensure an alert rule can be stored in an AlertRuleStore.
func TestAlertRuleStoreAdd(t *testing.T) {
c, err := setupTestClient()
if err != nil {
t.Fatal(err)
}
defer c.Close()
s := c.AlertsStore

alerts := []chronograf.AlertRule{
chronograf.AlertRule{
ID: "one",
},
chronograf.AlertRule{
ID: "two",
Details: "howdy",
},
}

// Add new alert.
ctx := context.Background()
for i, a := range alerts {
// Adding should return an identical copy
actual, err := s.Add(ctx, 0, 0, a)
if err != nil {
t.Errorf("erroring adding alert to store: %v", err)
}
if !reflect.DeepEqual(actual, alerts[i]) {
t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, alerts[i])
}
}
}

func setupWithRule(ctx context.Context, alert chronograf.AlertRule) (*TestClient, error) {
c, err := setupTestClient()
if err != nil {
return nil, err
}

// Add test alert
if _, err := c.AlertsStore.Add(ctx, 0, 0, alert); err != nil {
return nil, err
}
return c, nil
}

// Ensure an alert rule can be loaded from an AlertRuleStore.
func TestAlertRuleStoreGet(t *testing.T) {
ctx := context.Background()
alert := chronograf.AlertRule{
ID: "one",
}
c, err := setupWithRule(ctx, alert)
if err != nil {
t.Fatalf("Error adding test alert to store: %v", err)
}
defer c.Close()
actual, err := c.AlertsStore.Get(ctx, 0, 0, "one")
if err != nil {
t.Fatalf("Error loading rule from store: %v", err)
}

if !reflect.DeepEqual(actual, alert) {
t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, alert)
}
}

// Ensure an alert rule with details can be loaded.
func TestAlertRuleStoreGetDetail(t *testing.T) {
ctx := context.Background()
alert := chronograf.AlertRule{
ID: "one",
Details: "my details",
}
c, err := setupWithRule(ctx, alert)
if err != nil {
t.Fatalf("Error adding test alert to store: %v", err)
}
defer c.Close()
actual, err := c.AlertsStore.Get(ctx, 0, 0, "one")
if err != nil {
t.Fatalf("Error loading rule from store: %v", err)
}

if !reflect.DeepEqual(actual, alert) {
t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, alert)
}
}
13 changes: 7 additions & 6 deletions chronograf.go
@@ -110,6 +110,7 @@ type AlertRule struct {
Every string `json:"every"` // Every how often to check for the alerting criteria
Alerts []string `json:"alerts"` // AlertServices name all the services to notify (e.g. pagerduty)
Message string `json:"message"` // Message included with alert
Details string `json:"details"` // Details is generally used for the email alert. If empty, it will not be added.
Trigger string `json:"trigger"` // Trigger is a type that defines when to trigger the alert
TriggerValues TriggerValues `json:"values"` // Defines the values that cause the alert to trigger
Name string `json:"name"` // Name is the user-defined name for the alert
@@ -238,13 +239,13 @@ type Dashboard struct {

// DashboardCell holds visual and query information for a cell
type DashboardCell struct {
X int32 `json:"x"`
Y int32 `json:"y"`
W int32 `json:"w"`
H int32 `json:"h"`
Name string `json:"name"`
X int32 `json:"x"`
Y int32 `json:"y"`
W int32 `json:"w"`
H int32 `json:"h"`
Name string `json:"name"`
Queries []Query `json:"queries"`
Type string `json:"type"`
Type string `json:"type"`
}

// DashboardsStore is the storage and retrieval of dashboards
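For context, a minimal sketch (not part of this diff) of how the new Details field surfaces in the JSON form of an AlertRule; the rule values here are illustrative only.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/influxdata/chronograf"
)

func main() {
	// Illustrative rule; only Name, Message, and Details matter for this sketch.
	rule := chronograf.AlertRule{
		Name:    "cpu-high",
		Message: "CPU usage is high",
		Details: "<b>{{ .ID }}</b> is now {{ .Level }}", // template text, generally used for the email alert
	}

	b, err := json.Marshal(rule)
	if err != nil {
		panic(err)
	}
	// The output carries a "details" key alongside "message", per the json tags above.
	fmt.Println(string(b))
}
```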
148 changes: 148 additions & 0 deletions kapacitor/tickscripts_test.go
@@ -199,6 +199,154 @@ trigger
}
}

func TestThresholdDetail(t *testing.T) {
alert := chronograf.AlertRule{
Name: "name",
Trigger: "threshold",
Alerts: []string{"slack", "victorops", "email"},
TriggerValues: chronograf.TriggerValues{
Operator: "greater than",
Value: "90",
},
Every: "30s",
Message: "message",
Details: "details",
Query: chronograf.QueryConfig{
Database: "telegraf",
Measurement: "cpu",
RetentionPolicy: "autogen",
Fields: []chronograf.Field{
{
Field: "usage_user",
Funcs: []string{"mean"},
},
},
Tags: map[string][]string{
"host": []string{
"acc-0eabc309-eu-west-1-data-3",
"prod",
},
"cpu": []string{
"cpu_total",
},
},
GroupBy: chronograf.GroupBy{
Time: "10m",
Tags: []string{"host", "cluster_id"},
},
AreTagsAccepted: true,
RawText: "",
},
}

tests := []struct {
name string
alert chronograf.AlertRule
want chronograf.TICKScript
wantErr bool
}{
{
name: "Test valid template alert",
alert: alert,
want: `var db = 'telegraf'

var rp = 'autogen'

var measurement = 'cpu'

var groupBy = ['host', 'cluster_id']

var whereFilter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod')

var period = 10m

var every = 30s

var name = 'name'

var idVar = name + ':{{.Group}}'

var message = 'message'

var idTag = 'alertID'

var levelTag = 'level'

var messageField = 'message'

var durationField = 'duration'

var outputDB = 'chronograf'

var outputRP = 'autogen'

var outputMeasurement = 'alerts'

var triggerType = 'threshold'

var details = 'details'

var crit = 90

var data = stream
|from()
.database(db)
.retentionPolicy(rp)
.measurement(measurement)
.groupBy(groupBy)
.where(whereFilter)
|window()
.period(period)
.every(every)
.align()
|mean('usage_user')
.as('value')

var trigger = data
|alert()
.crit(lambda: "value" > crit)
.stateChangesOnly()
.message(message)
.id(idVar)
.idTag(idTag)
.levelTag(levelTag)
.messageField(messageField)
.durationField(durationField)
.details(details)
.slack()
.victorOps()
.email()

trigger
|influxDBOut()
.create()
.database(outputDB)
.retentionPolicy(outputRP)
.measurement(outputMeasurement)
.tag('alertName', name)
.tag('triggerType', triggerType)

trigger
|httpOut('output')
`,
wantErr: false,
},
}
for _, tt := range tests {
gen := Alert{}
got, err := gen.Generate(tt.alert)
if (err != nil) != tt.wantErr {
t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr)
continue
}
if got != tt.want {
diff := diffmatchpatch.New()
delta := diff.DiffMain(string(tt.want), string(got), true)
t.Errorf("%q\n%s", tt.name, diff.DiffPrettyText(delta))
}
}
}

func TestThresholdInsideRange(t *testing.T) {
alert := chronograf.AlertRule{
Name: "name",
16 changes: 13 additions & 3 deletions kapacitor/triggers.go
@@ -27,13 +27,19 @@ var AllAlerts = `
.durationField(durationField)
`

// ThresholdTrigger is the trickscript trigger for alerts that exceed a value
// Details is used only for alerts that specify a details string
var Details = `
.details(details)
`

// ThresholdTrigger is the tickscript trigger for alerts that exceed a value
var ThresholdTrigger = `
var trigger = data
|alert()
.crit(lambda: "value" %s crit)
`

// ThresholdRangeTrigger is the alert when data does not intersect the range.
var ThresholdRangeTrigger = `
var trigger = data
|alert()
@@ -102,7 +108,11 @@ func Trigger(rule chronograf.AlertRule) (string, error) {
return "", err
}

return trigger + AllAlerts, nil
trigger += AllAlerts
if rule.Details != "" {
trigger += Details
}
return trigger, nil
}

func relativeTrigger(rule chronograf.AlertRule) (string, error) {
@@ -132,7 +142,7 @@ func thresholdRangeTrigger(rule chronograf.AlertRule) (string, error) {
if err != nil {
return "", err
}
var iops []interface{} = make([]interface{}, len(ops))
var iops = make([]interface{}, len(ops))
for i, o := range ops {
iops[i] = o
}
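A small self-contained sketch (assumed, not taken from this PR) of the concatenation pattern above: the .details(details) node is appended to the generated alert chain only when the rule carries a non-empty Details string. The fragments below approximate the AllAlerts and Details constants in kapacitor/triggers.go.

```go
package main

import "fmt"

// allAlerts and detailsNode approximate the AllAlerts and Details fragments
// in kapacitor/triggers.go, redeclared here so the sketch is self-contained.
const allAlerts = `
        .stateChangesOnly()
        .message(message)
        .id(idVar)
        .idTag(idTag)
        .levelTag(levelTag)
        .messageField(messageField)
        .durationField(durationField)
`

const detailsNode = `
        .details(details)
`

// buildTrigger appends the shared alert nodes and, only when the rule has
// details, the details node.
func buildTrigger(trigger, ruleDetails string) string {
	trigger += allAlerts
	if ruleDetails != "" {
		trigger += detailsNode
	}
	return trigger
}

func main() {
	base := `var trigger = data
    |alert()
        .crit(lambda: "value" > crit)`
	fmt.Println(buildTrigger(base, "my details")) // includes .details(details)
	fmt.Println(buildTrigger(base, ""))           // omits .details(details)
}
```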
11 changes: 9 additions & 2 deletions kapacitor/vars.go
@@ -100,7 +100,7 @@ func commonVars(rule chronograf.AlertRule) (string, error) {
var outputMeasurement = '%s'
var triggerType = '%s'
`
return fmt.Sprintf(common,
res := fmt.Sprintf(common,
rule.Query.Database,
rule.Query.RetentionPolicy,
rule.Query.Measurement,
@@ -117,7 +117,14 @@ RP,
RP,
Measurement,
rule.Trigger,
), nil
)

if rule.Details != "" {
res += fmt.Sprintf(`
var details = '%s'
`, rule.Details)
}
return res, nil
}

// window is only used if deadman or threshold/relative with aggregate. Will return empty
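A sketch of the variable emitted by the change above, isolated for clarity; commonVars appends a var details line only when the rule has a Details string. The helper name below is hypothetical.

```go
package main

import "fmt"

// detailsVar mirrors the conditional fmt.Sprintf in commonVars: no variable
// is emitted for rules without a details template.
func detailsVar(details string) string {
	if details == "" {
		return ""
	}
	return fmt.Sprintf(`
var details = '%s'
`, details)
}

func main() {
	fmt.Print(detailsVar("my details")) // prints: var details = 'my details'
}
```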
4 changes: 4 additions & 0 deletions server/swagger.json
@@ -1958,6 +1958,10 @@
"type": "string",
"description": "Message to send when alert occurs."
},
"details": {
"type": "string",
"description": "Template for constructing a detailed HTML message for the alert. (Currently, only used for email/smtp"
},
"trigger": {
"type": "string",
"description": "Trigger defines the alerting structure; deadman alert if no data are received for the specified time range; relative alert if the data change relative to the data in a different time range; threshold alert if the data cross a boundary",
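To illustrate the documented property from the request side, a hypothetical rule payload carrying "details" and how it decodes into the Go AlertRule; the field values are assumptions, and only JSON keys whose tags appear in this diff are used.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/influxdata/chronograf"
)

func main() {
	// Hypothetical kapacitor rule payload.
	body := []byte(`{
		"name": "cpu-high",
		"every": "30s",
		"message": "CPU usage is high",
		"details": "<h1>{{ .ID }}</h1><p>{{ .Message }}</p>"
	}`)

	var rule chronograf.AlertRule
	if err := json.Unmarshal(body, &rule); err != nil {
		panic(err)
	}
	fmt.Println(rule.Details) // the "details" key maps to AlertRule.Details via its json tag
}
```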