Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to configure times to suspend chaos. #54

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ RUN go build -o /bin/chaoskube -v \
FROM alpine:3.6
MAINTAINER Linki <linki+docker.com@posteo.de>

RUN apk --no-cache add tzdata

RUN addgroup -S chaoskube && adduser -S -g chaoskube chaoskube
COPY --from=builder /bin/chaoskube /bin/chaoskube

Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,26 @@ spec:
...
```

## Limiting the Chaos

You can limit when chaos is introduced. To turn on the feature, add the `--limit-chaos` option and the `--location` option, which requires a timezone name from the [(IANA) tz database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). Alternatively, you can use `UTC` or `Local` as the location.
By default, this will only allow chaos to be introduced between 9:30 and 16:30, and not on Saturday or Sunday. You can also explicitly add a list of `YYYY-MM-DD`-formatted dates as "holidays". See the options chart below for more details.

## Options

| Option | Description | Default |
|------------------|---------------------------------------------------------------------|------------------------|
| `--interval` | interval between pod terminations | 10m |
| `--labels` | label selector to filter pods by | (matches everything) |
| `--annotations` | annotation selector to filter pods by | (matches everything) |
| `--namespaces` | namespace selector to filter pods by | (all namespaces) |
| `--dry-run` | don't kill pods, only log what would have been done | true |
| `--limit-chaos` | limit chaos according to specified times/days | false |
| `--location` | timezone from tz database, e.g "America/New_York", "UTC" or "Local" | (none) |
| `--off-days` | days when chaos is to be suspended. (Or "none") | "Saturday,Sunday" |
| `--chaos-hours` | start and end time for introducing chaos (24hr time) | "start:9:30,end:16:30" |
| `--holidays` | comma-separated, "YYYY-MM-DD" days to skip chaos | (empty list) |

## Contributing

Feel free to create issues or submit pull requests.
274 changes: 262 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package main

import (
"fmt"
"os"
"strconv"
"strings"
"time"

log "github.com/sirupsen/logrus"
Expand All @@ -15,19 +18,56 @@ import (
"github.com/linki/chaoskube/chaoskube"
)

// Names of the command-line flags that control chaos limiting, and the
// defaults applied when the corresponding flag is left empty.
const (
// Flag names, as registered with kingpin in init and echoed in error/log messages.
limitChaosOpt = "limit-chaos"
locationOpt = "location"
offDaysOpt = "off-days"
chaosHrsOpt = "chaos-hours"
holidaysOpt = "holidays"

// Default daily chaos window (24hr clock), used by setChaosHours when no
// chaos-hours value is supplied: 9:30 to 16:30.
defaultStartHr = 9
defaultStartMin = 30
defaultEndHr = 16
defaultEndMin = 30

// iso8601 is the Go reference-time layout for "YYYY-MM-DD" dates; setHolidays
// uses it to parse the holidays list.
iso8601 = "2006-01-02"
)

var (
labelString string
annString string
nsString string
master string
kubeconfig string
interval time.Duration
inCluster bool
dryRun bool
debug bool
version string
labelString string
annString string
nsString string
master string
kubeconfig string
interval time.Duration
inCluster bool
dryRun bool
debug bool
version string
limitChaos bool
locationString string
offDaysString string
chaosHrsString string
holidaysString string
)

// offtimeCfg holds configuration information related to when to suspend the chaos.
// It is populated by handleOfftimeConfig and consulted by timeToSuspend.
type offtimeCfg struct {
// enabled reports whether chaos limiting is switched on. When it is false,
// handleOfftimeConfig leaves every other field at its zero value.
enabled bool
// location is the timezone in which off days, holidays and the daily chaos
// window are evaluated.
location *time.Location
// offDays lists the weekdays on which chaos is suspended.
offDays []time.Weekday
// Daily chaos window on a 24hr clock. timeToSuspend treats the start as
// inclusive and the end as exclusive.
chaosStartHr int
chaosStartMin int
chaosEndHr int
chaosEndMin int
// holidays lists calendar dates on which chaos is suspended. setHolidays
// parses each date in location (not UTC), and timeToSuspend compares only
// the year, month and day against the current time in location.
holidays []time.Time
}

func init() {
kingpin.Flag("labels", "A set of labels to restrict the list of affected pods. Defaults to everything.").StringVar(&labelString)
kingpin.Flag("annotations", "A set of annotations to restrict the list of affected pods. Defaults to everything.").StringVar(&annString)
Expand All @@ -37,6 +77,14 @@ func init() {
kingpin.Flag("interval", "Interval between Pod terminations").Default("10m").DurationVar(&interval)
kingpin.Flag("dry-run", "If true, don't actually do anything.").Default("true").BoolVar(&dryRun)
kingpin.Flag("debug", "Enable debug logging.").BoolVar(&debug)
kingpin.Flag(limitChaosOpt, "Whether to limit chaos according to configuration. Defaults to false.").Default("false").BoolVar(&limitChaos)
kingpin.Flag(locationOpt, `Timezone location from the "tz database" (e.g. "America/Los_Angeles", not "PDT") `+
`for interpreting chaos-period start and stop times. No default.`).StringVar(&locationString)
help := fmt.Sprintf(`Daily start and end times for introducing chaos. Defaults to "start: %d:%d, end: %d:%d".`,
defaultStartHr, defaultStartMin, defaultEndHr, defaultEndMin)
kingpin.Flag(chaosHrsOpt, help).StringVar(&chaosHrsString)
kingpin.Flag(offDaysOpt, `A list of days of the week when chaos is suspended. Defaults to "Saturday, Sunday". (Use "none" for no off days.)`).StringVar(&offDaysString)
kingpin.Flag(holidaysOpt, `A list of ISO 8601 dates (YYYY-MM-DD) when chaos is suspended. Defaults to and empty list.`).StringVar(&holidaysString)
}

func main() {
Expand Down Expand Up @@ -83,6 +131,18 @@ func main() {
log.Infof("Filtering pods by namespaces: %s", namespaces.String())
}

offcfg, err := handleOfftimeConfig(limitChaos, locationString, offDaysString, chaosHrsString, holidaysString)
if err != nil {
log.Fatal(err)
}
if offcfg.enabled {
log.Infof("Limiting chaos. %s: %s, %s: %s, %s: %s, %s: %s",
locationOpt, locationString,
offDaysOpt, offDaysString,
chaosHrsOpt, chaosHrsString,
holidaysOpt, holidaysString)
}

chaoskube := chaoskube.New(
client,
labelSelector,
Expand All @@ -94,8 +154,12 @@ func main() {
)

for {
if err := chaoskube.TerminateVictim(); err != nil {
log.Fatal(err)
if timeToSuspend(time.Now(), *offcfg) {
log.Debugf("Chaos currently suspended")
} else {
if err := chaoskube.TerminateVictim(); err != nil {
log.Fatal(err)
}
}

log.Debugf("Sleeping for %s...", interval)
Expand Down Expand Up @@ -124,3 +188,189 @@ func newClient() (*kubernetes.Clientset, error) {

return client, nil
}

// setLocation resolves locationStr to a *time.Location and stores it on offcfg.
//
// locationStr must be a name understood by time.LoadLocation: an IANA
// "tz database" name, "UTC" or "Local". An empty string is rejected because a
// location is mandatory once chaos limiting is enabled.
//
// On error offcfg is left untouched and the returned error names the
// offending command-line option.
func setLocation(offcfg *offtimeCfg, locationStr string) error {
	if len(locationStr) == 0 {
		return fmt.Errorf("timezone location is required if %s is enabled", limitChaosOpt)
	}

	loc, err := time.LoadLocation(locationStr)
	if err != nil {
		// Pass the underlying error as an argument rather than splicing it into
		// the format string: its text echoes user input and may contain '%'.
		return fmt.Errorf(`%v - %s must be one of: a timezone from the "tz database" (IANA), "UTC" or "Local"`, err, locationOpt)
	}

	offcfg.location = loc
	return nil
}

// setOffDays fills offcfg.offDays from a comma-separated list of weekday names.
//
// Special values of offDaysStr:
//   - "none": no off days (offDays is left empty).
//   - "":     the default of Saturday and Sunday.
//
// Otherwise every item, after trimming surrounding whitespace, must exactly
// match an English weekday name as produced by time.Weekday.String (for
// example "Monday"). The first item that does not match aborts parsing with
// an error; days appended before that point remain in offcfg.offDays.
func setOffDays(offcfg *offtimeCfg, offDaysStr string) error {
	offcfg.offDays = make([]time.Weekday, 0, 2)

	switch offDaysStr {
	case "none":
		return nil
	case "":
		offcfg.offDays = append(offcfg.offDays, time.Saturday, time.Sunday)
		return nil
	}

	for _, raw := range strings.Split(offDaysStr, ",") {
		name := strings.TrimSpace(raw)
		matched := false
		// Walk the seven weekdays instead of spelling out one case per day.
		for wd := time.Sunday; wd <= time.Saturday; wd++ {
			if wd.String() == name {
				offcfg.offDays = append(offcfg.offDays, wd)
				matched = true
				break
			}
		}
		if !matched {
			return fmt.Errorf("unrecognized day of week in %s: %s", offDaysOpt, raw)
		}
	}
	return nil
}

// setChaosHours populates the daily chaos window on offcfg from chaosHrsStr.
//
// An empty chaosHrsStr selects the built-in default window. Otherwise the
// string is a comma-separated list of "start:HR:MIN" and "end:HR:MIN" items on
// a 24hr clock, e.g. "start: 9:30, end: 17:30"; whitespace around each item is
// ignored. A bound that is not mentioned keeps whatever value offcfg already
// holds.
//
// An error is returned when an item has an unknown key, has no time after the
// key, carries a time that cannot be parsed, when either bound lies outside
// 0:00-23:59, or when the start is later than the end (a window spanning
// midnight is not supported).
func setChaosHours(offcfg *offtimeCfg, chaosHrsStr string) error {
	if len(chaosHrsStr) == 0 {
		offcfg.chaosStartHr, offcfg.chaosStartMin = defaultStartHr, defaultStartMin
		offcfg.chaosEndHr, offcfg.chaosEndMin = defaultEndHr, defaultEndMin
	} else {
		for _, item := range strings.Split(chaosHrsStr, ",") {
			kv := strings.SplitN(strings.TrimSpace(item), ":", 2)

			// Pick the pair of fields this item configures.
			var hrDst, minDst *int
			switch kv[0] {
			case "start":
				hrDst, minDst = &offcfg.chaosStartHr, &offcfg.chaosStartMin
			case "end":
				hrDst, minDst = &offcfg.chaosEndHr, &offcfg.chaosEndMin
			default:
				return fmt.Errorf(`%s requires this format: "start: 9:30, end: 17:30". (Got key: "%s")`, chaosHrsOpt, kv[0])
			}

			// A bare key such as "start" has no value part; report it instead
			// of indexing past the end of kv.
			if len(kv) != 2 {
				return fmt.Errorf(`in %s, could not parse "%s"`, chaosHrsOpt, item)
			}
			hr, mn, err := getHrMin(kv[1])
			if err != nil {
				return fmt.Errorf(`in %s, could not parse "%s"`, chaosHrsOpt, item)
			}
			*hrDst, *minDst = hr, mn
		}
	}

	// Validate. Out-of-range values would otherwise be silently normalized by
	// time.Date (e.g. 25:00 becoming 01:00 on the following day).
	bounds := [][2]int{
		{offcfg.chaosStartHr, offcfg.chaosStartMin},
		{offcfg.chaosEndHr, offcfg.chaosEndMin},
	}
	for _, hm := range bounds {
		if hm[0] < 0 || hm[0] > 23 || hm[1] < 0 || hm[1] > 59 {
			return fmt.Errorf("in %s, %d:%02d is not a valid 24hr time", chaosHrsOpt, hm[0], hm[1])
		}
	}

	// Compare as minutes since midnight; an hour is 60 minutes.
	startMinutes := offcfg.chaosStartHr*60 + offcfg.chaosStartMin
	endMinutes := offcfg.chaosEndHr*60 + offcfg.chaosEndMin
	if startMinutes > endMinutes {
		return fmt.Errorf("%s may not specify a period that spans midnight, and must be expressed in 24hr time", chaosHrsOpt)
	}
	return nil
}

// getHrMin parses a 24hr clock time of the form "hr:min" (surrounding
// whitespace is ignored) and returns its hour and minute.
//
// An error is returned, and hr and min are both zero, when the string does not
// consist of exactly two ':'-separated fields, when either field is not an
// integer, or when the time lies outside 0:00-23:59.
func getHrMin(hrmMinStr string) (hr, min int, err error) {
	hm := strings.Split(strings.TrimSpace(hrmMinStr), ":")
	// Guard the indexing below; input such as "9" has no minute field.
	if len(hm) != 2 {
		return 0, 0, fmt.Errorf(`invalid time %q: expected "hr:min"`, hrmMinStr)
	}

	if hr, err = strconv.Atoi(hm[0]); err != nil {
		return 0, 0, err
	}
	if min, err = strconv.Atoi(hm[1]); err != nil {
		return 0, 0, err
	}

	if hr < 0 || hr > 23 || min < 0 || min > 59 {
		return 0, 0, fmt.Errorf("invalid time %q: must be between 0:00 and 23:59", hrmMinStr)
	}
	return hr, min, nil
}

// setHolidays parses a comma-separated list of "YYYY-MM-DD" dates into
// offcfg.holidays.
//
// An empty holidaysStr leaves offcfg.holidays nil. Each item is trimmed of
// surrounding whitespace and parsed in offcfg.location, so the location must
// already be set when holidaysStr is non-empty. The first item that fails to
// parse aborts with an error quoting the item as written; dates appended
// before that point remain in offcfg.holidays.
func setHolidays(offcfg *offtimeCfg, holidaysStr string) error {
	if holidaysStr == "" {
		// Nothing configured: keep holidays nil.
		return nil
	}

	entries := strings.Split(holidaysStr, ",")
	offcfg.holidays = make([]time.Time, 0)
	for i := range entries {
		day, parseErr := time.ParseInLocation(iso8601, strings.TrimSpace(entries[i]), offcfg.location)
		if parseErr != nil {
			return fmt.Errorf(`in %s, invalid date format. "YYYY-MM-DD" required. (Got "%s")`, holidaysOpt, entries[i])
		}
		offcfg.holidays = append(offcfg.holidays, day)
	}
	return nil
}

// handleOfftimeConfig builds an offtimeCfg from the raw command-line values.
//
// When enabled is false the remaining arguments are ignored and a config with
// only the enabled flag (false) is returned. Otherwise the location, off days,
// chaos hours and holidays are applied in that order; the location goes first
// because setHolidays parses dates in it. The first setter that fails stops
// processing and its error is returned together with the partially populated
// config.
func handleOfftimeConfig(enabled bool, locationStr, offDaysStr, chaosHrsStr, holidaysStr string) (*offtimeCfg, error) {
	cfg := &offtimeCfg{enabled: enabled}
	if !cfg.enabled {
		// Limiting is off; nothing else needs to be parsed.
		return cfg, nil
	}

	steps := []struct {
		apply func(*offtimeCfg, string) error
		value string
	}{
		{setLocation, locationStr},
		{setOffDays, offDaysStr},
		{setChaosHours, chaosHrsStr},
		{setHolidays, holidaysStr},
	}
	for _, step := range steps {
		if err := step.apply(cfg, step.value); err != nil {
			return cfg, err
		}
	}
	return cfg, nil
}

// timeToSuspend reports whether chaos should be suspended at currTime
// according to offcfg.
//
// It returns false whenever limiting is disabled. Otherwise currTime is
// converted to offcfg.location and chaos is suspended when any of these hold:
//   - the local weekday is one of offcfg.offDays;
//   - the local calendar date matches the date of one of offcfg.holidays;
//   - the local time falls outside today's chaos window, which includes the
//     start time and excludes the end time.
func timeToSuspend(currTime time.Time, offcfg offtimeCfg) bool {
	if !offcfg.enabled {
		return false
	}

	now := currTime.In(offcfg.location)

	// Off days.
	weekday := now.Weekday()
	for i := range offcfg.offDays {
		if offcfg.offDays[i] == weekday {
			return true
		}
	}

	// Holidays: only the calendar date matters.
	year, month, day := now.Date()
	for i := range offcfg.holidays {
		hYear, hMonth, hDay := offcfg.holidays[i].Date()
		if hYear == year && hMonth == month && hDay == day {
			return true
		}
	}

	// Time of day: suspended before today's window opens or once it has closed.
	windowOpen := time.Date(year, month, day, offcfg.chaosStartHr, offcfg.chaosStartMin, 0, 0, offcfg.location)
	windowClose := time.Date(year, month, day, offcfg.chaosEndHr, offcfg.chaosEndMin, 0, 0, offcfg.location)
	return now.Before(windowOpen) || !now.Before(windowClose)
}
Loading