Skip to content

Commit

Permalink
stability: support more fault injection (#345)
Browse files Browse the repository at this point in the history
* fault-trigger: support more fault injection
  • Loading branch information
cwen0 authored and weekface committed Mar 25, 2019
1 parent f89131a commit fdd44d1
Show file tree
Hide file tree
Showing 11 changed files with 442 additions and 216 deletions.
23 changes: 0 additions & 23 deletions tests/cmd/e2e/config.yaml

This file was deleted.

50 changes: 49 additions & 1 deletion tests/cmd/stability/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,57 @@ func main() {
glog.Fatal(err)
}

time.Sleep(1 * time.Minute)
time.Sleep(30 * time.Second)

if err := fa.StartETCD("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(10 * time.Second)

if err := fa.StopKubeAPIServer("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(30 * time.Second)

if err := fa.StartKubeAPIServer("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(10 * time.Second)

if err := fa.StopKubeScheduler("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(30 * time.Second)

if err := fa.StartKubeScheduler("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(10 * time.Second)

if err := fa.StopKubeControllerManager("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(30 * time.Second)

if err := fa.StartKubeControllerManager("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(10 * time.Second)

if err := fa.StopKubelet("172.16.4.171"); err != nil {
glog.Fatal(err)
}

time.Sleep(30 * time.Second)

if err := fa.StartKubelet("172.16.4.171"); err != nil {
glog.Fatal(err)
}
}
136 changes: 99 additions & 37 deletions tests/fault.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,25 @@ import (
"k8s.io/client-go/kubernetes"
)

// Action names understood by serviceAction: it switches on these two values
// to decide whether a service is started or stopped.
const (
startAction = "start"
stopAction = "stop"
)

type FaultTriggerActions interface {
StopNode(physicalNode string, node string) error
StartNode(physicalNode string, node string) error
StopETCD(nodes ...string) error
StartETCD(nodes ...string) error
StopKubelet(node string) error
StartKubelet(node string) error
StopKubeAPIServer(node string) error
StartKubeAPIServer(node string) error
StopKubeControllerManager(node string) error
StartKubeControllerManager(node string) error
StopKubeScheduler(node string) error
StartKubeScheduler(node string) error
// TODO: support more faults
// StopKubeAPIServer() error
// StartKubeAPIServer() error
// StopKubeControllerManager() error
// StartKubeControllerManager() error
// StopKubeScheduler() error
// StartKubeScheduler() error
// StopKubeProxy(node string) error
// StartKubeProxy(node string) error
// DiskCorruption(node string) error
Expand Down Expand Up @@ -97,16 +102,9 @@ func (fa *faultTriggerActions) StopETCD(nodes ...string) error {
}

for _, node := range nodes {
faultCli := client.NewClient(client.Config{
Addr: fa.genFaultTriggerAddr(node),
})

if err := faultCli.StopETCD(); err != nil {
glog.Errorf("failed to stop etcd %s: %v", node, err)
if err := fa.serviceAction(node, manager.ETCDService, stopAction); err != nil {
return err
}

glog.Infof("etcd %s is stopped", node)
}

return nil
Expand All @@ -122,49 +120,113 @@ func (fa *faultTriggerActions) StartETCD(nodes ...string) error {
}

for _, node := range nodes {
faultCli := client.NewClient(client.Config{
Addr: fa.genFaultTriggerAddr(node),
})

if err := faultCli.StartETCD(); err != nil {
glog.Errorf("failed to start etcd %s: %v", node, err)
if err := fa.serviceAction(node, manager.ETCDService, startAction); err != nil {
return err
}

glog.Infof("etcd %s is started", node)
}

return nil
}

// StopKubelet stops the kubelet service for the given node by calling
// serviceAction with manager.KubeletService and stopAction, and returns
// whatever error serviceAction reports.
func (fa *faultTriggerActions) StopKubelet(node string) error {
faultCli := client.NewClient(client.Config{
Addr: fa.genFaultTriggerAddr(node),
})
return fa.serviceAction(node, manager.KubeletService, stopAction)
}

if err := faultCli.StopKubelet(); err != nil {
glog.Errorf("failed to stop kubelet %s: %v", node, err)
return err
}
// StartKubelet starts the kubelet service for the given node by calling
// serviceAction with manager.KubeletService and startAction, and returns
// whatever error serviceAction reports.
func (fa *faultTriggerActions) StartKubelet(node string) error {
return fa.serviceAction(node, manager.KubeletService, startAction)
}

glog.Infof("kubelet %s is stopped", node)
// // StopKubeProxy stops the kube-proxy service.
//func (fa *faultTriggerActions) StopKubeProxy(node string) error {
// return fa.serviceAction(node, manager.KubeProxyService, stopAction)
//}
//
//// StartKubeProxy starts the kube-proxy service.
//func (fa *faultTriggerActions) StartKubeProxy(node string) error {
// return fa.serviceAction(node, manager.KubeProxyService, startAction)
//}

// StopKubeScheduler stops the kube-scheduler service for the given node by
// calling serviceAction with manager.KubeSchedulerService and stopAction, and
// returns whatever error serviceAction reports.
func (fa *faultTriggerActions) StopKubeScheduler(node string) error {
return fa.serviceAction(node, manager.KubeSchedulerService, stopAction)
}

return nil
// StartKubeScheduler starts the kube-scheduler service for the given node by
// calling serviceAction with manager.KubeSchedulerService and startAction,
// and returns whatever error serviceAction reports.
func (fa *faultTriggerActions) StartKubeScheduler(node string) error {
return fa.serviceAction(node, manager.KubeSchedulerService, startAction)
}

// StartKubelet starts the kubelet service.
func (fa *faultTriggerActions) StartKubelet(node string) error {
// StopKubeControllerManager stops the kube-controller-manager service for the
// given node by calling serviceAction with
// manager.KubeControllerManagerService and stopAction, and returns whatever
// error serviceAction reports.
func (fa *faultTriggerActions) StopKubeControllerManager(node string) error {
return fa.serviceAction(node, manager.KubeControllerManagerService, stopAction)
}

// StartKubeControllerManager starts the kube-controller-manager service for
// the given node by calling serviceAction with
// manager.KubeControllerManagerService and startAction, and returns whatever
// error serviceAction reports.
func (fa *faultTriggerActions) StartKubeControllerManager(node string) error {
return fa.serviceAction(node, manager.KubeControllerManagerService, startAction)
}

// StopKubeAPIServer stops the apiserver service for the given node by calling
// serviceAction with manager.KubeAPIServerService and stopAction, and returns
// whatever error serviceAction reports.
func (fa *faultTriggerActions) StopKubeAPIServer(node string) error {
return fa.serviceAction(node, manager.KubeAPIServerService, stopAction)
}

// StartKubeAPIServer starts the apiserver service for the given node by
// calling serviceAction with manager.KubeAPIServerService and startAction,
// and returns whatever error serviceAction reports.
func (fa *faultTriggerActions) StartKubeAPIServer(node string) error {
return fa.serviceAction(node, manager.KubeAPIServerService, startAction)
}

// serviceAction performs action (startAction or stopAction) on the service
// identified by serverName, using a client built with client.NewClient whose
// address is derived from node via fa.genFaultTriggerAddr.
//
// serverName is matched against manager.KubeletService,
// manager.KubeSchedulerService, manager.KubeControllerManagerService,
// manager.KubeAPIServerService and manager.ETCDService; any other service, or
// any other action, produces an error. A failed call is logged with
// glog.Errorf and returned; on success a glog.Infof line is written and nil
// is returned.
//
// NOTE(review): the unsupported-service default under startAction returns
// immediately, while the matching default under stopAction falls through to
// the shared error logging below — confirm whether this asymmetry is intended.
// NOTE(review): this span appears to carry both sides of a diff (two Addr
// fields, a leftover StartKubelet call, two Infof lines) — confirm against
// the checked-in file before relying on it.
func (fa *faultTriggerActions) serviceAction(node string, serverName string, action string) error {
faultCli := client.NewClient(client.Config{
Addr: node,
Addr: fa.genFaultTriggerAddr(node),
})

if err := faultCli.StartKubelet(); err != nil {
glog.Errorf("failed to start kubelet %s: %v", node, err)
var err error
switch action {
case startAction:
switch serverName {
case manager.KubeletService:
err = faultCli.StartKubelet()
case manager.KubeSchedulerService:
err = faultCli.StartKubeScheduler()
case manager.KubeControllerManagerService:
err = faultCli.StartKubeControllerManager()
case manager.KubeAPIServerService:
err = faultCli.StartKubeAPIServer()
case manager.ETCDService:
err = faultCli.StartETCD()
default:
err = fmt.Errorf("%s %s is not supported", action, serverName)
return err
}
case stopAction:
switch serverName {
case manager.KubeletService:
err = faultCli.StopKubelet()
case manager.KubeSchedulerService:
err = faultCli.StopKubeScheduler()
case manager.KubeControllerManagerService:
err = faultCli.StopKubeControllerManager()
case manager.KubeAPIServerService:
err = faultCli.StopKubeAPIServer()
case manager.ETCDService:
err = faultCli.StopETCD()
default:
err = fmt.Errorf("%s %s is not supported", action, serverName)
}
default:
err = fmt.Errorf("action %s is not supported", action)
return err
}

if err != nil {
glog.Errorf("failed to %s %s %s: %v", action, serverName, node, err)
return err
}

glog.Infof("kubelet %s is started", node)
glog.Infof("%s %s %s successfully", action, serverName, node)

return nil
}
Expand Down
6 changes: 6 additions & 0 deletions tests/manifests/e2e-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,11 @@ data:
- 172.16.4.171
- 172.16.4.172
- 172.16.4.173
controller_manager:
- physical_node: 172.16.4.39
nodes:
- 172.16.4.171
- 172.16.4.172
- 172.16.4.173
31 changes: 24 additions & 7 deletions tests/pkg/fault-trigger/api/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@

package api

import restful "github.com/emicklei/go-restful"
import (
"fmt"

restful "github.com/emicklei/go-restful"
"github.com/pingcap/tidb-operator/tests/pkg/fault-trigger/manager"
)

const (
// APIPrefix defines a prefix string for fault-trigger api
Expand All @@ -28,14 +33,26 @@ func (s *Server) newService() *restful.WebService {
Produces(restful.MIME_JSON)

ws.Route(ws.GET("/vms").To(s.listVMs))
ws.Route(ws.GET("/vm/{name}/start").To(s.startVM))
ws.Route(ws.GET("/vm/{name}/stop").To(s.stopVM))
ws.Route(ws.POST("/vm/{name}/start").To(s.startVM))
ws.Route(ws.POST("/vm/{name}/stop").To(s.stopVM))

ws.Route(ws.POST(fmt.Sprintf("/%s/start", manager.ETCDService)).To(s.startETCD))
ws.Route(ws.POST(fmt.Sprintf("/%s/stop", manager.ETCDService)).To(s.stopETCD))

ws.Route(ws.POST(fmt.Sprintf("/%s/start", manager.KubeletService)).To(s.startKubelet))
ws.Route(ws.POST(fmt.Sprintf("/%s/stop", manager.KubeletService)).To(s.stopKubelet))

ws.Route(ws.POST(fmt.Sprintf("/%s/start", manager.KubeAPIServerService)).To(s.startKubeAPIServer))
ws.Route(ws.POST(fmt.Sprintf("/%s/stop", manager.KubeAPIServerService)).To(s.stopKubeAPIServer))

ws.Route(ws.GET("/etcd/start").To(s.startETCD))
ws.Route(ws.GET("/etcd/stop").To(s.stopETCD))
ws.Route(ws.POST(fmt.Sprintf("/%s/start", manager.KubeSchedulerService)).To(s.startKubeScheduler))
ws.Route(ws.POST(fmt.Sprintf("/%s/stop", manager.KubeSchedulerService)).To(s.stopKubeScheduler))

ws.Route(ws.GET("/kubelet/start").To(s.startKubelet))
ws.Route(ws.GET("/kubelet/stop").To(s.stopKubelet))
ws.Route(ws.POST(fmt.Sprintf("/%s/start", manager.KubeControllerManagerService)).To(s.startKubeControllerManager))
ws.Route(ws.POST(fmt.Sprintf("/%s/stop", manager.KubeControllerManagerService)).To(s.stopKubeControllerManager))
// TODO: support kube-proxy
// ws.Route(ws.POST(fmt.Sprintf("/%s/start", manager.KubeProxyService)).To(s.startKubeProxy))
// ws.Route(ws.POST(fmt.Sprintf("/%s/stop", manager.KubeProxyService)).To(s.stopKubeProxy))

return ws
}
34 changes: 32 additions & 2 deletions tests/pkg/fault-trigger/api/server.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
Expand Down Expand Up @@ -131,6 +129,38 @@ func (s *Server) stopKubelet(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StopKubelet, "stopKubelet")
}

// startKubeAPIServer is the handler for the kube API server start request.
// It forwards req and resp to s.action together with
// s.mgr.StartKubeAPIServer and the string "startKubeAPIServer" (presumably a
// label for logging/reporting — confirm in s.action).
func (s *Server) startKubeAPIServer(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StartKubeAPIServer, "startKubeAPIServer")
}

// stopKubeAPIServer is the handler for the kube API server stop request.
// It forwards req and resp to s.action together with s.mgr.StopKubeAPIServer
// and the string "stopKubeAPIServer" (presumably a label for
// logging/reporting — confirm in s.action).
func (s *Server) stopKubeAPIServer(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StopKubeAPIServer, "stopKubeAPIServer")
}

// func (s *Server) startKubeProxy(req *restful.Request, resp *restful.Response) {
// s.action(req, resp, s.mgr.StartKubeProxy, "startKubeProxy")
// }
//
// func (s *Server) stopKubeProxy(req *restful.Request, resp *restful.Response) {
// s.action(req, resp, s.mgr.StopKubeProxy, "stopKubeProxy")
// }

// startKubeScheduler is the handler for the kube scheduler start request.
// It forwards req and resp to s.action together with
// s.mgr.StartKubeScheduler and the string "startKubeScheduler" (presumably a
// label for logging/reporting — confirm in s.action).
func (s *Server) startKubeScheduler(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StartKubeScheduler, "startKubeScheduler")
}

// stopKubeScheduler is the handler for the kube scheduler stop request.
// It forwards req and resp to s.action together with s.mgr.StopKubeScheduler
// and the string "stopKubeScheduler" (presumably a label for
// logging/reporting — confirm in s.action).
func (s *Server) stopKubeScheduler(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StopKubeScheduler, "stopKubeScheduler")
}

// startKubeControllerManager is the handler for the kube controller manager
// start request. It forwards req and resp to s.action together with
// s.mgr.StartKubeControllerManager and the string
// "startKubeControllerManager" (presumably a label for logging/reporting —
// confirm in s.action).
func (s *Server) startKubeControllerManager(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StartKubeControllerManager, "startKubeControllerManager")
}

// stopKubeControllerManager is the handler for the kube controller manager
// stop request. It forwards req and resp to s.action together with
// s.mgr.StopKubeControllerManager and the string
// "stopKubeControllerManager" (presumably a label for logging/reporting —
// confirm in s.action).
func (s *Server) stopKubeControllerManager(req *restful.Request, resp *restful.Response) {
s.action(req, resp, s.mgr.StopKubeControllerManager, "stopKubeControllerManager")
}

func (s *Server) action(
req *restful.Request,
resp *restful.Response,
Expand Down
Loading

0 comments on commit fdd44d1

Please sign in to comment.