Skip to content

Commit

Permalink
Merge pull request #7752 from medyagh/node_pressure_redo
Browse files Browse the repository at this point in the history
Check node pressure & new option "node_ready" for --wait flag
  • Loading branch information
medyagh committed Apr 18, 2020
2 parents cb62197 + 21ad12f commit fe4fdee
Show file tree
Hide file tree
Showing 9 changed files with 328 additions and 18 deletions.
4 changes: 3 additions & 1 deletion pkg/addons/addons.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,9 @@ func Start(wg *sync.WaitGroup, cc *config.ClusterConfig, toEnable map[string]boo

var awg sync.WaitGroup

out.T(out.AddonEnable, "Enabling addons: {{.addons}}", out.V{"addons": strings.Join(toEnableList, ", ")})
defer func() { // making it show after verifications( not perfect till #7613 is closed)
out.T(out.AddonEnable, "Enabled addons: {{.addons}}", out.V{"addons": strings.Join(toEnableList, ", ")})
}()
for _, a := range toEnableList {
awg.Add(1)
go func(name string) {
Expand Down
10 changes: 6 additions & 4 deletions pkg/minikube/bootstrapper/bsutil/kverify/kverify.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,23 @@ const (
// DefaultSAWaitKey is the name used in the flags for default service account
DefaultSAWaitKey = "default_sa"
// AppsRunning is the name used in the flags for waiting for k8s-apps to be running
AppsRunning = "apps_running"
AppsRunningKey = "apps_running"
// NodeReadyKey is the name used in the flags for waiting for the node status to be ready
NodeReadyKey = "node_ready"
)

// vars related to the --wait flag
var (
// DefaultComponents is map of the the default components to wait for
DefaultComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true}
// NoWaitComponents is map of componets to wait for if specified 'none' or 'false'
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunning: false}
NoComponents = map[string]bool{APIServerWaitKey: false, SystemPodsWaitKey: false, DefaultSAWaitKey: false, AppsRunningKey: false, NodeReadyKey: false}
// AllComponents is map for waiting for all components.
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunning: true}
AllComponents = map[string]bool{APIServerWaitKey: true, SystemPodsWaitKey: true, DefaultSAWaitKey: true, AppsRunningKey: true}
// DefaultWaitList is list of all default components to wait for. only names to be used for start flags.
DefaultWaitList = []string{APIServerWaitKey, SystemPodsWaitKey}
// AllComponentsList list of all valid components keys to wait for. only names to be used used for start flags.
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunning}
AllComponentsList = []string{APIServerWaitKey, SystemPodsWaitKey, DefaultSAWaitKey, AppsRunningKey, NodeReadyKey}
// AppsRunningList running list are valid k8s-app components to wait for them to be running
AppsRunningList = []string{
"kube-dns", // coredns
Expand Down
142 changes: 142 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/node_conditions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)

// NodeCondition represents a favorable or unfavorable node condition.
type NodeCondition struct {
Type v1.NodeConditionType
Status v1.ConditionStatus
Reason string
Message string
}

// DiskPressure detects if the condition is disk pressure
func (pc *NodeCondition) DiskPressure() bool {
return pc.Type == v1.NodeDiskPressure && pc.Status == v1.ConditionTrue
}

// MemoryPressure detects if the condition is memory pressure
func (pc *NodeCondition) MemoryPressure() bool {
return pc.Type == v1.NodeMemoryPressure && pc.Status == v1.ConditionTrue
}

// PIDPressure detects if the condition is PID pressure
func (pc *NodeCondition) PIDPressure() bool {
return pc.Type == v1.NodePIDPressure && pc.Status == v1.ConditionTrue
}

// NetworkUnavailable detects if the condition is PID pressure
func (pc *NodeCondition) NetworkUnavailable() bool {
return pc.Type == v1.NodeNetworkUnavailable && pc.Status == v1.ConditionTrue
}

const errTextFormat = "node has unwanted condition %q : Reason %q Message: %q"

// ErrMemoryPressure is thrown when there is node memory pressure condition
type ErrMemoryPressure struct {
NodeCondition
}

func (e *ErrMemoryPressure) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// ErrDiskPressure is thrown when there is node disk pressure condition
type ErrDiskPressure struct {
NodeCondition
}

func (e *ErrDiskPressure) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// ErrPIDPressure is thrown when there is node PID pressure condition
type ErrPIDPressure struct {
NodeCondition
}

func (e *ErrPIDPressure) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// ErrNetworkNotReady is thrown when there is node condition is network not ready
type ErrNetworkNotReady struct {
NodeCondition
}

func (e *ErrNetworkNotReady) Error() string {
return fmt.Sprintf(errTextFormat, e.Type, e.Reason, e.Message)
}

// NodePressure verfies that node is not under disk, memory, pid or network pressure.
func NodePressure(cs *kubernetes.Clientset) error {
glog.Info("verifying NodePressure condition ...")
start := time.Now()
defer func() {
glog.Infof("duration metric: took %s to run NodePressure ...", time.Since(start))
}()

ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{})
if err != nil {
return errors.Wrap(err, "list nodes")
}

for _, n := range ns.Items {
glog.Infof("node storage ephemeral capacity is %s", n.Status.Capacity.StorageEphemeral())
glog.Infof("node cpu capacity is %s", n.Status.Capacity.Cpu().AsDec())
for _, c := range n.Status.Conditions {
pc := NodeCondition{Type: c.Type, Status: c.Status, Reason: c.Reason, Message: c.Message}
if pc.DiskPressure() {
return &ErrDiskPressure{
NodeCondition: pc,
}
}

if pc.MemoryPressure() {
return &ErrMemoryPressure{
NodeCondition: pc,
}
}

if pc.PIDPressure() {
return &ErrPIDPressure{
NodeCondition: pc,
}
}

if pc.NetworkUnavailable() {
return &ErrNetworkNotReady{
NodeCondition: pc,
}
}

}
}
return nil
}
64 changes: 64 additions & 0 deletions pkg/minikube/bootstrapper/bsutil/kverify/node_ready.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
Copyright 2020 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package kverify verifies a running kubernetes cluster is healthy
package kverify

import (
"fmt"
"time"

"github.com/golang/glog"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
)

// WaitForNodeReady waits till kube client reports node status as "ready"
func WaitForNodeReady(cs *kubernetes.Clientset, timeout time.Duration) error {
glog.Info("waiting for node status to be ready ...")
start := time.Now()
defer func() {
glog.Infof("duration metric: took %s to wait for WaitForNodeReady...", time.Since(start))
}()
checkReady := func() (bool, error) {
if time.Since(start) > timeout {
return false, fmt.Errorf("wait for node to be ready timed out")
}
ns, err := cs.CoreV1().Nodes().List(meta.ListOptions{})
if err != nil {
glog.Infof("error listing nodes will retry: %v", err)
return false, nil
}

for _, n := range ns.Items {
for _, c := range n.Status.Conditions {
if c.Type == v1.NodeReady && c.Status != v1.ConditionTrue {
glog.Infof("node %q has unwanted condition %q : Reason %q Message: %q. will try. ", n.Name, c.Type, c.Reason, c.Message)
return false, nil
}
}
}
return true, nil
}
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, checkReady); err != nil {
return errors.Wrapf(err, "wait node ready")
}
return nil
}
Loading

0 comments on commit fe4fdee

Please sign in to comment.