Skip to content

Commit

Permalink
Merge pull request #214 from slintes/improve-safe-time-handling
Browse files Browse the repository at this point in the history
Improve safe time to reboot handling and more
  • Loading branch information
slintes authored Jun 24, 2024
2 parents 1569423 + 12c3601 commit 4b91e5c
Show file tree
Hide file tree
Showing 23 changed files with 771 additions and 444 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ ENVTEST = $(shell pwd)/bin/setup-envtest
# Use TEST_OPS to pass further options to `go test` (e.g. -ginkgo.v and/or -ginkgo.focus)
export TEST_OPS ?= ""
.PHONY: test
test: envtest manifests generate fmt vet ## Run tests.
test: envtest generate fix-imports manifests fmt vet ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path --bin-dir $(PROJECT_DIR)/testbin)" \
KUBEBUILDER_CONTROLPLANE_STOP_TIMEOUT="60s"\
go test ./api/... ./controllers/... ./pkg/... -coverprofile cover.out -v ${TEST_OPS}
Expand Down Expand Up @@ -487,4 +487,4 @@ fix-imports: sort-imports ## Sort imports
$(SORT_IMPORTS) -w .

.PHONY: full-gen
full-gen: tidy vendor generate manifests bundle fix-imports bundle-reset ## generates all automatically generated content
full-gen: tidy vendor generate manifests bundle fix-imports bundle-reset ## generates all automatically generated content
68 changes: 36 additions & 32 deletions api/v1alpha1/selfnoderemediationconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,102 +25,107 @@ import (
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

const (
ConfigCRName = "self-node-remediation-config"
defaultWatchdogPath = "/dev/watchdog"
DefaultSafeToAssumeNodeRebootTimeout = 180
defaultIsSoftwareRebootEnabled = true
ConfigCRName = "self-node-remediation-config"
defaultWatchdogPath = "/dev/watchdog"
defaultIsSoftwareRebootEnabled = true
)

// SelfNodeRemediationConfigSpec defines the desired state of SelfNodeRemediationConfig
type SelfNodeRemediationConfigSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make" to regenerate code after modifying this file

// WatchdogFilePath is the watchdog file path that should be available on each node, e.g. /dev/watchdog
// WatchdogFilePath is the watchdog file path that should be available on each node, e.g. /dev/watchdog.
// +kubebuilder:default=/dev/watchdog
// +optional
WatchdogFilePath string `json:"watchdogFilePath,omitempty"`

// SafeTimeToAssumeNodeRebootedSeconds is the time after which the healthy self node remediation
// agents will assume the unhealthy node has been rebooted, and it is safe to recover affected workloads.
// This is extremely important as starting replacement Pods while they are still running on the failed
// node will likely lead to data corruption and violation of run-once semantics.
// In an effort to prevent this, the operator ignores values lower than a minimum calculated from the
// ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields.
// +kubebuilder:validation:Minimum=0
// +kubebuilder:default=180
SafeTimeToAssumeNodeRebootedSeconds int `json:"safeTimeToAssumeNodeRebootedSeconds,omitempty"`
// ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields,
// and the unhealthy node's individual watchdog timeout.
// +optional
SafeTimeToAssumeNodeRebootedSeconds *int `json:"safeTimeToAssumeNodeRebootedSeconds,omitempty"`

// The timeout for api-server connectivity check.
// Valid time units are "ms", "s", "m", "h".
// +optional
// +kubebuilder:default:="5s"
// +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// +optional
PeerApiServerTimeout *metav1.Duration `json:"peerApiServerTimeout,omitempty"`

// the frequency for api-server connectivity check
// The frequency for api-server connectivity check.
// Valid time units are "ms", "s", "m", "h".
// +optional
// +kubebuilder:default:="15s"
// +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// the frequency for api-server connectivity check
// +optional
ApiCheckInterval *metav1.Duration `json:"apiCheckInterval,omitempty"`

// The frequency for updating peers.
// Valid time units are "ms", "s", "m", "h".
// +optional
// +kubebuilder:default:="15m"
// +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// +optional
PeerUpdateInterval *metav1.Duration `json:"peerUpdateInterval,omitempty"`

// Timeout for each api-connectivity check.
// Valid time units are "ms", "s", "m", "h".
// +optional
// +kubebuilder:default:="5s"
// +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// timeout for each api-connectivity check
// +optional
ApiServerTimeout *metav1.Duration `json:"apiServerTimeout,omitempty"`

// Timeout for establishing connection to peer.
// Valid time units are "ms", "s", "m", "h".
// +optional
// +kubebuilder:default:="5s"
// +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// timeout for establishing connection to peer
// +optional
PeerDialTimeout *metav1.Duration `json:"peerDialTimeout,omitempty"`

// Timeout for each peer request.
// Valid time units are "ms", "s", "m", "h".
// +optional
// +kubebuilder:default:="5s"
// +kubebuilder:validation:Pattern="^(0|([0-9]+(\\.[0-9]+)?(ms|s|m|h)))$"
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
// timeout for each peer request
// +optional
PeerRequestTimeout *metav1.Duration `json:"peerRequestTimeout,omitempty"`

// +optional
// After this threshold, the node will start contacting its peers.
// +kubebuilder:default:=3
// +kubebuilder:validation:Minimum=1
// after this threshold, the node will start contacting its peers
// +optional
MaxApiErrorThreshold int `json:"maxApiErrorThreshold,omitempty"`

// IsSoftwareRebootEnabled indicates whether self node remediation agent will do software reboot,
// if the watchdog device can not be used or will use watchdog only,
// without a fallback to software reboot
// without a fallback to software reboot.
// +kubebuilder:default=true
// +optional
IsSoftwareRebootEnabled bool `json:"isSoftwareRebootEnabled,omitempty"`

// EndpointHealthCheckUrl is a URL that self node remediation agents running on control-plane nodes will try to access when they can't contact their peers.
// This is a part of self diagnostics which will decide whether the node should be remediated or not.
// It will be ignored when empty (which is the default).
// +optional
EndpointHealthCheckUrl string `json:"endpointHealthCheckUrl,omitempty"`

// HostPort is used for internal communication between SNR agents.
// +optional
// +kubebuilder:default:=30001
// +kubebuilder:validation:Minimum=1
// +optional
HostPort int `json:"hostPort,omitempty"`

// CustomDsTolerations allows adding custom tolerations to the snr agents that are running on the ds, in order to support remediation for different types of nodes.
// +optional
CustomDsTolerations []v1.Toleration `json:"customDsTolerations,omitempty"`
}

Expand Down Expand Up @@ -163,9 +168,8 @@ func NewDefaultSelfNodeRemediationConfig() SelfNodeRemediationConfig {
Name: ConfigCRName,
},
Spec: SelfNodeRemediationConfigSpec{
WatchdogFilePath: defaultWatchdogPath,
SafeTimeToAssumeNodeRebootedSeconds: DefaultSafeToAssumeNodeRebootTimeout,
IsSoftwareRebootEnabled: defaultIsSoftwareRebootEnabled,
WatchdogFilePath: defaultWatchdogPath,
IsSoftwareRebootEnabled: defaultIsSoftwareRebootEnabled,
},
}
}
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,17 @@ spec:
apiCheckInterval:
default: 15s
description: |-
the frequency for api-server connectivity check
The frequency for api-server connectivity check.
Valid time units are "ms", "s", "m", "h".
the frequency for api-server connectivity check
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
apiServerTimeout:
default: 5s
description: |-
Timeout for each api-connectivity check.
Valid time units are "ms", "s", "m", "h".
timeout for each api-connectivity check
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
customDsTolerations:
description: CustomDsTolerations allows to add custom tolerations
Expand Down Expand Up @@ -119,53 +119,56 @@ spec:
description: |-
IsSoftwareRebootEnabled indicates whether self node remediation agent will do software reboot,
if the watchdog device can not be used or will use watchdog only,
without a fallback to software reboot
without a fallback to software reboot.
type: boolean
maxApiErrorThreshold:
default: 3
description: after this threshold, the node will start contacting
its peers
description: After this threshold, the node will start contacting
its peers.
minimum: 1
type: integer
peerApiServerTimeout:
default: 5s
description: Valid time units are "ms", "s", "m", "h".
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
description: |-
The timeout for api-server connectivity check.
Valid time units are "ms", "s", "m", "h".
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
peerDialTimeout:
default: 5s
description: |-
Timeout for establishing connection to peer.
Valid time units are "ms", "s", "m", "h".
timeout for establishing connection to peer
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
peerRequestTimeout:
default: 5s
description: |-
Timeout for each peer request.
Valid time units are "ms", "s", "m", "h".
timeout for each peer request
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
peerUpdateInterval:
default: 15m
description: Valid time units are "ms", "s", "m", "h".
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
description: |-
The frequency for updating peers.
Valid time units are "ms", "s", "m", "h".
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
safeTimeToAssumeNodeRebootedSeconds:
default: 180
description: |-
SafeTimeToAssumeNodeRebootedSeconds is the time after which the healthy self node remediation
agents will assume the unhealthy node has been rebooted, and it is safe to recover affected workloads.
This is extremely important as starting replacement Pods while they are still running on the failed
node will likely lead to data corruption and violation of run-once semantics.
In an effort to prevent this, the operator ignores values lower than a minimum calculated from the
ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields.
minimum: 0
ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields,
and the unhealthy node's individual watchdog timeout.
type: integer
watchdogFilePath:
default: /dev/watchdog
description: WatchdogFilePath is the watchdog file path that should
be available on each node, e.g. /dev/watchdog
be available on each node, e.g. /dev/watchdog.
type: string
type: object
status:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,17 @@ spec:
apiCheckInterval:
default: 15s
description: |-
the frequency for api-server connectivity check
The frequency for api-server connectivity check.
Valid time units are "ms", "s", "m", "h".
the frequency for api-server connectivity check
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
apiServerTimeout:
default: 5s
description: |-
Timeout for each api-connectivity check.
Valid time units are "ms", "s", "m", "h".
timeout for each api-connectivity check
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
customDsTolerations:
description: CustomDsTolerations allows to add custom tolerations
Expand Down Expand Up @@ -117,53 +117,56 @@ spec:
description: |-
IsSoftwareRebootEnabled indicates whether self node remediation agent will do software reboot,
if the watchdog device can not be used or will use watchdog only,
without a fallback to software reboot
without a fallback to software reboot.
type: boolean
maxApiErrorThreshold:
default: 3
description: after this threshold, the node will start contacting
its peers
description: After this threshold, the node will start contacting
its peers.
minimum: 1
type: integer
peerApiServerTimeout:
default: 5s
description: Valid time units are "ms", "s", "m", "h".
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
description: |-
The timeout for api-server connectivity check.
Valid time units are "ms", "s", "m", "h".
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
peerDialTimeout:
default: 5s
description: |-
Timeout for establishing connection to peer.
Valid time units are "ms", "s", "m", "h".
timeout for establishing connection to peer
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
peerRequestTimeout:
default: 5s
description: |-
Timeout for each peer request.
Valid time units are "ms", "s", "m", "h".
timeout for each peer request
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
peerUpdateInterval:
default: 15m
description: Valid time units are "ms", "s", "m", "h".
pattern: ^(0|([0-9]+(\.[0-9]+)?(ms|s|m|h)))$
description: |-
The frequency for updating peers.
Valid time units are "ms", "s", "m", "h".
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
safeTimeToAssumeNodeRebootedSeconds:
default: 180
description: |-
SafeTimeToAssumeNodeRebootedSeconds is the time after which the healthy self node remediation
agents will assume the unhealthy node has been rebooted, and it is safe to recover affected workloads.
This is extremely important as starting replacement Pods while they are still running on the failed
node will likely lead to data corruption and violation of run-once semantics.
In an effort to prevent this, the operator ignores values lower than a minimum calculated from the
ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields.
minimum: 0
ApiCheckInterval, ApiServerTimeout, MaxApiErrorThreshold, PeerDialTimeout, and PeerRequestTimeout fields,
and the unhealthy node's individual watchdog timeout.
type: integer
watchdogFilePath:
default: /dev/watchdog
description: WatchdogFilePath is the watchdog file path that should
be available on each node, e.g. /dev/watchdog
be available on each node, e.g. /dev/watchdog.
type: string
type: object
status:
Expand Down
Loading

0 comments on commit 4b91e5c

Please sign in to comment.