Skip to content

Commit

Permalink
add controller and CRD for node-configurator #404
Browse files Browse the repository at this point in the history
  • Loading branch information
Uburro committed Feb 13, 2025
1 parent 9cf684a commit 1e92609
Show file tree
Hide file tree
Showing 26 changed files with 14,592 additions and 630 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ SHELL = /usr/bin/env bash -o pipefail
.SHELLFLAGS = -ec

# Limit the scope of generation otherwise it will try to generate configs for non-controller code
GENPATH = "./api/v1;"
GENPATH = "./api/v1;./api/v1alpha1;"

CHART_PATH = helm
CHART_OPERATOR_PATH = $(CHART_PATH)/soperator
Expand Down Expand Up @@ -79,7 +79,7 @@ help: ## Display this help.
.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
$(CONTROLLER_GEN) crd webhook paths=$(GENPATH) output:crd:artifacts:config=config/crd/bases
$(CONTROLLER_GEN) rbac:roleName=manager-role paths="./internal/controller/clustercontroller/..." output:artifacts:config=config/rbac/clustercontroller/
$(CONTROLLER_GEN) rbac:roleName=manager-role paths="./internal/controller/..." output:artifacts:config=config/rbac/clustercontroller/
$(CONTROLLER_GEN) rbac:roleName=node-configurator-role paths="./internal/rebooter/..." output:artifacts:config=config/rbac/node-configurator/
.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
Expand Down
57 changes: 0 additions & 57 deletions api/v1/slurmcluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,63 +700,6 @@ type SlurmNodeWorker struct {
//
// +kubebuilder:validation:Optional
PriorityClass string `json:"priorityClass,omitempty"`
// It's alpha feature and will be moved to separate CRD in the future
// Rebooter defines the configuration for the Slurm worker node rebooter
//
// +kubebuilder:validation:Optional
Rebooter Rebooter `json:"rebooter"`
}

// Rebooter defines the configuration for the Slurm worker node rebooter:
// whether it is enabled, the container image and its pull policy, container
// resources, eviction method, log level, namespace and service account.
type Rebooter struct {
// enabled defines whether the rebooter is enabled. Defaults to false.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default=false
Enabled bool `json:"enabled"`

// Image defines the rebooter container image.
//
// +kubebuilder:validation:Optional
Image string `json:"image"`

// imagePullPolicy defines the image pull policy.
// Allowed values are Always, Never and IfNotPresent; the default is IfNotPresent.
//
// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
// +kubebuilder:validation:Optional
// +kubebuilder:default="IfNotPresent"
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`

// Resources defines the resources for the container.
// Note that the field is a [corev1.ResourceList], not a
// [corev1.ResourceRequirements], so requests and limits cannot be
// expressed separately here.
//
// +kubebuilder:validation:Optional
Resources corev1.ResourceList `json:"resources,omitempty"`

// evictionMethod defines the method of eviction for the Slurm worker node.
// The intended methods are drain and evict, but validation currently
// accepts only "evict", which is also the default.
//
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum="evict"
// +kubebuilder:default="evict"
EvictionMethod string `json:"evictionMethod,omitempty"`

// logLevel defines the log level for the rebooter.
// Allowed values are debug, info, warn and error; the default is info.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default="info"
// +kubebuilder:validation:Enum="debug";"info";"warn";"error"
LogLevel string `json:"logLevel,omitempty"`

// Namespace defines the namespace where the rebooter will be deployed.
// By default, the same namespace as the soperator.
//
// +kubebuilder:validation:Optional
Namespace string `json:"namespace,omitempty"`

// serviceAccountName defines the service account name for the rebooter.
//
// +kubebuilder:validation:Optional
ServiceAccountName string `json:"serviceAccountName,omitempty"`
}

// SlurmNodeWorkerVolumes defines the volumes for the Slurm worker node
Expand Down
23 changes: 0 additions & 23 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

93 changes: 93 additions & 0 deletions api/v1alpha1/extra_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Image defines a container image and its pull policy.
type Image struct {
// Repository contains the name of the container image, including its
// repository if needed.
Repository string `json:"repository,omitempty"`
// Tag contains the desired container image version.
Tag string `json:"tag,omitempty"`
// PullPolicy describes how to pull the container image.
PullPolicy corev1.PullPolicy `json:"pullPolicy,omitempty"`
}

// StatusMetadata holds metadata of the application update status: the rollout
// status, a human-readable reason, the observed generation and the list of
// status conditions.
// +k8s:openapi-gen=true
type StatusMetadata struct {
// UpdateStatus defines the status of the update rollout.
UpdateStatus UpdateStatus `json:"updateStatus,omitempty"`
// Reason defines a human-readable error reason.
Reason string `json:"reason,omitempty"`
// ObservedGeneration defines the current generation picked by the operator
// for the reconcile.
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
// Conditions is the list of status conditions, treated as a map keyed by
// the condition type.
// Known .status.conditions.type are: "Available", "Progressing", and "Degraded"
// +patchMergeKey=type
// +patchStrategy=merge
// +listType=map
// +listMapKey=type
Conditions []Condition `json:"conditions,omitempty"`
}

// Condition defines a status condition of the resource.
//
// Type, Status, LastTransitionTime, LastUpdateTime and Reason are required;
// ObservedGeneration and Message are optional.
type Condition struct {
	// Type of condition in CamelCase or in foo.example.com/CamelCase.
	// +required
	// +kubebuilder:validation:MaxLength=316
	Type string `json:"type"`
	// status of the condition, one of True, False, Unknown.
	// +required
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Enum=True;False;Unknown
	Status metav1.ConditionStatus `json:"status"`
	// observedGeneration represents the .metadata.generation that the condition was set based upon.
	// For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
	// with respect to the current state of the instance.
	// +optional
	// +kubebuilder:validation:Minimum=0
	ObservedGeneration int64 `json:"observedGeneration,omitempty" protobuf:"varint,3,opt,name=observedGeneration"`
	// lastTransitionTime is the last time the condition transitioned from one status to another.
	// +required
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Type=string
	// +kubebuilder:validation:Format=date-time
	LastTransitionTime metav1.Time `json:"lastTransitionTime"`
	// lastUpdateTime is the last time a condition of the given type was updated.
	// This value is used for status TTL update and removal.
	// The field is required, so its JSON tag carries no omitempty, matching
	// the other required fields (omitempty has no effect on a struct-typed
	// field such as metav1.Time in encoding/json anyway).
	// +required
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Type=string
	// +kubebuilder:validation:Format=date-time
	LastUpdateTime metav1.Time `json:"lastUpdateTime"`

	// reason contains a programmatic identifier indicating the reason for the condition's last transition.
	// Producers of specific condition types may define expected values and meanings for this field,
	// and whether the values are considered a guaranteed API.
	// The value should be a CamelCase string.
	// This field may not be empty.
	// +required
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:MaxLength=1024
	// +kubebuilder:validation:MinLength=1
	Reason string `json:"reason"`
	// message is a human readable message indicating details about the transition.
	// This may be an empty string.
	// +optional
	// +kubebuilder:validation:MaxLength=32768
	Message string `json:"message,omitempty"`
}

// UpdateStatus is the string-typed status reported for an application.
type UpdateStatus string

// The UpdateStatus values declared by this package.
const (
	// UpdateStatusUpdating is the "updating" status value.
	UpdateStatusUpdating = UpdateStatus("updating")
	// UpdateStatusFailed is the "failed" status value.
	UpdateStatusFailed = UpdateStatus("failed")
	// UpdateStatusSucceeded is the "succeeded" status value.
	UpdateStatusSucceeded = UpdateStatus("succeeded")
)
146 changes: 122 additions & 24 deletions api/v1alpha1/nodeconfigurator_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,119 @@ limitations under the License.
package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
runtime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// NodeConfiguratorSpec defines the desired state of NodeConfigurator.
type NodeConfiguratorSpec struct {
// rebooter configures the rebooter controller, which reboots and drains a
// node based on node conditions. Defaults to {enabled: true}.
// Presumably only one of rebooter and nodeConfigurator is meant to be used
// at a time (TODO confirm).
//
// +kubebuilder:validation:Optional
// +kubebuilder:default:={enabled: true}
Rebooter Rebooter `json:"rebooter"`

// nodeConfigurator defines the container configuration for the node.
// Presumably only one of rebooter and nodeConfigurator is meant to be used
// at a time (TODO confirm).
//
// +kubebuilder:validation:Optional
SleepContainer SleepContainer `json:"nodeConfigurator"`

// initContainers defines the list of initContainers for the node-configurator.
// When set, it overrides the default initContainers.
//
// +kubebuilder:validation:Optional
InitContainers []corev1.Container `json:"initContainers,omitempty"`
}

// Foo is an example field of NodeConfigurator. Edit nodeconfigurator_types.go to remove/update
Foo string `json:"foo,omitempty"`
// ContainerConfig groups the container settings (image, resources, probes,
// environment variables) together with the scheduling constraints
// (tolerations, node selector, affinity) of a node-configurator workload.
type ContainerConfig struct {
// image defines the node-configurator container image.
//
// +kubebuilder:validation:Optional
Image Image `json:"image,omitempty"`

// resources defines the [corev1.ResourceRequirements] for the container.
// Defaults to an empty object.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default:={}
Resources corev1.ResourceRequirements `json:"resources,omitempty"`

// livenessProbe defines the livenessProbe for the node-configurator.
//
// +kubebuilder:validation:Optional
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty"`

// readinessProbe defines the readinessProbe for the node-configurator.
//
// +kubebuilder:validation:Optional
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"`

// env defines the list of environment variables for the node-configurator.
//
// +kubebuilder:validation:Optional
Env []corev1.EnvVar `json:"env,omitempty"`

// tolerations defines the list of tolerations for the node-configurator.
//
// +kubebuilder:validation:Optional
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`

// nodeSelector defines the nodeSelector for the node-configurator.
//
// +kubebuilder:validation:Optional
NodeSelector map[string]string `json:"nodeSelector,omitempty"`

// affinity defines the affinity for the node-configurator.
//
// +kubebuilder:validation:Optional
Affinity *corev1.Affinity `json:"affinity,omitempty"`
}

// NodeConfiguratorStatus defines the observed state of NodeConfigurator.
type NodeConfiguratorStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file
// PodConfig groups the pod-level settings: the priority class name and the
// service account name.
type PodConfig struct {
// priorityClassName defines the priorityClassName for the pod.
//
// +kubebuilder:validation:Optional
PriorityClassName string `json:"priorityClassName,omitempty"`

// serviceAccountName defines the service account name for the pod.
//
// +kubebuilder:validation:Optional
ServiceAccountName string `json:"serviceAccountName,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// Rebooter defines the configuration of the rebooter: an enabled flag, the
// inlined ContainerConfig and PodConfig, and the rebooter-specific eviction
// method and logging settings.
type Rebooter struct {
// enabled is the on/off flag of the rebooter.
Enabled bool `json:"enabled"`
// ContainerConfig is inlined: its fields appear directly on the rebooter object.
ContainerConfig `json:",inline"`
// PodConfig is inlined: its fields appear directly on the rebooter object.
PodConfig `json:",inline"`
// evictionMethod defines the method of eviction for the Slurm worker node.
// The intended methods are drain and evict, but validation currently
// accepts only "evict", which is also the default.
//
// +kubebuilder:validation:Optional
// +kubebuilder:validation:Enum="evict"
// +kubebuilder:default="evict"
EvictionMethod string `json:"evictionMethod,omitempty"`

// logLevel defines the log level for the node-configurator.
// Allowed values are debug, info, warn and error; the default is info.
//
// +kubebuilder:validation:Optional
// +kubebuilder:default="info"
// +kubebuilder:validation:Enum="debug";"info";"warn";"error"
LogLevel string `json:"logLevel,omitempty"`

// logFormat defines the log format for the node-configurator.
//
// +kubebuilder:validation:Optional
LogFormat string `json:"logFormat,omitempty"`
}

// SleepContainer is the type of the nodeConfigurator field of
// NodeConfiguratorSpec: an enabled flag plus the inlined ContainerConfig and
// PodConfig.
type SleepContainer struct {
// enabled is the on/off flag of this container configuration.
Enabled bool `json:"enabled"`
// ContainerConfig is inlined: its fields appear directly on this object.
ContainerConfig `json:",inline"`
// PodConfig is inlined: its fields appear directly on this object.
PodConfig `json:",inline"`
}

// NodeConfigurator is the Schema for the nodeconfigurators API.
type NodeConfigurator struct {
Expand All @@ -52,15 +140,30 @@ type NodeConfigurator struct {
Status NodeConfiguratorStatus `json:"status,omitempty"`
}

// DeepCopyObject implements runtime.Object.
func (n *NodeConfigurator) DeepCopyObject() runtime.Object {
panic("unimplemented")
// NodeConfiguratorStatus defines the observed state of NodeConfigurator.
// It consists solely of the inlined StatusMetadata.
type NodeConfiguratorStatus struct {
// StatusMetadata is inlined: its fields appear directly under status.
StatusMetadata `json:",inline"`
}

// GetStatusMetadata returns a pointer to the StatusMetadata embedded in the
// status, so changes made through the result are applied to cr itself.
func (cr *NodeConfiguratorStatus) GetStatusMetadata() *StatusMetadata {
	meta := &cr.StatusMetadata
	return meta
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status

// GetObjectKind implements runtime.Object by returning a pointer to the
// object's TypeMeta as the schema.ObjectKind.
// Subtle: this method shadows the method (TypeMeta).GetObjectKind of NodeConfigurator.TypeMeta.
func (n *NodeConfigurator) GetObjectKind() schema.ObjectKind {
	// The leftover panic("unimplemented") placeholder made this return
	// unreachable, so every call panicked; it has been removed.
	return &n.TypeMeta
}

// DeepCopyObject implements runtime.Object by returning the result of
// n.DeepCopy().
func (n *NodeConfigurator) DeepCopyObject() runtime.Object {
	clone := n.DeepCopy()
	if clone == nil {
		// Return an untyped nil so the interface value itself is nil rather
		// than an interface wrapping a nil *NodeConfigurator.
		return nil
	}
	return clone
}

// +kubebuilder:object:root=true
Expand All @@ -72,15 +175,10 @@ type NodeConfiguratorList struct {
Items []NodeConfigurator `json:"items"`
}

// DeepCopyObject implements runtime.Object.
// This is an unimplemented placeholder: calling it always panics with
// "unimplemented" and never returns a copy.
func (n *NodeConfiguratorList) DeepCopyObject() runtime.Object {
panic("unimplemented")
}

// GetObjectKind implements runtime.Object by returning a pointer to the
// list's TypeMeta as the schema.ObjectKind.
// Subtle: this method shadows the method (TypeMeta).GetObjectKind of NodeConfiguratorList.TypeMeta.
func (n *NodeConfiguratorList) GetObjectKind() schema.ObjectKind {
	// The leftover panic("unimplemented") placeholder made this return
	// unreachable, so every call panicked; it has been removed.
	return &n.TypeMeta
}

func init() {
Expand Down
Loading

0 comments on commit 1e92609

Please sign in to comment.