From 86982f3c07c369bdb259c1f5b89beab42dd8887c Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Mon, 13 May 2024 16:16:48 +0200 Subject: [PATCH 1/7] YDBOPS-9635 use configMap from Storage if Database .spec.configuration empty (#199) --- api/v1alpha1/database_webhook.go | 10 ++++---- deploy/ydb-operator/Chart.yaml | 4 ++-- .../remotedatabasenodeset/controller_test.go | 4 ++-- internal/resources/database.go | 22 +++++++++-------- internal/resources/database_statefulset.go | 5 +++- internal/resources/remotedatabasenodeset.go | 24 +++++++++++++------ 6 files changed, 43 insertions(+), 26 deletions(-) diff --git a/api/v1alpha1/database_webhook.go b/api/v1alpha1/database_webhook.go index e519036f..277b7143 100644 --- a/api/v1alpha1/database_webhook.go +++ b/api/v1alpha1/database_webhook.go @@ -141,11 +141,13 @@ func (r *DatabaseDefaulter) Default(ctx context.Context, obj runtime.Object) err database.Spec.StorageEndpoint = storage.GetStorageEndpointWithProto() } - configuration, err := buildConfiguration(storage, database) - if err != nil { - return err + if database.Spec.Configuration != "" || (database.Spec.Encryption != nil && database.Spec.Encryption.Enabled) { + configuration, err := buildConfiguration(storage, database) + if err != nil { + return err + } + database.Spec.Configuration = configuration } - database.Spec.Configuration = configuration return nil } diff --git a/deploy/ydb-operator/Chart.yaml b/deploy/ydb-operator/Chart.yaml index f31c7d12..5e0d3f10 100644 --- a/deploy/ydb-operator/Chart.yaml +++ b/deploy/ydb-operator/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.6 +version: 0.5.7 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.5.6" +appVersion: "0.5.7" diff --git a/internal/controllers/remotedatabasenodeset/controller_test.go b/internal/controllers/remotedatabasenodeset/controller_test.go index 91c3cb4e..3cd3a67b 100644 --- a/internal/controllers/remotedatabasenodeset/controller_test.go +++ b/internal/controllers/remotedatabasenodeset/controller_test.go @@ -759,7 +759,7 @@ var _ = Describe("RemoteDatabaseNodeSet controller tests", func() { foundConfigMap := corev1.ConfigMap{} Expect(remoteClient.Get(ctx, types.NamespacedName{ - Name: databaseSample.Name, + Name: storageSample.Name, Namespace: testobjects.YdbNamespace, }, &foundConfigMap)).Should(Succeed()) @@ -791,7 +791,7 @@ var _ = Describe("RemoteDatabaseNodeSet controller tests", func() { foundConfigMap := corev1.ConfigMap{} Expect(remoteClient.Get(ctx, types.NamespacedName{ - Name: databaseSample.Name, + Name: storageSample.Name, Namespace: testobjects.YdbNamespace, }, &foundConfigMap)).Should(Succeed()) diff --git a/internal/resources/database.go b/internal/resources/database.go index cfb23739..eedcc8c9 100644 --- a/internal/resources/database.go +++ b/internal/resources/database.go @@ -72,18 +72,20 @@ func (b *DatabaseBuilder) GetResourceBuilders(restConfig *rest.Config) []Resourc var optionalBuilders []ResourceBuilder - optionalBuilders = append( - optionalBuilders, - &ConfigMapBuilder{ - Object: b, + if b.Spec.Configuration != "" { + optionalBuilders = append( + optionalBuilders, + &ConfigMapBuilder{ + Object: b, - Name: b.GetName(), - Data: map[string]string{ - api.ConfigFileName: b.Spec.Configuration, + Name: b.GetName(), + Data: map[string]string{ + api.ConfigFileName: b.Spec.Configuration, + }, + Labels: databaseLabels, }, - Labels: databaseLabels, - }, - ) + ) + } if b.Spec.Monitoring != nil && b.Spec.Monitoring.Enabled { optionalBuilders = append(optionalBuilders, diff --git a/internal/resources/database_statefulset.go b/internal/resources/database_statefulset.go index fea9e308..2118d290 100644 --- a/internal/resources/database_statefulset.go +++ b/internal/resources/database_statefulset.go @@ -144,7 +144,10 @@ func (b *DatabaseStatefulSetBuilder) buildPodTemplateSpec() corev1.PodTemplateSp } func (b *DatabaseStatefulSetBuilder) buildVolumes() []corev1.Volume { - configMapName := b.Database.Name + configMapName := b.Spec.StorageClusterRef.Name + if b.Spec.Configuration != "" { + configMapName = b.GetName() + } volumes := []corev1.Volume{ { diff --git a/internal/resources/remotedatabasenodeset.go b/internal/resources/remotedatabasenodeset.go index 99877652..c3a7efd4 100644 --- a/internal/resources/remotedatabasenodeset.go +++ b/internal/resources/remotedatabasenodeset.go @@ -102,13 +102,23 @@ func (b *RemoteDatabaseNodeSetResource) GetRemoteObjects( } // sync ConfigMap - remoteObjects = append(remoteObjects, - &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: b.Spec.DatabaseRef.Name, - Namespace: b.Namespace, - }, - }) + if b.Spec.Configuration != "" { + remoteObjects = append(remoteObjects, + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: b.Spec.DatabaseRef.Name, + Namespace: b.Namespace, + }, + }) + } else { + remoteObjects = append(remoteObjects, + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: b.Spec.StorageClusterRef.Name, + Namespace: b.Namespace, + }, + }) + } // sync Services remoteObjects = append(remoteObjects, From bca69e49c13e8acf37976c19ee7c749aa3fb241d Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Mon, 13 May 2024 16:38:51 +0200 Subject: [PATCH 2/7] bump chart version for PR #199 (#206) --- deploy/ydb-operator/Chart.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/ydb-operator/Chart.yaml b/deploy/ydb-operator/Chart.yaml index 5e0d3f10..e0545b49 100644 --- a/deploy/ydb-operator/Chart.yaml +++ b/deploy/ydb-operator/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.7 +version: 0.5.8 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.5.7" +appVersion: "0.5.8" From 55d84a624843ca3193d80ae9111dfc964692b7d4 Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Mon, 13 May 2024 16:39:22 +0200 Subject: [PATCH 3/7] YDBOPS-9613 conditions for NodeSet objects (#197) --- internal/controllers/constants/constants.go | 18 +- internal/controllers/databasenodeset/sync.go | 247 ++++++++++++----- .../remotedatabasenodeset/controller_test.go | 87 +++--- .../controllers/remotedatabasenodeset/sync.go | 62 ++--- .../remotestoragenodeset/controller_test.go | 115 +++++--- .../controllers/remotestoragenodeset/sync.go | 63 ++--- internal/controllers/storagenodeset/sync.go | 253 ++++++++++++++---- internal/resources/databasenodeset.go | 22 -- internal/resources/storagenodeset.go | 22 -- 9 files changed, 567 insertions(+), 322 deletions(-) diff --git a/internal/controllers/constants/constants.go b/internal/controllers/constants/constants.go index 819745be..5d1e8b80 100644 --- a/internal/controllers/constants/constants.go +++ b/internal/controllers/constants/constants.go @@ -19,12 +19,16 @@ const ( OldStorageInitializedCondition = "StorageReady" OldDatabaseInitializedCondition = "TenantInitialized" - StoragePausedCondition = "StoragePaused" - StorageInitializedCondition = "StorageInitialized" - StorageNodeSetReadyCondition = "StorageNodeSetReady" - DatabasePausedCondition = "DatabasePaused" - DatabaseInitializedCondition = "DatabaseInitialized" - DatabaseNodeSetReadyCondition = "DatabaseNodeSetReady" + StoragePausedCondition = "StoragePaused" + StorageInitializedCondition = "StorageInitialized" + DatabasePausedCondition = "DatabasePaused" + DatabaseInitializedCondition = "DatabaseInitialized" + + NodeSetPreparedCondition = "NodeSetPrepared" + NodeSetProvisionedCondition = "NodeSetProvisioned" + NodeSetReadyCondition = "NodeSetReady" + NodeSetPausedCondition = "NodeSetPaused" + RemoteResourceSyncedCondition = "ResourceSynced" Stop = true @@ -48,6 +52,7 @@ const ( DatabasePaused ClusterState = "Paused" DatabaseNodeSetPending ClusterState = "Pending" + DatabaseNodeSetPreparing ClusterState = "Preparing" DatabaseNodeSetProvisioning ClusterState = "Provisioning" DatabaseNodeSetReady ClusterState = "Ready" DatabaseNodeSetPaused ClusterState = "Paused" @@ -60,6 +65,7 @@ const ( StoragePaused ClusterState = "Paused" StorageNodeSetPending ClusterState = "Pending" + StorageNodeSetPreparing ClusterState = "Preparing" StorageNodeSetProvisioning ClusterState = "Provisioning" StorageNodeSetReady ClusterState = "Ready" StorageNodeSetPaused ClusterState = "Paused" diff --git a/internal/controllers/databasenodeset/sync.go b/internal/controllers/databasenodeset/sync.go index ceeb1d8a..2915ad1d 100644 --- a/internal/controllers/databasenodeset/sync.go +++ b/internal/controllers/databasenodeset/sync.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "reflect" + "time" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -27,27 +28,23 @@ func (r *Reconciler) Sync(ctx context.Context, crDatabaseNodeSet *v1alpha1.Datab var err error databaseNodeSet := resources.NewDatabaseNodeSet(crDatabaseNodeSet) - stop, result, err = databaseNodeSet.SetStatusOnFirstReconcile() - if stop { - return result, err - } - stop, result = r.checkDatabaseFrozen(&databaseNodeSet) + stop, result, err = r.setInitialStatus(ctx, &databaseNodeSet) if stop { - return result, nil + return result, err } - stop, result, err = r.handlePauseResume(ctx, &databaseNodeSet) + stop, result, err = r.handleResourcesSync(ctx, &databaseNodeSet) if stop { return result, err } - stop, result, err = r.handleResourcesSync(ctx, &databaseNodeSet) + stop, result, err = r.waitForStatefulSetToScale(ctx, &databaseNodeSet) if stop { return result, err } - stop, result, err = r.waitForStatefulSetToScale(ctx, &databaseNodeSet) + stop, result, err = r.handlePauseResume(ctx, &databaseNodeSet) if stop { return result, err } @@ -55,12 +52,66 @@ func (r *Reconciler) Sync(ctx context.Context, crDatabaseNodeSet *v1alpha1.Datab return ctrl.Result{}, nil } +func (r *Reconciler) setInitialStatus( + ctx context.Context, + databaseNodeSet *resources.DatabaseNodeSetResource, +) (bool, ctrl.Result, error) { + r.Log.Info("running step setInitialStatus") + + if databaseNodeSet.Status.Conditions == nil { + databaseNodeSet.Status.Conditions = []metav1.Condition{} + + if databaseNodeSet.Spec.Pause { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Paused", + }) + } else { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Ready", + }) + } + + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step setInitialStatus") + return Continue, ctrl.Result{}, nil +} + func (r *Reconciler) handleResourcesSync( ctx context.Context, databaseNodeSet *resources.DatabaseNodeSetResource, ) (bool, ctrl.Result, error) { r.Log.Info("running step handleResourcesSync") + if !databaseNodeSet.Spec.OperatorSync { + r.Log.Info("`operatorSync: false` is set, no further steps will be run") + r.Recorder.Event( + databaseNodeSet, + corev1.EventTypeNormal, + string(DatabaseNodeSetPreparing), + fmt.Sprintf("Found .spec.operatorSync set to %t, skip further steps", databaseNodeSet.Spec.OperatorSync), + ) + return Stop, ctrl.Result{Requeue: false}, nil + } + + if databaseNodeSet.Status.State == DatabaseNodeSetPending { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPreparedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for sync resources for generation %d", databaseNodeSet.Generation), + }) + databaseNodeSet.Status.State = DatabaseNodeSetPreparing + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) + } + for _, builder := range databaseNodeSet.GetResourceBuilders(r.Config) { newResource := builder.Placeholder(databaseNodeSet) @@ -104,18 +155,35 @@ func (r *Reconciler) handleResourcesSync( "ProvisioningFailed", eventMessage+fmt.Sprintf(", failed to sync, error: %s", err), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPreparedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Failed to sync resources for generation %d", databaseNodeSet.Generation), + }) + return r.updateStatus(ctx, databaseNodeSet, DefaultRequeueDelay) } else if result == controllerutil.OperationResultCreated || result == controllerutil.OperationResultUpdated { r.Recorder.Event( databaseNodeSet, corev1.EventTypeNormal, - string(DatabaseNodeSetProvisioning), + string(DatabaseNodeSetPreparing), eventMessage+fmt.Sprintf(", changed, result: %s", result), ) } } - return Continue, ctrl.Result{Requeue: false}, nil + if !meta.IsStatusConditionTrue(databaseNodeSet.Status.Conditions, NodeSetPreparedCondition) { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPreparedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully synced resources", + }) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step handleResourcesSync") + return Continue, ctrl.Result{}, nil } func (r *Reconciler) waitForStatefulSetToScale( @@ -124,15 +192,15 @@ func (r *Reconciler) waitForStatefulSetToScale( ) (bool, ctrl.Result, error) { r.Log.Info("running step waitForStatefulSetToScale") - if databaseNodeSet.Status.State == DatabaseNodeSetPending { - r.Recorder.Event( - databaseNodeSet, - corev1.EventTypeNormal, - string(DatabaseNodeSetProvisioning), - fmt.Sprintf("Starting to track number of running databaseNodeSet pods, expected: %d", databaseNodeSet.Spec.Nodes), - ) + if databaseNodeSet.Status.State == DatabaseNodeSetPreparing { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for scale to desired nodes: %d", databaseNodeSet.Spec.Nodes), + }) databaseNodeSet.Status.State = DatabaseNodeSetProvisioning - return r.updateStatus(ctx, databaseNodeSet) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) } found := &appsv1.StatefulSet{} @@ -191,29 +259,54 @@ func (r *Reconciler) waitForStatefulSetToScale( databaseNodeSet, corev1.EventTypeNormal, string(DatabaseNodeSetProvisioning), - fmt.Sprintf("Waiting for number of running databaseNodeSet pods to match expected: %d != %d", runningPods, databaseNodeSet.Spec.Nodes)) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + fmt.Sprintf("Waiting for number of running databaseNodeSet pods to match expected: %d != %d", runningPods, databaseNodeSet.Spec.Nodes), + ) + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Number of running nodes does not match expected: %d != %d", runningPods, databaseNodeSet.Spec.Nodes), + }) + return r.updateStatus(ctx, databaseNodeSet, DefaultRequeueDelay) } - if databaseNodeSet.Status.State == DatabaseNodeSetProvisioning { + if !meta.IsStatusConditionTrue(databaseNodeSet.Status.Conditions, NodeSetProvisionedCondition) { meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ - Type: DatabaseNodeSetReadyCondition, - Status: "True", + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionTrue, Reason: ReasonCompleted, Message: fmt.Sprintf("Scaled DatabaseNodeSet to %d successfully", databaseNodeSet.Spec.Nodes), }) - databaseNodeSet.Status.State = DatabaseNodeSetReady - return r.updateStatus(ctx, databaseNodeSet) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) } + r.Log.Info("complete step waitForStatefulSetToScale") return Continue, ctrl.Result{Requeue: false}, nil } func (r *Reconciler) updateStatus( ctx context.Context, databaseNodeSet *resources.DatabaseNodeSetResource, + requeueAfter time.Duration, ) (bool, ctrl.Result, error) { - r.Log.Info("running step updateStatus") + r.Log.Info("running updateStatus handler") + + if meta.IsStatusConditionFalse(databaseNodeSet.Status.Conditions, NodeSetPreparedCondition) || + meta.IsStatusConditionFalse(databaseNodeSet.Status.Conditions, NodeSetProvisionedCondition) { + if databaseNodeSet.Spec.Pause { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } else { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } + } crDatabaseNodeSet := &v1alpha1.DatabaseNodeSet{} err := r.Get(ctx, types.NamespacedName{ @@ -231,18 +324,18 @@ func (r *Reconciler) updateStatus( } oldStatus := crDatabaseNodeSet.Status.State + crDatabaseNodeSet.Status.State = databaseNodeSet.Status.State + crDatabaseNodeSet.Status.Conditions = databaseNodeSet.Status.Conditions + if err = r.Status().Update(ctx, crDatabaseNodeSet); err != nil { + r.Recorder.Event( + databaseNodeSet, + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed setting status: %s", err), + ) + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + } if oldStatus != databaseNodeSet.Status.State { - crDatabaseNodeSet.Status.State = databaseNodeSet.Status.State - crDatabaseNodeSet.Status.Conditions = databaseNodeSet.Status.Conditions - if err = r.Status().Update(ctx, crDatabaseNodeSet); err != nil { - r.Recorder.Event( - databaseNodeSet, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed setting status: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } r.Recorder.Event( databaseNodeSet, corev1.EventTypeNormal, @@ -251,7 +344,8 @@ func (r *Reconciler) updateStatus( ) } - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil + r.Log.Info("complete updateStatus handler") + return Stop, ctrl.Result{RequeueAfter: requeueAfter}, nil } func shouldIgnoreDatabaseNodeSetChange(databaseNodeSet *resources.DatabaseNodeSetResource) resources.IgnoreChangesFunction { @@ -270,37 +364,70 @@ func (r *Reconciler) handlePauseResume( databaseNodeSet *resources.DatabaseNodeSetResource, ) (bool, ctrl.Result, error) { r.Log.Info("running step handlePauseResume") - if databaseNodeSet.Status.State == DatabaseReady && databaseNodeSet.Spec.Pause { + + if databaseNodeSet.Status.State == DatabaseNodeSetProvisioning { + if databaseNodeSet.Spec.Pause { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + databaseNodeSet.Status.State = DatabaseNodeSetPaused + } else { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + databaseNodeSet.Status.State = DatabaseNodeSetReady + } + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) + } + + if databaseNodeSet.Status.State == DatabaseNodeSetReady && databaseNodeSet.Spec.Pause { r.Log.Info("`pause: true` was noticed, moving DatabaseNodeSet to state `Paused`") meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ - Type: DatabasePausedCondition, - Status: "False", - Reason: ReasonInProgress, - Message: "Transitioning DatabaseNodeSet to Paused state", + Type: NodeSetReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Paused", }) databaseNodeSet.Status.State = DatabaseNodeSetPaused - return r.updateStatus(ctx, databaseNodeSet) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) } if databaseNodeSet.Status.State == DatabaseNodeSetPaused && !databaseNodeSet.Spec.Pause { r.Log.Info("`pause: false` was noticed, moving DatabaseNodeSet to state `Ready`") - meta.RemoveStatusCondition(&databaseNodeSet.Status.Conditions, DatabasePausedCondition) + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Ready", + }) databaseNodeSet.Status.State = DatabaseNodeSetReady - return r.updateStatus(ctx, databaseNodeSet) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) } - return Continue, ctrl.Result{}, nil -} - -func (r *Reconciler) checkDatabaseFrozen( - databaseNodeSet *resources.DatabaseNodeSetResource, -) (bool, ctrl.Result) { - r.Log.Info("running step checkDatabaseFrozen") - - if !databaseNodeSet.Spec.OperatorSync { - r.Log.Info("`operatorSync: false` is set, no further steps will be run") - return Stop, ctrl.Result{} + if databaseNodeSet.Spec.Pause { + if !meta.IsStatusConditionTrue(databaseNodeSet.Status.Conditions, NodeSetPausedCondition) { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) + } + } else { + if !meta.IsStatusConditionTrue(databaseNodeSet.Status.Conditions, NodeSetReadyCondition) { + meta.SetStatusCondition(&databaseNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, databaseNodeSet, StatusUpdateRequeueDelay) + } } - return Continue, ctrl.Result{} + r.Log.Info("complete step handlePauseResume") + return Continue, ctrl.Result{}, nil } diff --git a/internal/controllers/remotedatabasenodeset/controller_test.go b/internal/controllers/remotedatabasenodeset/controller_test.go index 3cd3a67b..f088c48e 100644 --- a/internal/controllers/remotedatabasenodeset/controller_test.go +++ b/internal/controllers/remotedatabasenodeset/controller_test.go @@ -7,7 +7,6 @@ import ( "reflect" "strings" "testing" - "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -350,74 +349,64 @@ var _ = Describe("RemoteDatabaseNodeSet controller tests", func() { When("Created RemoteDatabaseNodeSet in k8s-mgmt-cluster", func() { It("Should receive status from k8s-data-cluster", func() { - By("set dedicated DatabaseNodeSet status to Ready on remote cluster...") - foundDedicatedDatabaseNodeSetOnRemote := v1alpha1.DatabaseNodeSet{} - Expect(remoteClient.Get(ctx, types.NamespacedName{ - Name: databaseSample.Name + "-" + testNodeSetName + "-remote-dedicated", - Namespace: testobjects.YdbNamespace, - }, &foundDedicatedDatabaseNodeSetOnRemote)).Should(Succeed()) - - foundDedicatedDatabaseNodeSetOnRemote.Status.State = DatabaseNodeSetReady - foundDedicatedDatabaseNodeSetOnRemote.Status.Conditions = append( - foundDedicatedDatabaseNodeSetOnRemote.Status.Conditions, - metav1.Condition{ - Type: DatabaseNodeSetReadyCondition, - Status: "True", - Reason: ReasonCompleted, - LastTransitionTime: metav1.NewTime(time.Now()), - Message: fmt.Sprintf("Scaled databaseNodeSet to %d successfully", foundDedicatedDatabaseNodeSetOnRemote.Spec.Nodes), - }, - ) - Expect(remoteClient.Status().Update(ctx, &foundDedicatedDatabaseNodeSetOnRemote)).Should(Succeed()) + By("checking that dedicated DatabaseNodeSet status updated on remote cluster...") + Eventually(func() bool { + foundDatabaseNodeSetOnRemote := v1alpha1.DatabaseNodeSet{} + Expect(remoteClient.Get(ctx, types.NamespacedName{ + Name: databaseSample.Name + "-" + testNodeSetName + "-remote-dedicated", + Namespace: testobjects.YdbNamespace, + }, &foundDatabaseNodeSetOnRemote)).Should(Succeed()) - By("set DatabaseNodeSet status to Ready on remote cluster...") - foundDatabaseNodeSetOnRemote := v1alpha1.DatabaseNodeSet{} - Expect(remoteClient.Get(ctx, types.NamespacedName{ - Name: databaseSample.Name + "-" + testNodeSetName + "-remote", - Namespace: testobjects.YdbNamespace, - }, &foundDatabaseNodeSetOnRemote)).Should(Succeed()) - - foundDatabaseNodeSetOnRemote.Status.State = DatabaseNodeSetReady - foundDatabaseNodeSetOnRemote.Status.Conditions = append( - foundDatabaseNodeSetOnRemote.Status.Conditions, - metav1.Condition{ - Type: DatabaseNodeSetReadyCondition, - Status: "True", - Reason: ReasonCompleted, - LastTransitionTime: metav1.NewTime(time.Now()), - Message: fmt.Sprintf("Scaled databaseNodeSet to %d successfully", foundDatabaseNodeSetOnRemote.Spec.Nodes), - }, - ) - Expect(remoteClient.Status().Update(ctx, &foundDatabaseNodeSetOnRemote)).Should(Succeed()) + return meta.IsStatusConditionPresentAndEqual( + foundDatabaseNodeSetOnRemote.Status.Conditions, + NodeSetPreparedCondition, + metav1.ConditionTrue, + ) + }, test.Timeout, test.Interval).Should(BeTrue()) By("checking that dedicated RemoteDatabaseNodeSet status updated on local cluster...") Eventually(func() bool { - foundRemoteDatabaseNodeSetOnRemote := v1alpha1.RemoteDatabaseNodeSet{} + foundRemoteDatabaseNodeSet := v1alpha1.RemoteDatabaseNodeSet{} Expect(localClient.Get(ctx, types.NamespacedName{ Name: databaseSample.Name + "-" + testNodeSetName + "-remote-dedicated", Namespace: testobjects.YdbNamespace, - }, &foundRemoteDatabaseNodeSetOnRemote)).Should(Succeed()) + }, &foundRemoteDatabaseNodeSet)).Should(Succeed()) return meta.IsStatusConditionPresentAndEqual( - foundRemoteDatabaseNodeSetOnRemote.Status.Conditions, - DatabaseNodeSetReadyCondition, + foundRemoteDatabaseNodeSet.Status.Conditions, + NodeSetPreparedCondition, metav1.ConditionTrue, - ) && foundRemoteDatabaseNodeSetOnRemote.Status.State == DatabaseNodeSetReady + ) + }, test.Timeout, test.Interval).Should(BeTrue()) + + By("checking that DatabaseNodeSet status updated on remote cluster...") + Eventually(func() bool { + foundDatabaseNodeSetOnRemote := v1alpha1.DatabaseNodeSet{} + Expect(remoteClient.Get(ctx, types.NamespacedName{ + Name: databaseSample.Name + "-" + testNodeSetName + "-remote", + Namespace: testobjects.YdbNamespace, + }, &foundDatabaseNodeSetOnRemote)).Should(Succeed()) + + return meta.IsStatusConditionPresentAndEqual( + foundDatabaseNodeSetOnRemote.Status.Conditions, + NodeSetPreparedCondition, + metav1.ConditionTrue, + ) }, test.Timeout, test.Interval).Should(BeTrue()) By("checking that RemoteDatabaseNodeSet status updated on local cluster...") Eventually(func() bool { - foundRemoteDatabaseNodeSetOnRemote := v1alpha1.RemoteDatabaseNodeSet{} + foundRemoteDatabaseNodeSet := v1alpha1.RemoteDatabaseNodeSet{} Expect(localClient.Get(ctx, types.NamespacedName{ Name: databaseSample.Name + "-" + testNodeSetName + "-remote", Namespace: testobjects.YdbNamespace, - }, &foundRemoteDatabaseNodeSetOnRemote)).Should(Succeed()) + }, &foundRemoteDatabaseNodeSet)).Should(Succeed()) return meta.IsStatusConditionPresentAndEqual( - foundRemoteDatabaseNodeSetOnRemote.Status.Conditions, - DatabaseNodeSetReadyCondition, + foundRemoteDatabaseNodeSet.Status.Conditions, + NodeSetPreparedCondition, metav1.ConditionTrue, - ) && foundRemoteDatabaseNodeSetOnRemote.Status.State == DatabaseNodeSetReady + ) }, test.Timeout, test.Interval).Should(BeTrue()) }) }) diff --git a/internal/controllers/remotedatabasenodeset/sync.go b/internal/controllers/remotedatabasenodeset/sync.go index 403c10ea..c0cabc22 100644 --- a/internal/controllers/remotedatabasenodeset/sync.go +++ b/internal/controllers/remotedatabasenodeset/sync.go @@ -44,6 +44,11 @@ func (r *Reconciler) Sync(ctx context.Context, crRemoteDatabaseNodeSet *v1alpha1 return result, err } + stop, result, err = r.updateRemoteStatus(ctx, &remoteDatabaseNodeSet) + if stop { + return result, err + } + return ctrl.Result{}, nil } @@ -98,7 +103,8 @@ func (r *Reconciler) handleResourcesSync( } } - return r.updateRemoteStatus(ctx, remoteDatabaseNodeSet) + r.Log.Info("complete step handleResourcesSync") + return Continue, ctrl.Result{}, nil } func (r *Reconciler) updateRemoteStatus( @@ -141,42 +147,38 @@ func (r *Reconciler) updateRemoteStatus( return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } - oldStatus := crRemoteDatabaseNodeSet.Status.State - if oldStatus != crDatabaseNodeSet.Status.State { - crRemoteDatabaseNodeSet.Status.State = crDatabaseNodeSet.Status.State - crRemoteDatabaseNodeSet.Status.Conditions = crDatabaseNodeSet.Status.Conditions - if err := r.RemoteClient.Status().Update(ctx, crRemoteDatabaseNodeSet); err != nil { - r.Recorder.Event( - remoteDatabaseNodeSet, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed to update status on remote cluster: %s", err), - ) - r.RemoteRecorder.Event( - remoteDatabaseNodeSet, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed to update status: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } + crRemoteDatabaseNodeSet.Status.State = crDatabaseNodeSet.Status.State + crRemoteDatabaseNodeSet.Status.Conditions = crDatabaseNodeSet.Status.Conditions + if err := r.RemoteClient.Status().Update(ctx, crRemoteDatabaseNodeSet); err != nil { r.Recorder.Event( remoteDatabaseNodeSet, - corev1.EventTypeNormal, - "StatusChanged", - "DatabaseNodeSet status updated on remote cluster", + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed to update status on remote cluster: %s", err), ) r.RemoteRecorder.Event( remoteDatabaseNodeSet, - corev1.EventTypeNormal, - "StatusChanged", - "RemoteDatabaseNodeSet status updated", + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed to update status: %s", err), ) - r.Log.Info("step updateRemoteStatus requeue reconcile") - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } - r.Log.Info("step updateRemoteStatus completed") - return Continue, ctrl.Result{Requeue: false}, nil + r.Recorder.Event( + remoteDatabaseNodeSet, + corev1.EventTypeNormal, + "StatusChanged", + "Status updated on remote cluster", + ) + r.RemoteRecorder.Event( + remoteDatabaseNodeSet, + corev1.EventTypeNormal, + "StatusChanged", + "Status updated", + ) + + r.Log.Info("complete step updateRemoteStatus") + return Continue, ctrl.Result{}, nil } diff --git a/internal/controllers/remotestoragenodeset/controller_test.go b/internal/controllers/remotestoragenodeset/controller_test.go index e45cd99f..df2f2127 100644 --- a/internal/controllers/remotestoragenodeset/controller_test.go +++ b/internal/controllers/remotestoragenodeset/controller_test.go @@ -4,9 +4,9 @@ import ( "context" "fmt" "path/filepath" + "reflect" "strings" "testing" - "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -285,47 +285,20 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { When("Created RemoteStorageNodeSet in k8s-mgmt-cluster", func() { It("Should receive status from k8s-data-cluster", func() { - By("set static StorageNodeSet status to Ready on remote cluster...") - Eventually(func() error { - foundStaticStorageNodeSetOnRemote := &v1alpha1.StorageNodeSet{} + By("checking that static StorageNodeSet status updated on remote cluster...") + Eventually(func() bool { + foundStaticRemoteStorageNodeSetOnRemote := v1alpha1.StorageNodeSet{} Expect(remoteClient.Get(ctx, types.NamespacedName{ Name: storageSample.Name + "-" + testNodeSetName + "-remote-static", Namespace: testobjects.YdbNamespace, - }, foundStaticStorageNodeSetOnRemote)).Should(Succeed()) - foundStaticStorageNodeSetOnRemote.Status.State = StorageNodeSetReady - foundStaticStorageNodeSetOnRemote.Status.Conditions = append( - foundStaticStorageNodeSetOnRemote.Status.Conditions, - metav1.Condition{ - Type: StorageNodeSetReadyCondition, - Status: "True", - Reason: ReasonCompleted, - LastTransitionTime: metav1.NewTime(time.Now()), - Message: fmt.Sprintf("Scaled StorageNodeSet to %d successfully", foundStaticStorageNodeSetOnRemote.Spec.Nodes), - }, - ) - return remoteClient.Status().Update(ctx, foundStaticStorageNodeSetOnRemote) - }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) + }, &foundStaticRemoteStorageNodeSetOnRemote)).Should(Succeed()) - By("set StorageNodeSet status to Ready on remote cluster...") - Eventually(func() error { - foundStorageNodeSetOnRemote := &v1alpha1.StorageNodeSet{} - Expect(remoteClient.Get(ctx, types.NamespacedName{ - Name: storageSample.Name + "-" + testNodeSetName + "-remote", - Namespace: testobjects.YdbNamespace, - }, foundStorageNodeSetOnRemote)).Should(Succeed()) - foundStorageNodeSetOnRemote.Status.State = StorageNodeSetReady - foundStorageNodeSetOnRemote.Status.Conditions = append( - foundStorageNodeSetOnRemote.Status.Conditions, - metav1.Condition{ - Type: StorageNodeSetReadyCondition, - Status: "True", - Reason: ReasonCompleted, - LastTransitionTime: metav1.NewTime(time.Now()), - Message: fmt.Sprintf("Scaled StorageNodeSet to %d successfully", foundStorageNodeSetOnRemote.Spec.Nodes), - }, + return meta.IsStatusConditionPresentAndEqual( + foundStaticRemoteStorageNodeSetOnRemote.Status.Conditions, + NodeSetPreparedCondition, + metav1.ConditionTrue, ) - return remoteClient.Status().Update(ctx, foundStorageNodeSetOnRemote) - }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) + }, test.Timeout, test.Interval).Should(BeTrue()) By("checking that static RemoteStorageNodeSet status updated on local cluster...") Eventually(func() bool { @@ -337,9 +310,24 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { return meta.IsStatusConditionPresentAndEqual( foundStaticRemoteStorageNodeSet.Status.Conditions, - StorageNodeSetReadyCondition, + NodeSetPreparedCondition, + metav1.ConditionTrue, + ) + }, test.Timeout, test.Interval).Should(BeTrue()) + + By("checking that StorageNodeSet status updated on remote cluster...") + Eventually(func() bool { + foundRemoteStorageNodeSetOnRemote := v1alpha1.StorageNodeSet{} + Expect(remoteClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name + "-" + testNodeSetName + "-remote", + Namespace: testobjects.YdbNamespace, + }, &foundRemoteStorageNodeSetOnRemote)).Should(Succeed()) + + return meta.IsStatusConditionPresentAndEqual( + foundRemoteStorageNodeSetOnRemote.Status.Conditions, + NodeSetPreparedCondition, metav1.ConditionTrue, - ) && foundStaticRemoteStorageNodeSet.Status.State == StorageNodeSetReady + ) }, test.Timeout, test.Interval).Should(BeTrue()) By("checking that RemoteStorageNodeSet status updated on local cluster...") @@ -352,9 +340,9 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { return meta.IsStatusConditionPresentAndEqual( foundRemoteStorageNodeSet.Status.Conditions, - StorageNodeSetReadyCondition, + NodeSetPreparedCondition, metav1.ConditionTrue, - ) && foundRemoteStorageNodeSet.Status.State == StorageNodeSetReady + ) }, test.Timeout, test.Interval).Should(BeTrue()) }) }) @@ -462,7 +450,7 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { return nil }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) - By("checking that static RemoteStorageNodeSet RemoteStatus are updated...") + By("checking that static RemoteStorageNodeSet RemoteResource status are updated...") Eventually(func() bool { foundRemoteStorageNodeSet := &v1alpha1.RemoteStorageNodeSet{} Expect(localClient.Get(ctx, types.NamespacedName{ @@ -476,7 +464,6 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { Namespace: testobjects.YdbNamespace, }, &foundConfigMap)).Should(Succeed()) - logf.Log.Info("remoteResources", "status", foundRemoteStorageNodeSet.Status.RemoteResources) for idx := range foundRemoteStorageNodeSet.Status.RemoteResources { remoteResource := foundRemoteStorageNodeSet.Status.RemoteResources[idx] if resources.EqualRemoteResourceWithObject( @@ -495,7 +482,27 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { return false }, test.Timeout, test.Interval).Should(BeTrue()) - By("checking that RemoteStorageNodeSet RemoteStatus are updated...") + By("checking that static RemoteStorageNodeSet status are synced...") + Eventually(func() bool { + foundStorageNodeSet := &v1alpha1.StorageNodeSet{} + Expect(remoteClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name + "-" + testNodeSetName + "-remote-static", + Namespace: testobjects.YdbNamespace, + }, foundStorageNodeSet)).Should(Succeed()) + + foundRemoteStorageNodeSet := &v1alpha1.RemoteStorageNodeSet{} + Expect(localClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name + "-" + testNodeSetName + "-remote-static", + Namespace: testobjects.YdbNamespace, + }, foundRemoteStorageNodeSet)).Should(Succeed()) + + if foundStorageNodeSet.Status.State != foundRemoteStorageNodeSet.Status.State { + return false + } + return reflect.DeepEqual(foundStorageNodeSet.Status.Conditions, foundRemoteStorageNodeSet.Status.Conditions) + }, test.Timeout, test.Interval).Should(BeTrue()) + + By("checking that RemoteStorageNodeSet RemoteResource status are updated...") Eventually(func() bool { foundRemoteStorageNodeSet := &v1alpha1.RemoteStorageNodeSet{} Expect(localClient.Get(ctx, types.NamespacedName{ @@ -526,6 +533,26 @@ var _ = Describe("RemoteStorageNodeSet controller tests", func() { } return false }, test.Timeout, test.Interval).Should(BeTrue()) + + By("checking that RemoteStorageNodeSet status are synced...") + Eventually(func() bool { + foundStorageNodeSet := &v1alpha1.StorageNodeSet{} + Expect(remoteClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name + "-" + testNodeSetName + "-remote", + Namespace: testobjects.YdbNamespace, + }, foundStorageNodeSet)).Should(Succeed()) + + foundRemoteStorageNodeSet := &v1alpha1.RemoteStorageNodeSet{} + Expect(localClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name + "-" + testNodeSetName + "-remote", + Namespace: testobjects.YdbNamespace, + }, foundRemoteStorageNodeSet)).Should(Succeed()) + + if foundStorageNodeSet.Status.State != foundRemoteStorageNodeSet.Status.State { + return false + } + return reflect.DeepEqual(foundStorageNodeSet.Status.Conditions, foundRemoteStorageNodeSet.Status.Conditions) + }, test.Timeout, test.Interval).Should(BeTrue()) }) }) When("Delete Storage with RemoteStorageNodeSet in k8s-mgmt-cluster", func() { diff --git a/internal/controllers/remotestoragenodeset/sync.go b/internal/controllers/remotestoragenodeset/sync.go index d217985e..05295ee2 100644 --- a/internal/controllers/remotestoragenodeset/sync.go +++ b/internal/controllers/remotestoragenodeset/sync.go @@ -6,6 +6,7 @@ import ( "reflect" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -44,6 +45,11 @@ func (r *Reconciler) Sync(ctx context.Context, crRemoteStorageNodeSet *v1alpha1. return result, err } + stop, result, err = r.updateRemoteStatus(ctx, &remoteStorageNodeSet) + if stop { + return result, err + } + return ctrl.Result{}, nil } @@ -99,7 +105,7 @@ func (r *Reconciler) handleResourcesSync( } r.Log.Info("complete step handleResourcesSync") - return r.updateRemoteStatus(ctx, remoteStorageNodeSet) + return Continue, ctrl.Result{}, nil } func (r *Reconciler) updateRemoteStatus( @@ -142,41 +148,38 @@ func (r *Reconciler) updateRemoteStatus( return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } - oldStatus := crRemoteStorageNodeSet.Status.State - if oldStatus != crStorageNodeSet.Status.State { - crRemoteStorageNodeSet.Status.State = crStorageNodeSet.Status.State - crRemoteStorageNodeSet.Status.Conditions = crStorageNodeSet.Status.Conditions - if err := r.RemoteClient.Status().Update(ctx, crRemoteStorageNodeSet); err != nil { - r.Recorder.Event( - remoteStorageNodeSet, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed to update status on remote cluster: %s", err), - ) - r.RemoteRecorder.Event( - remoteStorageNodeSet, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed to update status: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } + crRemoteStorageNodeSet.Status.State = crStorageNodeSet.Status.State + crRemoteStorageNodeSet.Status.Conditions = append([]metav1.Condition{}, crStorageNodeSet.Status.Conditions...) + if err := r.RemoteClient.Status().Update(ctx, crRemoteStorageNodeSet); err != nil { r.Recorder.Event( remoteStorageNodeSet, - corev1.EventTypeNormal, - "StatusChanged", - "Status updated on remote cluster", + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed to update status on remote cluster: %s", err), ) r.RemoteRecorder.Event( remoteStorageNodeSet, - corev1.EventTypeNormal, - "StatusChanged", - "Status updated", + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed to update status: %s", err), ) - r.Log.Info("step updateRemoteStatus requeue reconcile") - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil + + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } - r.Log.Info("step updateRemoteStatus completed") - return Continue, ctrl.Result{Requeue: false}, nil + r.Recorder.Event( + remoteStorageNodeSet, + corev1.EventTypeNormal, + "StatusChanged", + "Status updated on remote cluster", + ) + r.RemoteRecorder.Event( + remoteStorageNodeSet, + corev1.EventTypeNormal, + "StatusChanged", + "Status updated", + ) + + r.Log.Info("complete step updateRemoteStatus") + return Continue, ctrl.Result{}, nil } diff --git a/internal/controllers/storagenodeset/sync.go b/internal/controllers/storagenodeset/sync.go index 0a038a6a..a3d525df 100644 --- a/internal/controllers/storagenodeset/sync.go +++ b/internal/controllers/storagenodeset/sync.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "reflect" + "time" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -27,32 +28,60 @@ func (r *Reconciler) Sync(ctx context.Context, crStorageNodeSet *v1alpha1.Storag var err error storageNodeSet := resources.NewStorageNodeSet(crStorageNodeSet) - stop, result, err = storageNodeSet.SetStatusOnFirstReconcile() + + stop, result, err = r.setInitialStatus(ctx, &storageNodeSet) if stop { return result, err } - stop, result = r.checkStorageFrozen(&storageNodeSet) + stop, result, err = r.handleResourcesSync(ctx, &storageNodeSet) if stop { - return result, nil + return result, err } - stop, result, err = r.handlePauseResume(ctx, &storageNodeSet) + stop, result, err = r.waitForStatefulSetToScale(ctx, &storageNodeSet) if stop { return result, err } - stop, result, err = r.handleResourcesSync(ctx, &storageNodeSet) + stop, result, err = r.handlePauseResume(ctx, &storageNodeSet) if stop { return result, err } - stop, result, err = r.waitForStatefulSetToScale(ctx, &storageNodeSet) - if stop { - return result, err + return ctrl.Result{}, nil +} + +func (r *Reconciler) setInitialStatus( + ctx context.Context, + storageNodeSet *resources.StorageNodeSetResource, +) (bool, ctrl.Result, error) { + r.Log.Info("running step setInitialStatus") + + if storageNodeSet.Status.Conditions == nil { + storageNodeSet.Status.Conditions = []metav1.Condition{} + + if storageNodeSet.Spec.Pause { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Paused", + }) + } else { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Ready", + }) + } + + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) } - return result, err + r.Log.Info("complete step setInitialStatus") + return Continue, ctrl.Result{}, nil } func (r *Reconciler) handleResourcesSync( @@ -61,6 +90,28 @@ func (r *Reconciler) handleResourcesSync( ) (bool, ctrl.Result, error) { r.Log.Info("running step handleResourcesSync") + if !storageNodeSet.Spec.OperatorSync { + r.Log.Info("`operatorSync: false` is set, no further steps will be run") + r.Recorder.Event( + storageNodeSet, + corev1.EventTypeNormal, + string(StorageNodeSetPreparing), + fmt.Sprintf("Found .spec.operatorSync set to %t, skip further steps", storageNodeSet.Spec.OperatorSync), + ) + return Stop, ctrl.Result{Requeue: false}, nil + } + + if storageNodeSet.Status.State == StorageNodeSetPending { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPreparedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for sync resources for generation %d", storageNodeSet.Generation), + }) + storageNodeSet.Status.State = StorageNodeSetPreparing + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) + } + for _, builder := range storageNodeSet.GetResourceBuilders(r.Config) { newResource := builder.Placeholder(storageNodeSet) @@ -104,18 +155,35 @@ func (r *Reconciler) handleResourcesSync( "ProvisioningFailed", eventMessage+fmt.Sprintf(", failed to sync, error: %s", err), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPreparedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Failed to sync resources for generation %d", storageNodeSet.Generation), + }) + return r.updateStatus(ctx, storageNodeSet, DefaultRequeueDelay) } else if result == controllerutil.OperationResultCreated || result == controllerutil.OperationResultUpdated { r.Recorder.Event( storageNodeSet, corev1.EventTypeNormal, - string(StorageNodeSetProvisioning), + string(StorageNodeSetPreparing), eventMessage+fmt.Sprintf(", changed, result: %s", result), ) } } - return Continue, ctrl.Result{Requeue: false}, nil + if !meta.IsStatusConditionTrue(storageNodeSet.Status.Conditions, NodeSetPreparedCondition) { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPreparedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully synced resources", + }) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step handleResourcesSync") + return Continue, ctrl.Result{}, nil } func (r *Reconciler) waitForStatefulSetToScale( @@ -124,14 +192,15 @@ func (r *Reconciler) waitForStatefulSetToScale( ) (bool, ctrl.Result, error) { r.Log.Info("running step waitForStatefulSetToScale") - if storageNodeSet.Status.State == StorageNodeSetPending { - r.Recorder.Event( - storageNodeSet, - corev1.EventTypeNormal, - string(StorageNodeSetProvisioning), - fmt.Sprintf("Starting to track number of running storageNodeSet pods, expected: %d", storageNodeSet.Spec.Nodes)) + if storageNodeSet.Status.State == StorageNodeSetPreparing { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for scale to desired nodes: %d", storageNodeSet.Spec.Nodes), + }) storageNodeSet.Status.State = StorageNodeSetProvisioning - return r.updateStatus(ctx, storageNodeSet) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) } foundStatefulSet := &appsv1.StatefulSet{} @@ -147,7 +216,7 @@ func (r *Reconciler) waitForStatefulSetToScale( "Syncing", fmt.Sprintf("Failed to found StatefulSet: %s", err), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } r.Recorder.Event( storageNodeSet, @@ -175,6 +244,12 @@ func (r *Reconciler) waitForStatefulSetToScale( "Syncing", fmt.Sprintf("Failed to list storageNodeSet pods: %s", err), ) + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: "Failed to check Pods .status.phase", + }) return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } @@ -192,28 +267,52 @@ func (r *Reconciler) waitForStatefulSetToScale( string(StorageNodeSetProvisioning), fmt.Sprintf("Waiting for number of running storageNodeSet pods to match expected: %d != %d", runningPods, storageNodeSet.Spec.Nodes), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Number of running nodes does not match expected: %d != %d", runningPods, storageNodeSet.Spec.Nodes), + }) + return r.updateStatus(ctx, storageNodeSet, DefaultRequeueDelay) } - if storageNodeSet.Status.State == StorageNodeSetProvisioning { + if !meta.IsStatusConditionTrue(storageNodeSet.Status.Conditions, NodeSetProvisionedCondition) { meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ - Type: StorageNodeSetReadyCondition, - Status: "True", + Type: NodeSetProvisionedCondition, + Status: metav1.ConditionTrue, Reason: ReasonCompleted, Message: fmt.Sprintf("Scaled StorageNodeSet to %d successfully", storageNodeSet.Spec.Nodes), }) - storageNodeSet.Status.State = StorageNodeSetReady - return r.updateStatus(ctx, storageNodeSet) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) } - return Continue, ctrl.Result{Requeue: false}, nil + r.Log.Info("complete step waitForStatefulSetToScale") + return Continue, ctrl.Result{}, nil } func (r *Reconciler) updateStatus( ctx context.Context, storageNodeSet *resources.StorageNodeSetResource, + requeueAfter time.Duration, ) (bool, ctrl.Result, error) { - r.Log.Info("running step updateStatus") + r.Log.Info("running updateStatus handler") + + if meta.IsStatusConditionFalse(storageNodeSet.Status.Conditions, NodeSetPreparedCondition) || + meta.IsStatusConditionFalse(storageNodeSet.Status.Conditions, NodeSetProvisionedCondition) { + if storageNodeSet.Spec.Pause { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } else { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } + } crStorageNodeSet := &v1alpha1.StorageNodeSet{} err := r.Get(ctx, types.NamespacedName{ @@ -231,18 +330,18 @@ func (r *Reconciler) updateStatus( } oldStatus := crStorageNodeSet.Status.State + crStorageNodeSet.Status.State = storageNodeSet.Status.State + crStorageNodeSet.Status.Conditions = storageNodeSet.Status.Conditions + if err = r.Status().Update(ctx, crStorageNodeSet); err != nil { + r.Recorder.Event( + storageNodeSet, + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed setting status: %s", err), + ) + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + } if oldStatus != storageNodeSet.Status.State { - crStorageNodeSet.Status.State = storageNodeSet.Status.State - crStorageNodeSet.Status.Conditions = storageNodeSet.Status.Conditions - if err = r.Status().Update(ctx, crStorageNodeSet); err != nil { - r.Recorder.Event( - storageNodeSet, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed setting status: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } r.Recorder.Event( storageNodeSet, corev1.EventTypeNormal, @@ -251,7 +350,8 @@ func (r *Reconciler) updateStatus( ) } - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil + r.Log.Info("complete updateStatus handler") + return Stop, ctrl.Result{RequeueAfter: requeueAfter}, nil } func shouldIgnoreStorageNodeSetChange(storageNodeSet *resources.StorageNodeSetResource) resources.IgnoreChangesFunction { @@ -271,34 +371,69 @@ func (r *Reconciler) handlePauseResume( ) (bool, ctrl.Result, error) { r.Log.Info("running step handlePauseResume") - if storageNodeSet.Status.State == StorageReady && storageNodeSet.Spec.Pause { + if storageNodeSet.Status.State == StorageNodeSetProvisioning { + if storageNodeSet.Spec.Pause { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + storageNodeSet.Status.State = StorageNodeSetPaused + } else { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + storageNodeSet.Status.State = StorageNodeSetReady + } + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) + } + + if storageNodeSet.Status.State == StorageNodeSetReady && storageNodeSet.Spec.Pause { r.Log.Info("`pause: true` was noticed, moving StorageNodeSet to state `Paused`") meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ - Type: StoragePausedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "State StorageNodeSet set to Paused", + Type: NodeSetReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Paused", }) storageNodeSet.Status.State = StorageNodeSetPaused - return r.updateStatus(ctx, storageNodeSet) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) } - if storageNodeSet.Status.State == StoragePaused && !storageNodeSet.Spec.Pause { - r.Log.Info("`pause: false` was noticed, moving Storage to state `Ready`") - meta.RemoveStatusCondition(&storageNodeSet.Status.Conditions, StoragePausedCondition) + if storageNodeSet.Status.State == StorageNodeSetPaused && !storageNodeSet.Spec.Pause { + r.Log.Info("`pause: false` was noticed, moving StorageNodeSet to state `Ready`") + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Ready", + }) storageNodeSet.Status.State = StorageNodeSetReady - return r.updateStatus(ctx, storageNodeSet) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) } - return Continue, ctrl.Result{}, nil -} - -func (r *Reconciler) checkStorageFrozen(storageNodeSet *resources.StorageNodeSetResource) (bool, ctrl.Result) { - r.Log.Info("running step checkStorageFrozen") - if !storageNodeSet.Spec.OperatorSync { - r.Log.Info("`operatorSync: false` is set, no further steps will be run") - return Stop, ctrl.Result{} + if storageNodeSet.Spec.Pause { + if !meta.IsStatusConditionTrue(storageNodeSet.Status.Conditions, NodeSetPausedCondition) { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetPausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) + } + } else { + if !meta.IsStatusConditionTrue(storageNodeSet.Status.Conditions, NodeSetReadyCondition) { + meta.SetStatusCondition(&storageNodeSet.Status.Conditions, metav1.Condition{ + Type: NodeSetReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, storageNodeSet, StatusUpdateRequeueDelay) + } } - return Continue, ctrl.Result{} + r.Log.Info("complete step handlePauseResume") + return Continue, ctrl.Result{}, nil } diff --git a/internal/resources/databasenodeset.go b/internal/resources/databasenodeset.go index 13db4fdc..cac08b04 100644 --- a/internal/resources/databasenodeset.go +++ b/internal/resources/databasenodeset.go @@ -3,14 +3,11 @@ package resources import ( "errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" api "github.com/ydb-platform/ydb-kubernetes-operator/api/v1alpha1" - . "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" //nolint:revive,stylecheck ) type DatabaseNodeSetBuilder struct { @@ -75,25 +72,6 @@ func NewDatabaseNodeSet(databaseNodeSet *api.DatabaseNodeSet) DatabaseNodeSetRes return DatabaseNodeSetResource{DatabaseNodeSet: crDatabaseNodeSet} } -func (b *DatabaseNodeSetResource) SetStatusOnFirstReconcile() (bool, ctrl.Result, error) { - if b.Status.Conditions == nil { - b.Status.Conditions = []metav1.Condition{} - - if b.Spec.Pause { - meta.SetStatusCondition(&b.Status.Conditions, metav1.Condition{ - Type: DatabasePausedCondition, - Status: "False", - Reason: ReasonInProgress, - Message: "Transitioning DatabaseNodeSet to Paused state", - }) - - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil - } - } - - return Continue, ctrl.Result{}, nil -} - func (b *DatabaseNodeSetResource) Unwrap() *api.DatabaseNodeSet { return b.DeepCopy() } diff --git a/internal/resources/storagenodeset.go b/internal/resources/storagenodeset.go index cfec8cf2..12709307 100644 --- a/internal/resources/storagenodeset.go +++ b/internal/resources/storagenodeset.go @@ -3,14 +3,11 @@ package resources import ( "errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" api "github.com/ydb-platform/ydb-kubernetes-operator/api/v1alpha1" - . "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" //nolint:revive,stylecheck ) type StorageNodeSetBuilder struct { @@ -79,25 +76,6 @@ func NewStorageNodeSet(storageNodeSet *api.StorageNodeSet) StorageNodeSetResourc } } -func (b *StorageNodeSetResource) SetStatusOnFirstReconcile() (bool, ctrl.Result, error) { - if b.Status.Conditions == nil { - b.Status.Conditions = []metav1.Condition{} - - if b.Spec.Pause { - meta.SetStatusCondition(&b.Status.Conditions, metav1.Condition{ - Type: StoragePausedCondition, - Status: "False", - Reason: ReasonInProgress, - Message: "Transitioning StorageNodeSet to Paused state", - }) - - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil - } - } - - return Continue, ctrl.Result{}, nil -} - func (b *StorageNodeSetResource) Unwrap() *api.StorageNodeSet { return b.DeepCopy() } From 1a0ee3fd2c364f05c6a9fe182e30cefed1e35a0b Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Mon, 13 May 2024 19:09:02 +0200 Subject: [PATCH 4/7] YDBOPS-9612 conditions for Storage and Database objects (#204) --- deploy/ydb-operator/Chart.yaml | 4 +- internal/controllers/constants/constants.go | 13 +- internal/controllers/database/init.go | 97 ++--- internal/controllers/database/sync.go | 347 +++++++++++++----- .../databasenodeset/controller_test.go | 9 +- .../remotedatabasenodeset/controller_test.go | 8 +- internal/controllers/storage/init.go | 161 +++----- internal/controllers/storage/sync.go | 334 ++++++++++++----- internal/resources/database.go | 23 -- internal/resources/storage.go | 23 -- 10 files changed, 620 insertions(+), 399 deletions(-) diff --git a/deploy/ydb-operator/Chart.yaml b/deploy/ydb-operator/Chart.yaml index e0545b49..0ed360fd 100644 --- a/deploy/ydb-operator/Chart.yaml +++ b/deploy/ydb-operator/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.8 +version: 0.5.9 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.5.8" +appVersion: "0.5.9" diff --git a/internal/controllers/constants/constants.go b/internal/controllers/constants/constants.go index 5d1e8b80..a2cb1f6d 100644 --- a/internal/controllers/constants/constants.go +++ b/internal/controllers/constants/constants.go @@ -19,10 +19,17 @@ const ( OldStorageInitializedCondition = "StorageReady" OldDatabaseInitializedCondition = "TenantInitialized" - StoragePausedCondition = "StoragePaused" - StorageInitializedCondition = "StorageInitialized" - DatabasePausedCondition = "DatabasePaused" + StoragePreparedCondition = "StoragePrepared" + StorageInitializedCondition = "StorageInitialized" + StorageProvisionedCondition = "StorageProvisioned" + StoragePausedCondition = "StoragePaused" + StorageReadyCondition = "StorageReady" + + DatabasePreparedCondition = "DatabasePrepared" DatabaseInitializedCondition = "DatabaseInitialized" + DatabaseProvisionedCondition = "DatabaseProvisioned" + DatabasePausedCondition = "DatabasePaused" + DatabaseReadyCondition = "DatabaseReady" NodeSetPreparedCondition = "NodeSetPrepared" NodeSetProvisionedCondition = "NodeSetProvisioned" diff --git a/internal/controllers/database/init.go b/internal/controllers/database/init.go index 25e91903..d9c6b16b 100644 --- a/internal/controllers/database/init.go +++ b/internal/controllers/database/init.go @@ -17,62 +17,35 @@ import ( "github.com/ydb-platform/ydb-kubernetes-operator/internal/resources" ) -func (r *Reconciler) processSkipInitPipeline( +func (r *Reconciler) setInitPipelineStatus( ctx context.Context, database *resources.DatabaseBuilder, ) (bool, ctrl.Result, error) { - r.Log.Info("running step processSkipInitPipeline") - r.Log.Info("Database initialization disabled (with annotation), proceed with caution") - - r.Recorder.Event( - database, - corev1.EventTypeWarning, - "SkippingInit", - "Skipping database creation due to skip annotation present, be careful!", - ) - - return r.setInitDatabaseCompleted( - ctx, - database, - "Database creation not performed because initialization is skipped", - ) -} - -func (r *Reconciler) setInitialStatus( - ctx context.Context, - database *resources.DatabaseBuilder, -) (bool, ctrl.Result, error) { - r.Log.Info("running step setInitialStatus") - - if meta.IsStatusConditionTrue(database.Status.Conditions, OldDatabaseInitializedCondition) { + if database.Status.State == DatabasePreparing { meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ Type: DatabaseInitializedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "Database initialized successfully", + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: "Database has not been initialized yet", }) - database.Status.State = DatabaseReady - return r.updateStatus(ctx, database) + database.Status.State = DatabaseInitializing + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) } + // This block is special internal logic that skips all Database initialization. if value, ok := database.Annotations[v1alpha1.AnnotationSkipInitialization]; ok && value == v1alpha1.AnnotationValueTrue { - if meta.FindStatusCondition(database.Status.Conditions, DatabaseInitializedCondition) == nil || - meta.IsStatusConditionFalse(database.Status.Conditions, DatabaseInitializedCondition) { - return r.processSkipInitPipeline(ctx, database) - } - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + r.Log.Info("Database initialization disabled (with annotation), proceed with caution") + r.Recorder.Event( + database, + corev1.EventTypeWarning, + "SkippingInit", + "Skipping initialization due to skip annotation present, be careful!", + ) + return r.setInitDatabaseCompleted(ctx, database, "Database initialization not performed because initialization is skipped") } - if database.Status.State == DatabasePending || - meta.FindStatusCondition(database.Status.Conditions, DatabaseInitializedCondition) == nil { - meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ - Type: DatabaseInitializedCondition, - Status: "False", - Reason: ReasonInProgress, - Message: "Database has not been initialized yet", - }) - database.Status.State = DatabasePreparing - return r.updateStatus(ctx, database) + if meta.IsStatusConditionTrue(database.Status.Conditions, OldDatabaseInitializedCondition) { + return r.setInitDatabaseCompleted(ctx, database, "Database initialized successfully") } return Continue, ctrl.Result{Requeue: false}, nil @@ -85,26 +58,17 @@ func (r *Reconciler) setInitDatabaseCompleted( ) (bool, ctrl.Result, error) { meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ Type: DatabaseInitializedCondition, - Status: "True", + Status: metav1.ConditionTrue, Reason: ReasonCompleted, Message: message, }) - database.Status.State = DatabaseProvisioning - - return r.updateStatus(ctx, database) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) } -func (r *Reconciler) initializeDatabase( +func (r *Reconciler) initializeTenant( ctx context.Context, database *resources.DatabaseBuilder, ) (bool, ctrl.Result, error) { - r.Log.Info("running step initializeDatabase") - - if database.Status.State == DatabasePreparing { - database.Status.State = DatabaseInitializing - return r.updateStatus(ctx, database) - } - path := database.GetDatabasePath() var storageUnits []v1alpha1.StorageUnit var shared bool @@ -150,16 +114,15 @@ func (r *Reconciler) initializeDatabase( return Stop, ctrl.Result{RequeueAfter: SharedDatabaseAwaitRequeueDelay}, err } - if sharedDatabaseCr.Status.State != "Ready" { + if !meta.IsStatusConditionTrue(sharedDatabaseCr.Status.Conditions, DatabaseProvisionedCondition) { r.Recorder.Event( database, corev1.EventTypeWarning, "Pending", fmt.Sprintf( - "Referenced shared Database (%s, %s) in a bad state: %s != Ready", + "Referenced shared Database (%s, %s) is not Provisioned", database.Spec.ServerlessResources.SharedDatabaseRef.Name, database.Spec.ServerlessResources.SharedDatabaseRef.Namespace, - sharedDatabaseCr.Status.State, ), ) return Stop, ctrl.Result{RequeueAfter: SharedDatabaseAwaitRequeueDelay}, err @@ -213,7 +176,12 @@ func (r *Reconciler) initializeDatabase( "InitializingFailed", fmt.Sprintf("Error creating tenant %s: %s", tenant.Path, err), ) - return Stop, ctrl.Result{RequeueAfter: DatabaseInitializationRequeueDelay}, err + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseInitializedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + return r.updateStatus(ctx, database, DatabaseInitializationRequeueDelay) } r.Recorder.Event( database, @@ -222,12 +190,5 @@ func (r *Reconciler) initializeDatabase( fmt.Sprintf("Tenant %s created", tenant.Path), ) - r.Recorder.Event( - database, - corev1.EventTypeNormal, - "DatabaseReady", - "Database is initialized", - ) - return r.setInitDatabaseCompleted(ctx, database, "Database initialized successfully") } diff --git a/internal/controllers/database/sync.go b/internal/controllers/database/sync.go index e2524229..1d17464e 100644 --- a/internal/controllers/database/sync.go +++ b/internal/controllers/database/sync.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "reflect" + "time" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -32,17 +33,8 @@ func (r *Reconciler) Sync(ctx context.Context, ydbCr *v1alpha1.Database) (ctrl.R var err error database := resources.NewDatabase(ydbCr) - stop, result, err = database.SetStatusOnFirstReconcile() - if stop { - return result, err - } - - stop, result = r.checkDatabaseFrozen(&database) - if stop { - return result, nil - } - stop, result, err = r.handlePauseResume(ctx, &database) + stop, result, err = r.setInitialStatus(ctx, &database) if stop { return result, err } @@ -67,7 +59,7 @@ func (r *Reconciler) Sync(ctx context.Context, ydbCr *v1alpha1.Database) (ctrl.R } if database.Spec.NodeSets != nil { - stop, result, err = r.waitForDatabaseNodeSetsToReady(ctx, &database) + stop, result, err = r.waitForNodeSetsToProvisioned(ctx, &database) if stop { return result, err } @@ -78,19 +70,59 @@ func (r *Reconciler) Sync(ctx context.Context, ydbCr *v1alpha1.Database) (ctrl.R } } - if database.Status.State != DatabaseReady { - database.Status.State = DatabaseReady - stop, result, err = r.updateStatus(ctx, &database) - if stop { - return result, err - } + stop, result, err = r.handlePauseResume(ctx, &database) + if stop { + return result, err } return ctrl.Result{}, nil } +func (r *Reconciler) setInitialStatus( + ctx context.Context, + database *resources.DatabaseBuilder, +) (bool, ctrl.Result, error) { + r.Log.Info("running step setInitialStatus") + if database.Status.Conditions == nil { + database.Status.Conditions = []metav1.Condition{} + + if database.Spec.Pause { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePausedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Paused", + }) + } else { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseReadyCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Ready", + }) + } + + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step setInitialStatus") + return Continue, ctrl.Result{}, nil +} + func (r *Reconciler) waitForClusterResources(ctx context.Context, database *resources.DatabaseBuilder) (bool, ctrl.Result, error) { r.Log.Info("running step waitForClusterResources") + + if database.Status.State == DatabasePending { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePreparedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for sync resources for generation %d", database.Generation), + }) + database.Status.State = DatabasePreparing + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + storage := &v1alpha1.Storage{} err := r.Get(ctx, types.NamespacedName{ Name: database.Spec.StorageClusterRef.Name, @@ -115,7 +147,7 @@ func (r *Reconciler) waitForClusterResources(ctx context.Context, database *reso corev1.EventTypeWarning, "Pending", fmt.Sprintf( - "Failed to get Database (%s, %s) resource, error: %s", + "Failed to get Storage (%s, %s) resource, error: %s", database.Spec.StorageClusterRef.Name, database.Spec.StorageClusterRef.Namespace, err, @@ -124,48 +156,57 @@ func (r *Reconciler) waitForClusterResources(ctx context.Context, database *reso return Stop, ctrl.Result{RequeueAfter: StorageAwaitRequeueDelay}, err } - if storage.Status.State != DatabaseReady { + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StorageInitializedCondition) { r.Recorder.Event( database, corev1.EventTypeWarning, "Pending", fmt.Sprintf( - "Referenced storage cluster (%s, %s) in a bad state: %s != Ready", + "Referenced storage cluster (%s, %s) is not initialized", database.Spec.StorageClusterRef.Name, database.Spec.StorageClusterRef.Namespace, - storage.Status.State, ), ) - return Stop, ctrl.Result{RequeueAfter: StorageAwaitRequeueDelay}, err + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePreparedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf( + "Referenced storage cluster (%s, %s) is not initialized", + database.Spec.StorageClusterRef.Name, + database.Spec.StorageClusterRef.Namespace, + ), + }) + return r.updateStatus(ctx, database, StorageAwaitRequeueDelay) } database.Storage = storage + r.Log.Info("complete step waitForClusterResources") return Continue, ctrl.Result{Requeue: false}, nil } -func (r *Reconciler) waitForDatabaseNodeSetsToReady( +func (r *Reconciler) waitForNodeSetsToProvisioned( ctx context.Context, database *resources.DatabaseBuilder, ) (bool, ctrl.Result, error) { - r.Log.Info("running step waitForDatabaseNodeSetToReady") + r.Log.Info("running step waitForNodeSetsToProvisioned") if database.Status.State == DatabaseInitializing { - r.Recorder.Event( - database, - corev1.EventTypeNormal, - string(DatabaseProvisioning), - fmt.Sprintf("Starting to track readiness of running nodeSets objects, expected: %d", len(database.Spec.NodeSets)), - ) + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseProvisionedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Waiting for NodeSets conditions to be Provisioned", + }) database.Status.State = DatabaseProvisioning - return r.updateStatus(ctx, database) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) } for _, nodeSetSpec := range database.Spec.NodeSets { var nodeSetObject client.Object var nodeSetKind string - var nodeSetStatus ClusterState - + var nodeSetConditions []metav1.Condition if nodeSetSpec.Remote != nil { nodeSetObject = &v1alpha1.RemoteDatabaseNodeSet{} nodeSetKind = RemoteDatabaseNodeSetKind @@ -197,28 +238,48 @@ func (r *Reconciler) waitForDatabaseNodeSetsToReady( } if nodeSetSpec.Remote != nil { - nodeSetStatus = nodeSetObject.(*v1alpha1.RemoteDatabaseNodeSet).Status.State + nodeSetConditions = nodeSetObject.(*v1alpha1.RemoteDatabaseNodeSet).Status.Conditions } else { - nodeSetStatus = nodeSetObject.(*v1alpha1.DatabaseNodeSet).Status.State + nodeSetConditions = nodeSetObject.(*v1alpha1.DatabaseNodeSet).Status.Conditions } - if nodeSetStatus != DatabaseNodeSetReady { - eventMessage := fmt.Sprintf( - "Waiting %s with name %s for Ready state , current: %s", - nodeSetKind, - nodeSetName, - nodeSetStatus, - ) + // TODO: also check observedGeneration to guarantee that compare with updated object + if !meta.IsStatusConditionTrue(nodeSetConditions, NodeSetProvisionedCondition) { r.Recorder.Event( database, corev1.EventTypeNormal, string(DatabaseProvisioning), - eventMessage, + fmt.Sprintf( + "Waiting %s with name %s for condition NodeSetProvisioned to be True", + nodeSetKind, + nodeSetName, + ), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf( + "Waiting %s with name %s for condition NodeSetProvisioned to be True", + nodeSetKind, + nodeSetName, + ), + }) + return r.updateStatus(ctx, database, DefaultRequeueDelay) } } + if !meta.IsStatusConditionTrue(database.Status.Conditions, DatabaseProvisionedCondition) { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseProvisionedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully scaled to desired number of nodes", + }) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step waitForNodeSetsToProvisioned") return Continue, ctrl.Result{Requeue: false}, nil } @@ -228,15 +289,15 @@ func (r *Reconciler) waitForStatefulSetToScale( ) (bool, ctrl.Result, error) { r.Log.Info("running step waitForStatefulSetToScale") - if database.Status.State == DatabasePreparing { - r.Recorder.Event( - database, - corev1.EventTypeNormal, - string(DatabaseProvisioning), - fmt.Sprintf("Starting to track number of running database pods, expected: %d", database.Spec.Nodes), - ) + if database.Status.State == DatabaseInitializing { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseProvisionedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for scale to desired nodes: %d", database.Spec.Nodes), + }) database.Status.State = DatabaseProvisioning - return r.updateStatus(ctx, database) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) } if database.Spec.ServerlessResources != nil { @@ -308,9 +369,26 @@ func (r *Reconciler) waitForStatefulSetToScale( string(DatabaseProvisioning), fmt.Sprintf("Waiting for number of running dynamic pods to match expected: %d != %d", runningPods, database.Spec.Nodes), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Number of running dynamic pods does not match expected: %d != %d", runningPods, database.Spec.Nodes), + }) + return r.updateStatus(ctx, database, DefaultRequeueDelay) + } + + if !meta.IsStatusConditionTrue(database.Status.Conditions, DatabaseProvisionedCondition) { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseProvisionedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully scaled to desired number of nodes", + }) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) } + r.Log.Info("complete step waitForStatefulSetToScale") return Continue, ctrl.Result{Requeue: false}, nil } @@ -331,6 +409,17 @@ func (r *Reconciler) handleResourcesSync( ) (bool, ctrl.Result, error) { r.Log.Info("running step handleResourcesSync") + if !database.Spec.OperatorSync { + r.Log.Info("`operatorSync: false` is set, no further steps will be run") + r.Recorder.Event( + database, + corev1.EventTypeNormal, + string(DatabasePreparing), + fmt.Sprintf("Found .spec.operatorSync set to %t, skip further steps", database.Spec.OperatorSync), + ) + return Stop, ctrl.Result{}, nil + } + for _, builder := range database.GetResourceBuilders(r.Config) { newResource := builder.Placeholder(database) @@ -375,7 +464,13 @@ func (r *Reconciler) handleResourcesSync( "ProvisioningFailed", eventMessage+fmt.Sprintf(", failed to sync, error: %s", err), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePreparedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Failed to sync resources for generation %d", database.Generation), + }) + return r.updateStatus(ctx, database, DefaultRequeueDelay) } else if result == controllerutil.OperationResultCreated || result == controllerutil.OperationResultUpdated { r.Recorder.Event( database, @@ -386,14 +481,34 @@ func (r *Reconciler) handleResourcesSync( } } + r.Log.Info("complete step handleResourcesSync") return Continue, ctrl.Result{Requeue: false}, nil } func (r *Reconciler) updateStatus( ctx context.Context, database *resources.DatabaseBuilder, + requeueAfter time.Duration, ) (bool, ctrl.Result, error) { - r.Log.Info("running step updateStatus") + r.Log.Info("running updateStatus handler") + + if meta.IsStatusConditionFalse(database.Status.Conditions, DatabasePreparedCondition) || + meta.IsStatusConditionFalse(database.Status.Conditions, DatabaseInitializedCondition) || + meta.IsStatusConditionFalse(database.Status.Conditions, DatabaseProvisionedCondition) { + if database.Spec.Pause { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } else { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } + } databaseCr := &v1alpha1.Database{} err := r.Get(ctx, types.NamespacedName{ @@ -411,19 +526,19 @@ func (r *Reconciler) updateStatus( } oldStatus := databaseCr.Status.State + databaseCr.Status.State = database.Status.State + databaseCr.Status.Conditions = database.Status.Conditions + err = r.Status().Update(ctx, databaseCr) + if err != nil { + r.Recorder.Event( + database, + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("failed setting status: %s", err), + ) + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + } if oldStatus != database.Status.State { - databaseCr.Status.State = database.Status.State - databaseCr.Status.Conditions = database.Status.Conditions - err = r.Status().Update(ctx, databaseCr) - if err != nil { - r.Recorder.Event( - database, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("failed setting status: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } r.Recorder.Event( database, corev1.EventTypeNormal, @@ -432,7 +547,8 @@ func (r *Reconciler) updateStatus( ) } - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil + r.Log.Info("complete updateStatus handler") + return Stop, ctrl.Result{RequeueAfter: requeueAfter}, nil } func (r *Reconciler) syncNodeSetSpecInline( @@ -484,7 +600,7 @@ func (r *Reconciler) syncNodeSetSpecInline( corev1.EventTypeNormal, "Syncing", fmt.Sprintf("Resource: %s, Namespace: %s, Name: %s, deleted", - reflect.TypeOf(databaseNodeSet), + DatabaseNodeSetKind, databaseNodeSet.Namespace, databaseNodeSet.Name), ) @@ -535,13 +651,24 @@ func (r *Reconciler) syncNodeSetSpecInline( corev1.EventTypeNormal, "Syncing", fmt.Sprintf("Resource: %s, Namespace: %s, Name: %s, deleted", - reflect.TypeOf(remoteDatabaseNodeSet), + RemoteDatabaseNodeSetKind, remoteDatabaseNodeSet.Namespace, remoteDatabaseNodeSet.Name), ) } } + if !meta.IsStatusConditionTrue(database.Status.Conditions, DatabasePreparedCondition) { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePreparedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully synced resources", + }) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step syncNodeSetSpecInline") return Continue, ctrl.Result{Requeue: false}, nil } @@ -550,25 +677,71 @@ func (r *Reconciler) handlePauseResume( database *resources.DatabaseBuilder, ) (bool, ctrl.Result, error) { r.Log.Info("running step handlePauseResume") + + if database.Status.State == DatabaseProvisioning { + if database.Spec.Pause { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + database.Status.State = DatabasePaused + } else { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + database.Status.State = DatabaseReady + } + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + if database.Status.State == DatabaseReady && database.Spec.Pause { r.Log.Info("`pause: true` was noticed, moving Database to state `Paused`") meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ - Type: DatabasePausedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "State Database set to Paused", + Type: DatabaseReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Paused", }) database.Status.State = DatabasePaused - return r.updateStatus(ctx, database) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) } if database.Status.State == DatabasePaused && !database.Spec.Pause { r.Log.Info("`pause: false` was noticed, moving Database to state `Ready`") - meta.RemoveStatusCondition(&database.Status.Conditions, DatabasePausedCondition) + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Ready", + }) database.Status.State = DatabaseReady - return r.updateStatus(ctx, database) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + + if database.Spec.Pause { + if !meta.IsStatusConditionTrue(database.Status.Conditions, DatabasePausedCondition) { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabasePausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } + } else { + if !meta.IsStatusConditionTrue(database.Status.Conditions, DatabaseReadyCondition) { + meta.SetStatusCondition(&database.Status.Conditions, metav1.Condition{ + Type: DatabaseReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, database, StatusUpdateRequeueDelay) + } } + r.Log.Info("complete step handlePauseResume") return Continue, ctrl.Result{}, nil } @@ -576,28 +749,18 @@ func (r *Reconciler) handleTenantCreation( ctx context.Context, database *resources.DatabaseBuilder, ) (ctrl.Result, error) { - stop, result, err := r.setInitialStatus(ctx, database) + r.Log.Info("running step handleTenantCreation") + + stop, result, err := r.setInitPipelineStatus(ctx, database) if stop { return result, err } - stop, result, err = r.initializeDatabase(ctx, database) + stop, result, err = r.initializeTenant(ctx, database) if stop { return result, err } + r.Log.Info("complete step handleTenantCreation") return ctrl.Result{}, nil } - -func (r *Reconciler) checkDatabaseFrozen( - database *resources.DatabaseBuilder, -) (bool, ctrl.Result) { - r.Log.Info("running step checkDatabaseFrozen") - - if !database.Spec.OperatorSync { - r.Log.Info("`operatorSync: false` is set, no further steps will be run") - return Stop, ctrl.Result{} - } - - return Continue, ctrl.Result{} -} diff --git a/internal/controllers/databasenodeset/controller_test.go b/internal/controllers/databasenodeset/controller_test.go index 2bfe3b11..819c7ba6 100644 --- a/internal/controllers/databasenodeset/controller_test.go +++ b/internal/controllers/databasenodeset/controller_test.go @@ -9,6 +9,7 @@ import ( . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -83,14 +84,18 @@ var _ = Describe("DatabaseNodeSet controller medium tests", func() { return foundStorage.Status.State == StorageInitializing }, test.Timeout, test.Interval).Should(BeTrue()) - By("set status Ready to Storage...") + By("set condition Initialized to Storage...") Eventually(func() error { foundStorage := v1alpha1.Storage{} Expect(k8sClient.Get(ctx, types.NamespacedName{ Name: storageSample.Name, Namespace: testobjects.YdbNamespace, }, &foundStorage)) - foundStorage.Status.State = StorageReady + meta.SetStatusCondition(&foundStorage.Status.Conditions, metav1.Condition{ + Type: StorageInitializedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) return k8sClient.Status().Update(ctx, &foundStorage) }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) diff --git a/internal/controllers/remotedatabasenodeset/controller_test.go b/internal/controllers/remotedatabasenodeset/controller_test.go index f088c48e..3780c6cf 100644 --- a/internal/controllers/remotedatabasenodeset/controller_test.go +++ b/internal/controllers/remotedatabasenodeset/controller_test.go @@ -259,14 +259,18 @@ var _ = Describe("RemoteDatabaseNodeSet controller tests", func() { return foundStorage.Status.State == StorageInitializing }, test.Timeout, test.Interval).Should(BeTrue()) - By("set status Ready to Storage...") + By("set condition Initialized to Storage...") Eventually(func() error { foundStorage := v1alpha1.Storage{} Expect(localClient.Get(ctx, types.NamespacedName{ Name: storageSample.Name, Namespace: testobjects.YdbNamespace, }, &foundStorage)) - foundStorage.Status.State = StorageReady + meta.SetStatusCondition(&foundStorage.Status.Conditions, metav1.Condition{ + Type: StorageInitializedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) return localClient.Status().Update(ctx, &foundStorage) }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) diff --git a/internal/controllers/storage/init.go b/internal/controllers/storage/init.go index b20628cb..cdcb5b78 100644 --- a/internal/controllers/storage/init.go +++ b/internal/controllers/storage/init.go @@ -28,69 +28,35 @@ import ( var mismatchItemConfigGenerationRegexp = regexp.MustCompile(".*mismatch.*ItemConfigGenerationProvided# " + "0.*ItemConfigGenerationExpected# 1.*") -func (r *Reconciler) processSkipInitPipeline( +func (r *Reconciler) setInitPipelineStatus( ctx context.Context, storage *resources.StorageClusterBuilder, ) (bool, ctrl.Result, error) { - r.Log.Info("running step processSkipInitPipeline") - r.Log.Info("Storage initialization disabled (with annotation), proceed with caution") - - r.Recorder.Event( - storage, - corev1.EventTypeWarning, - "SkippingInit", - "Skipping initialization due to skip annotation present, be careful!", - ) - - meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ - Type: StorageInitializedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "Storage initialization not performed because initialization is skipped", - }) - storage.Status.State = StorageReady - return r.updateStatus(ctx, storage) -} - -func (r *Reconciler) setInitialStatus( - ctx context.Context, - storage *resources.StorageClusterBuilder, -) (bool, ctrl.Result, error) { - r.Log.Info("running step setInitialStatus") - - if meta.IsStatusConditionTrue(storage.Status.Conditions, OldStorageInitializedCondition) { + if storage.Status.State == StoragePreparing { meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ Type: StorageInitializedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "Storage initialized successfully", + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Storage has not been initialized yet", }) - storage.Status.State = StorageReady - return r.updateStatus(ctx, storage) + storage.Status.State = StorageInitializing + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } // This block is special internal logic that skips all Storage initialization. - // It is needed when large clusters are migrated where `waitForStatefulSetToScale` - // does not make sense, since some nodes can be down for a long time (and it is okay, since - // database is healthy even with partial outage). if value, ok := storage.Annotations[v1alpha1.AnnotationSkipInitialization]; ok && value == v1alpha1.AnnotationValueTrue { - if meta.FindStatusCondition(storage.Status.Conditions, StorageInitializedCondition) == nil || - meta.IsStatusConditionFalse(storage.Status.Conditions, StorageInitializedCondition) { - return r.processSkipInitPipeline(ctx, storage) - } - return Stop, ctrl.Result{RequeueAfter: StorageInitializationRequeueDelay}, nil + r.Log.Info("Storage initialization disabled (with annotation), proceed with caution") + r.Recorder.Event( + storage, + corev1.EventTypeWarning, + "SkippingInit", + "Skipping initialization due to skip annotation present, be careful!", + ) + return r.setInitStorageCompleted(ctx, storage, "Storage initialization not performed because initialization is skipped") } - if storage.Status.State == StoragePending || - meta.FindStatusCondition(storage.Status.Conditions, StorageInitializedCondition) == nil { - meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ - Type: StorageInitializedCondition, - Status: "False", - Reason: ReasonInProgress, - Message: "Storage has not been initialized yet", - }) - storage.Status.State = StoragePreparing - return r.updateStatus(ctx, storage) + if meta.IsStatusConditionTrue(storage.Status.Conditions, OldStorageInitializedCondition) { + return r.setInitStorageCompleted(ctx, storage, "Storage initialized successfully") } return Continue, ctrl.Result{Requeue: false}, nil } @@ -102,25 +68,17 @@ func (r *Reconciler) setInitStorageCompleted( ) (bool, ctrl.Result, error) { meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ Type: StorageInitializedCondition, - Status: "True", + Status: metav1.ConditionTrue, Reason: ReasonCompleted, Message: message, }) - storage.Status.State = StorageProvisioning - return r.updateStatus(ctx, storage) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } -func (r *Reconciler) initializeStorage( +func (r *Reconciler) initializeBlobstorage( ctx context.Context, storage *resources.StorageClusterBuilder, ) (bool, ctrl.Result, error) { - r.Log.Info("running step initializeStorage") - - if storage.Status.State == StoragePreparing { - storage.Status.State = StorageInitializing - return r.updateStatus(ctx, storage) - } - initJob := &batchv1.Job{} err := r.Get(ctx, types.NamespacedName{ Name: fmt.Sprintf(resources.InitJobNameFormat, storage.Name), @@ -193,57 +151,60 @@ func (r *Reconciler) initializeStorage( } } - //nolint:nestif - if initJob.Status.Failed > 0 { - initialized, err := r.checkFailedJob(ctx, storage, initJob) - if err != nil { + initialized, err := r.checkFailedJob(ctx, storage, initJob) + if err != nil { + r.Recorder.Event( + storage, + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed to check logs for initBlobstorage Job: %s", err), + ) + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + } + + if initialized { + r.Log.Info("Storage is already initialized, continuing...") + r.Recorder.Event( + storage, + corev1.EventTypeNormal, + "InitializingStorage", + "Storage initialization attempted and skipped, storage already initialized", + ) + if err := r.Delete(ctx, initJob, client.PropagationPolicy(metav1.DeletePropagationForeground)); err != nil { r.Recorder.Event( storage, corev1.EventTypeWarning, "ControllerError", - fmt.Sprintf("Failed to check logs from failed Pod for Job: %s", err), + fmt.Sprintf("Failed to delete Job: %s", err), ) return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } + return r.setInitStorageCompleted(ctx, storage, "Storage already initialized") + } - if initialized { - r.Log.Info("Storage is already initialized, continuing...") - r.Recorder.Event( - storage, - corev1.EventTypeNormal, - "InitializingStorage", - "Storage initialization attempted and skipped, storage already initialized", - ) - if err := r.Delete(ctx, initJob, client.PropagationPolicy(metav1.DeletePropagationForeground)); err != nil { - r.Recorder.Event( - storage, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed to delete Job: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } - return r.setInitStorageCompleted(ctx, storage, "Storage already initialized") - } - - if initJob.Status.Failed == *initJob.Spec.BackoffLimit || conditionFailed { - r.Log.Info("Init Job status failed") + if initJob.Status.Failed == *initJob.Spec.BackoffLimit || conditionFailed { + r.Recorder.Event( + storage, + corev1.EventTypeWarning, + "InitializingStorage", + "Failed initBlobstorage Job, check Pod logs for addditional info", + ) + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageInitializedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + if err := r.Delete(ctx, initJob, client.PropagationPolicy(metav1.DeletePropagationForeground)); err != nil { r.Recorder.Event( storage, corev1.EventTypeWarning, - "InitializingStorage", - "Failed to initializing Storage", + "ControllerError", + fmt.Sprintf("Failed to delete initBlobstorage Job: %s", err), ) - if err := r.Delete(ctx, initJob, client.PropagationPolicy(metav1.DeletePropagationForeground)); err != nil { - r.Recorder.Event( - storage, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed to delete Job: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err } + + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } return Stop, ctrl.Result{RequeueAfter: StorageInitializationRequeueDelay}, nil diff --git a/internal/controllers/storage/sync.go b/internal/controllers/storage/sync.go index 79d69466..1be13c41 100644 --- a/internal/controllers/storage/sync.go +++ b/internal/controllers/storage/sync.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "reflect" + "time" "github.com/ydb-platform/ydb-go-genproto/protos/Ydb_Monitoring" appsv1 "k8s.io/api/apps/v1" @@ -30,17 +31,8 @@ func (r *Reconciler) Sync(ctx context.Context, cr *v1alpha1.Storage) (ctrl.Resul var err error storage := resources.NewCluster(cr) - stop, result, err = storage.SetStatusOnFirstReconcile() - if stop { - return result, err - } - - stop, result = r.checkStorageFrozen(&storage) - if stop { - return result, nil - } - stop, result, err = r.handlePauseResume(ctx, &storage) + stop, result, err = r.setInitialStatus(ctx, &storage) if stop { return result, err } @@ -60,7 +52,7 @@ func (r *Reconciler) Sync(ctx context.Context, cr *v1alpha1.Storage) (ctrl.Resul } if storage.Spec.NodeSets != nil { - stop, result, err = r.waitForStorageNodeSetsToReady(ctx, &storage) + stop, result, err = r.waitForNodeSetsToProvisioned(ctx, &storage) if stop { return result, err } @@ -71,20 +63,48 @@ func (r *Reconciler) Sync(ctx context.Context, cr *v1alpha1.Storage) (ctrl.Resul } } + stop, result, err = r.handlePauseResume(ctx, &storage) + if stop { + return result, err + } + stop, result, err = r.runSelfCheck(ctx, &storage, false) if stop { return result, err } - if storage.Status.State != StorageReady { - storage.Status.State = StorageReady - stop, result, err = r.updateStatus(ctx, &storage) - if stop { - return result, err + return ctrl.Result{}, nil +} + +func (r *Reconciler) setInitialStatus( + ctx context.Context, + storage *resources.StorageClusterBuilder, +) (bool, ctrl.Result, error) { + r.Log.Info("running step setInitialStatus") + if storage.Status.Conditions == nil { + storage.Status.Conditions = []metav1.Condition{} + + if storage.Spec.Pause { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePausedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Paused", + }) + } else { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageReadyCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Transitioning to state Ready", + }) } + + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } - return ctrl.Result{}, nil + r.Log.Info("complete step setInitialStatus") + return Continue, ctrl.Result{}, nil } func (r *Reconciler) waitForStatefulSetToScale( @@ -94,14 +114,14 @@ func (r *Reconciler) waitForStatefulSetToScale( r.Log.Info("running step waitForStatefulSetToScale") if storage.Status.State == StorageInitializing { - r.Recorder.Event( - storage, - corev1.EventTypeNormal, - string(StorageProvisioning), - fmt.Sprintf("Starting to track number of running storage pods, expected: %d", storage.Spec.Nodes), - ) + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageProvisionedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for scale to desired nodes: %d", storage.Spec.Nodes), + }) storage.Status.State = StorageProvisioning - return r.updateStatus(ctx, storage) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } found := &appsv1.StatefulSet{} @@ -169,33 +189,50 @@ func (r *Reconciler) waitForStatefulSetToScale( string(StorageProvisioning), fmt.Sprintf("Waiting for number of running storage pods to match expected: %d != %d", runningPods, storage.Spec.Nodes), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Number of running nodes does not match expected: %d != %d", runningPods, storage.Spec.Nodes), + }) + return r.updateStatus(ctx, storage, DefaultRequeueDelay) } + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StorageProvisionedCondition) { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageProvisionedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully scaled to desired number of nodes", + }) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step waitForStatefulSetToScale") return Continue, ctrl.Result{Requeue: false}, nil } -func (r *Reconciler) waitForStorageNodeSetsToReady( +func (r *Reconciler) waitForNodeSetsToProvisioned( ctx context.Context, storage *resources.StorageClusterBuilder, ) (bool, ctrl.Result, error) { - r.Log.Info("running step waitForStorageNodeSetToReady") + r.Log.Info("running step waitForNodeSetsToProvisioned") if storage.Status.State == StorageInitializing { - r.Recorder.Event( - storage, - corev1.EventTypeNormal, - string(StorageProvisioning), - fmt.Sprintf("Starting to track readiness of running nodeSets objects, expected: %d", storage.Spec.Nodes), - ) + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageProvisionedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: "Waiting for NodeSets conditions to be Provisioned", + }) storage.Status.State = StorageProvisioning - return r.updateStatus(ctx, storage) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } - var nodeSetObject client.Object - var nodeSetKind string - var nodeSetStatus ClusterState for _, nodeSetSpec := range storage.Spec.NodeSets { + var nodeSetObject client.Object + var nodeSetKind string + var nodeSetConditions []metav1.Condition if nodeSetSpec.Remote != nil { nodeSetObject = &v1alpha1.RemoteStorageNodeSet{} nodeSetKind = RemoteStorageNodeSetKind @@ -228,29 +265,49 @@ func (r *Reconciler) waitForStorageNodeSetsToReady( } if nodeSetSpec.Remote != nil { - nodeSetStatus = nodeSetObject.(*v1alpha1.RemoteStorageNodeSet).Status.State + nodeSetConditions = nodeSetObject.(*v1alpha1.RemoteStorageNodeSet).Status.Conditions } else { - nodeSetStatus = nodeSetObject.(*v1alpha1.StorageNodeSet).Status.State + nodeSetConditions = nodeSetObject.(*v1alpha1.StorageNodeSet).Status.Conditions } - if nodeSetStatus != StorageNodeSetReady { - eventMessage := fmt.Sprintf( - "Waiting %s with name %s for Ready state , current: %s", - nodeSetKind, - nodeSetName, - nodeSetStatus, - ) + // TODO: also check observedGeneration to guarantee that compare with updated object + if !meta.IsStatusConditionTrue(nodeSetConditions, NodeSetProvisionedCondition) { r.Recorder.Event( storage, corev1.EventTypeNormal, string(StorageProvisioning), - eventMessage, + fmt.Sprintf( + "Waiting %s with name %s for condition NodeSetProvisioned to be True", + nodeSetKind, + nodeSetName, + ), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, nil + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf( + "Waiting %s with name %s for condition NodeSetProvisioned to be True", + nodeSetKind, + nodeSetName, + ), + }) + return r.updateStatus(ctx, storage, DefaultRequeueDelay) } } - return Continue, ctrl.Result{Requeue: false}, nil + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StorageProvisionedCondition) { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageProvisionedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully scaled to desired number of nodes", + }) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step waitForNodeSetsToProvisioned") + return Continue, ctrl.Result{}, nil } func shouldIgnoreStorageChange(storage *resources.StorageClusterBuilder) resources.IgnoreChangesFunction { @@ -270,6 +327,28 @@ func (r *Reconciler) handleResourcesSync( ) (bool, ctrl.Result, error) { r.Log.Info("running step handleResourcesSync") + if storage.Status.State == StoragePending { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePreparedCondition, + Status: metav1.ConditionUnknown, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Waiting for sync resources for generation %d", storage.Generation), + }) + storage.Status.State = StoragePreparing + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + + if !storage.Spec.OperatorSync { + r.Log.Info("`operatorSync: false` is set, no further steps will be run") + r.Recorder.Event( + storage, + corev1.EventTypeNormal, + string(StoragePreparing), + fmt.Sprintf("Found .spec.operatorSync set to %t, skip further steps", storage.Spec.OperatorSync), + ) + return Stop, ctrl.Result{}, nil + } + for _, builder := range storage.GetResourceBuilders(r.Config) { newResource := builder.Placeholder(storage) @@ -286,6 +365,7 @@ func (r *Reconciler) handleResourcesSync( ) return err } + err = ctrl.SetControllerReference(storage.Unwrap(), newResource, r.Scheme) if err != nil { r.Recorder.Event( @@ -313,7 +393,13 @@ func (r *Reconciler) handleResourcesSync( "ProvisioningFailed", eventMessage+fmt.Sprintf(", failed to sync, error: %s", err), ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePreparedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + Message: fmt.Sprintf("Failed to sync resources for generation %d", storage.Generation), + }) + return r.updateStatus(ctx, storage, DefaultRequeueDelay) } else if result == controllerutil.OperationResultCreated || result == controllerutil.OperationResultUpdated { r.Recorder.Event( storage, @@ -324,6 +410,17 @@ func (r *Reconciler) handleResourcesSync( } } + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StoragePreparedCondition) { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePreparedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully synced resources", + }) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step handleResourcesSync") return Continue, ctrl.Result{Requeue: false}, nil } @@ -434,6 +531,17 @@ func (r *Reconciler) syncNodeSetSpecInline( } } + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StoragePreparedCondition) { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePreparedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + Message: "Successfully synced resources", + }) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + + r.Log.Info("complete step syncNodeSetSpecInline") return Continue, ctrl.Result{Requeue: false}, nil } @@ -491,14 +599,34 @@ func (r *Reconciler) runSelfCheck( return Stop, ctrl.Result{RequeueAfter: SelfCheckRequeueDelay}, err } + r.Log.Info("complete step runSelfCheck") return Continue, ctrl.Result{}, nil } func (r *Reconciler) updateStatus( ctx context.Context, storage *resources.StorageClusterBuilder, + requeueAfter time.Duration, ) (bool, ctrl.Result, error) { - r.Log.Info("running step updateStatus") + r.Log.Info("running updateStatus handler") + + if meta.IsStatusConditionFalse(storage.Status.Conditions, StoragePreparedCondition) || + meta.IsStatusConditionFalse(storage.Status.Conditions, StorageInitializedCondition) || + meta.IsStatusConditionFalse(storage.Status.Conditions, StorageProvisionedCondition) { + if storage.Spec.Pause { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } else { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonInProgress, + }) + } + } storageCr := &v1alpha1.Storage{} err := r.Get(ctx, types.NamespacedName{ @@ -516,18 +644,18 @@ func (r *Reconciler) updateStatus( } oldStatus := storageCr.Status.State + storageCr.Status.State = storage.Status.State + storageCr.Status.Conditions = storage.Status.Conditions + if err = r.Status().Update(ctx, storageCr); err != nil { + r.Recorder.Event( + storage, + corev1.EventTypeWarning, + "ControllerError", + fmt.Sprintf("Failed setting status: %s", err), + ) + return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err + } if oldStatus != storage.Status.State { - storageCr.Status.State = storage.Status.State - storageCr.Status.Conditions = storage.Status.Conditions - if err = r.Status().Update(ctx, storageCr); err != nil { - r.Recorder.Event( - storage, - corev1.EventTypeWarning, - "ControllerError", - fmt.Sprintf("Failed setting status: %s", err), - ) - return Stop, ctrl.Result{RequeueAfter: DefaultRequeueDelay}, err - } r.Recorder.Event( storage, corev1.EventTypeNormal, @@ -536,7 +664,8 @@ func (r *Reconciler) updateStatus( ) } - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil + r.Log.Info("complete updateStatus handler") + return Stop, ctrl.Result{RequeueAfter: requeueAfter}, nil } func (r *Reconciler) handlePauseResume( @@ -544,25 +673,71 @@ func (r *Reconciler) handlePauseResume( storage *resources.StorageClusterBuilder, ) (bool, ctrl.Result, error) { r.Log.Info("running step handlePauseResume") + + if storage.Status.State == StorageProvisioning { + if storage.Spec.Pause { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + storage.Status.State = StoragePaused + } else { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + storage.Status.State = StorageReady + } + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + if storage.Status.State == StorageReady && storage.Spec.Pause { r.Log.Info("`pause: true` was noticed, moving Storage to state `Paused`") meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ - Type: StoragePausedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "State Storage set to Paused", + Type: StorageReadyCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Paused", }) storage.Status.State = StoragePaused - return r.updateStatus(ctx, storage) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } if storage.Status.State == StoragePaused && !storage.Spec.Pause { r.Log.Info("`pause: false` was noticed, moving Storage to state `Ready`") - meta.RemoveStatusCondition(&storage.Status.Conditions, StoragePausedCondition) + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePausedCondition, + Status: metav1.ConditionFalse, + Reason: ReasonNotRequired, + Message: "Transitioning to state Ready", + }) storage.Status.State = StorageReady - return r.updateStatus(ctx, storage) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) } + if storage.Spec.Pause { + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StoragePausedCondition) { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StoragePausedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + } else { + if !meta.IsStatusConditionTrue(storage.Status.Conditions, StorageReadyCondition) { + meta.SetStatusCondition(&storage.Status.Conditions, metav1.Condition{ + Type: StorageReadyCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return r.updateStatus(ctx, storage, StatusUpdateRequeueDelay) + } + } + + r.Log.Info("complete step handlePauseResume") return Continue, ctrl.Result{}, nil } @@ -570,27 +745,18 @@ func (r *Reconciler) handleBlobstorageInit( ctx context.Context, storage *resources.StorageClusterBuilder, ) (ctrl.Result, error) { - stop, result, err := r.setInitialStatus(ctx, storage) + r.Log.Info("running step handleBlobstorageInit") + + stop, result, err := r.setInitPipelineStatus(ctx, storage) if stop { return result, err } - stop, result, err = r.initializeStorage(ctx, storage) + stop, result, err = r.initializeBlobstorage(ctx, storage) if stop { return result, err } + r.Log.Info("complete step handleBlobstorageInit") return ctrl.Result{}, nil } - -func (r *Reconciler) checkStorageFrozen( - storage *resources.StorageClusterBuilder, -) (bool, ctrl.Result) { - r.Log.Info("running step checkStorageFrozen") - if !storage.Spec.OperatorSync { - r.Log.Info("`operatorSync: false` is set, no further steps will be run") - return Stop, ctrl.Result{} - } - - return Continue, ctrl.Result{} -} diff --git a/internal/resources/database.go b/internal/resources/database.go index eedcc8c9..531181e7 100644 --- a/internal/resources/database.go +++ b/internal/resources/database.go @@ -2,13 +2,9 @@ package resources import ( corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" - ctrl "sigs.k8s.io/controller-runtime" api "github.com/ydb-platform/ydb-kubernetes-operator/api/v1alpha1" - . "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" //nolint:revive,stylecheck "github.com/ydb-platform/ydb-kubernetes-operator/internal/labels" "github.com/ydb-platform/ydb-kubernetes-operator/internal/metrics" ) @@ -24,25 +20,6 @@ func NewDatabase(ydbCr *api.Database) DatabaseBuilder { return DatabaseBuilder{Database: cr, Storage: nil} } -func (b *DatabaseBuilder) SetStatusOnFirstReconcile() (bool, ctrl.Result, error) { - if b.Status.Conditions == nil { - b.Status.Conditions = []metav1.Condition{} - - if b.Spec.Pause { - meta.SetStatusCondition(&b.Status.Conditions, metav1.Condition{ - Type: DatabasePausedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "State Database set to Paused", - }) - - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil - } - } - - return Continue, ctrl.Result{}, nil -} - func (b *DatabaseBuilder) Unwrap() *api.Database { return b.DeepCopy() } diff --git a/internal/resources/storage.go b/internal/resources/storage.go index e1db131c..1b5acf3d 100644 --- a/internal/resources/storage.go +++ b/internal/resources/storage.go @@ -2,13 +2,9 @@ package resources import ( corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" - ctrl "sigs.k8s.io/controller-runtime" api "github.com/ydb-platform/ydb-kubernetes-operator/api/v1alpha1" - . "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" //nolint:revive,stylecheck "github.com/ydb-platform/ydb-kubernetes-operator/internal/labels" "github.com/ydb-platform/ydb-kubernetes-operator/internal/metrics" ) @@ -23,25 +19,6 @@ func NewCluster(ydbCr *api.Storage) StorageClusterBuilder { return StorageClusterBuilder{cr} } -func (b *StorageClusterBuilder) SetStatusOnFirstReconcile() (bool, ctrl.Result, error) { - if b.Status.Conditions == nil { - b.Status.Conditions = []metav1.Condition{} - - if b.Spec.Pause { - meta.SetStatusCondition(&b.Status.Conditions, metav1.Condition{ - Type: StoragePausedCondition, - Status: "True", - Reason: ReasonCompleted, - Message: "State Storage set to Paused", - }) - - return Stop, ctrl.Result{RequeueAfter: StatusUpdateRequeueDelay}, nil - } - } - - return Continue, ctrl.Result{}, nil -} - func (b *StorageClusterBuilder) Unwrap() *api.Storage { return b.DeepCopy() } From c33c5925ec7813aab9e11839aafd62e7b302eff2 Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Tue, 14 May 2024 12:40:29 +0200 Subject: [PATCH 5/7] YDBOPS-9680 node `--label` command line args `deployment` and `shared` (#205) --- api/v1alpha1/const.go | 6 + deploy/ydb-operator/Chart.yaml | 4 +- .../controllers/database/controller_test.go | 146 ++++++++++++++++++ .../controllers/storage/controller_test.go | 15 ++ internal/resources/database_statefulset.go | 15 ++ internal/resources/storage_statefulset.go | 3 + 6 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 internal/controllers/database/controller_test.go diff --git a/api/v1alpha1/const.go b/api/v1alpha1/const.go index 1832f7c9..e67b55c9 100644 --- a/api/v1alpha1/const.go +++ b/api/v1alpha1/const.go @@ -36,6 +36,12 @@ const ( DefaultRootUsername = "root" DefaultRootPassword = "" + LabelDeploymentKey = "deployment" + LabelDeploymentValueKubernetes = "kubernetes" + LabelSharedDatabaseKey = "shared" + LabelSharedDatabaseValueTrue = "true" + LabelSharedDatabaseValueFalse = "false" + AnnotationUpdateStrategyOnDelete = "ydb.tech/update-strategy-on-delete" AnnotationUpdateDNSPolicy = "ydb.tech/update-dns-policy" AnnotationSkipInitialization = "ydb.tech/skip-initialization" diff --git a/deploy/ydb-operator/Chart.yaml b/deploy/ydb-operator/Chart.yaml index 0ed360fd..4a133e76 100644 --- a/deploy/ydb-operator/Chart.yaml +++ b/deploy/ydb-operator/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.9 +version: 0.5.10 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.5.9" +appVersion: "0.5.10" diff --git a/internal/controllers/database/controller_test.go b/internal/controllers/database/controller_test.go new file mode 100644 index 00000000..f37b4f26 --- /dev/null +++ b/internal/controllers/database/controller_test.go @@ -0,0 +1,146 @@ +package database_test + +import ( + "context" + "errors" + "path/filepath" + "strings" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" + + "github.com/ydb-platform/ydb-kubernetes-operator/api/v1alpha1" + testobjects "github.com/ydb-platform/ydb-kubernetes-operator/e2e/tests/test-objects" + . "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/constants" + "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/database" + "github.com/ydb-platform/ydb-kubernetes-operator/internal/controllers/storage" + "github.com/ydb-platform/ydb-kubernetes-operator/internal/test" +) + +var ( + k8sClient client.Client + ctx context.Context +) + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + + test.SetupK8STestManager(&ctx, &k8sClient, func(mgr *manager.Manager) []test.Reconciler { + return []test.Reconciler{ + &storage.Reconciler{ + Client: k8sClient, + Scheme: (*mgr).GetScheme(), + }, + &database.Reconciler{ + Client: k8sClient, + Scheme: (*mgr).GetScheme(), + }, + } + }) + + RunSpecs(t, "Database controller medium tests suite") +} + +var _ = Describe("Database controller medium tests", func() { + var namespace corev1.Namespace + var storageSample v1alpha1.Storage + + BeforeEach(func() { + namespace = corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testobjects.YdbNamespace, + }, + } + Expect(k8sClient.Create(ctx, &namespace)).Should(Succeed()) + storageSample = *testobjects.DefaultStorage(filepath.Join("..", "..", "..", "e2e", "tests", "data", "storage-block-4-2-config.yaml")) + Expect(k8sClient.Create(ctx, &storageSample)).Should(Succeed()) + + By("checking that Storage created on local cluster...") + foundStorage := v1alpha1.Storage{} + Eventually(func() bool { + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name, + Namespace: testobjects.YdbNamespace, + }, &foundStorage)) + return foundStorage.Status.State == StorageInitializing + }, test.Timeout, test.Interval).Should(BeTrue()) + + By("set condition Initialized to Storage...") + Eventually(func() error { + foundStorage := v1alpha1.Storage{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: storageSample.Name, + Namespace: testobjects.YdbNamespace, + }, &foundStorage)) + meta.SetStatusCondition(&foundStorage.Status.Conditions, metav1.Condition{ + Type: StorageInitializedCondition, + Status: metav1.ConditionTrue, + Reason: ReasonCompleted, + }) + return k8sClient.Status().Update(ctx, &foundStorage) + }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) + }) + + AfterEach(func() { + Expect(k8sClient.Delete(ctx, &storageSample)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, &namespace)).Should(Succeed()) + }) + + It("Checking field propagation to objects", func() { + By("Check that Shared Database was created...") + databaseSample := *testobjects.DefaultDatabase() + databaseSample.Spec.SharedResources = &v1alpha1.DatabaseResources{ + StorageUnits: []v1alpha1.StorageUnit{ + { + UnitKind: "ssd", + Count: 1, + }, + }, + } + Expect(k8sClient.Create(ctx, &databaseSample)).Should(Succeed()) + + By("Check that StatefulSet was created...") + databaseStatefulSet := appsv1.StatefulSet{} + foundStatefulSets := appsv1.StatefulSetList{} + Eventually(func() error { + err := k8sClient.List(ctx, &foundStatefulSets, client.InNamespace( + testobjects.YdbNamespace)) + if err != nil { + return err + } + for idx, statefulSet := range foundStatefulSets.Items { + if statefulSet.Name == testobjects.DatabaseName { + databaseStatefulSet = foundStatefulSets.Items[idx] + return nil + } + } + return errors.New("failed to find StatefulSet") + }, test.Timeout, test.Interval).ShouldNot(HaveOccurred()) + + By("Check that args `--label` propagated to pods...", func() { + podContainerArgs := databaseStatefulSet.Spec.Template.Spec.Containers[0].Args + var labelArgKey string + var labelArgValue string + for idx, arg := range podContainerArgs { + if arg == "--label" { + labelArgKey = strings.Split(podContainerArgs[idx+1], "=")[0] + labelArgValue = strings.Split(podContainerArgs[idx+1], "=")[1] + if labelArgKey == v1alpha1.LabelDeploymentKey { + Expect(labelArgValue).Should(BeEquivalentTo(v1alpha1.LabelDeploymentValueKubernetes)) + } + if labelArgKey == v1alpha1.LabelSharedDatabaseKey { + Expect(labelArgValue).Should(BeEquivalentTo(v1alpha1.LabelSharedDatabaseValueTrue)) + } + } + } + }) + }) +}) diff --git a/internal/controllers/storage/controller_test.go b/internal/controllers/storage/controller_test.go index f679f97d..31e45e5a 100644 --- a/internal/controllers/storage/controller_test.go +++ b/internal/controllers/storage/controller_test.go @@ -5,6 +5,7 @@ import ( "fmt" "path/filepath" "strconv" + "strings" "testing" . "github.com/onsi/ginkgo/v2" @@ -135,5 +136,19 @@ var _ = Describe("Storage controller medium tests", func() { } Expect(foundConfigurationChecksumAnnotation).To(BeTrue()) }) + + By("Check that args with --label propagated to pods...", func() { + podContainerArgs := storageSS.Spec.Template.Spec.Containers[0].Args + var labelArgKey string + var labelArgValue string + for idx, arg := range podContainerArgs { + if arg == "--label" { + labelArgKey = strings.Split(podContainerArgs[idx+1], "=")[0] + labelArgValue = strings.Split(podContainerArgs[idx+1], "=")[1] + } + } + Expect(labelArgKey).Should(BeEquivalentTo(v1alpha1.LabelDeploymentKey)) + Expect(labelArgValue).Should(BeEquivalentTo(v1alpha1.LabelDeploymentValueKubernetes)) + }) }) }) diff --git a/internal/resources/database_statefulset.go b/internal/resources/database_statefulset.go index 2118d290..5644a90b 100644 --- a/internal/resources/database_statefulset.go +++ b/internal/resources/database_statefulset.go @@ -520,6 +520,21 @@ func (b *DatabaseStatefulSetBuilder) buildContainerArgs() ([]string, []string) { "--node-broker", b.Spec.StorageEndpoint, + + "--label", + fmt.Sprintf("%s=%s", api.LabelDeploymentKey, api.LabelDeploymentValueKubernetes), + } + + if b.Spec.SharedResources != nil { + args = append(args, + "--label", + fmt.Sprintf("%s=%s", api.LabelSharedDatabaseKey, api.LabelSharedDatabaseValueTrue), + ) + } else { + args = append(args, + "--label", + fmt.Sprintf("%s=%s", api.LabelSharedDatabaseKey, api.LabelSharedDatabaseValueFalse), + ) } for _, secret := range b.Spec.Secrets { diff --git a/internal/resources/storage_statefulset.go b/internal/resources/storage_statefulset.go index 47832f7c..228ce2c8 100644 --- a/internal/resources/storage_statefulset.go +++ b/internal/resources/storage_statefulset.go @@ -466,6 +466,9 @@ func (b *StorageStatefulSetBuilder) buildContainerArgs() ([]string, []string) { "--node", "static", + + "--label", + fmt.Sprintf("%s=%s", api.LabelDeploymentKey, api.LabelDeploymentValueKubernetes), ) for _, secret := range b.Spec.Secrets { From e612e4a18d185282089639dcbf012573fe5ea882 Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Tue, 14 May 2024 14:53:21 +0200 Subject: [PATCH 6/7] YDBOPS-9691 fix GRPC TLS certificates in dynnodes (#207) --- deploy/ydb-operator/Chart.yaml | 4 ++-- internal/resources/database_statefulset.go | 24 ++++++++++++++++------ internal/resources/resource.go | 13 +++++++----- internal/resources/storage_statefulset.go | 4 ++-- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/deploy/ydb-operator/Chart.yaml b/deploy/ydb-operator/Chart.yaml index 4a133e76..e79ed180 100644 --- a/deploy/ydb-operator/Chart.yaml +++ b/deploy/ydb-operator/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.10 +version: 0.5.11 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.5.10" +appVersion: "0.5.11" diff --git a/internal/resources/database_statefulset.go b/internal/resources/database_statefulset.go index 5644a90b..6d4b9919 100644 --- a/internal/resources/database_statefulset.go +++ b/internal/resources/database_statefulset.go @@ -304,15 +304,15 @@ func buildTLSVolume(name string, configuration *api.TLSConfiguration) corev1.Vol Items: []corev1.KeyToPath{ { Key: configuration.CertificateAuthority.Key, - Path: "ca.crt", + Path: wellKnownNameForTLSCertificateAuthority, }, { Key: configuration.Certificate.Key, - Path: "tls.crt", + Path: wellKnownNameForTLSCertificate, }, { Key: configuration.Key.Key, - Path: "tls.key", + Path: wellKnownNameForTLSPrivateKey, }, }, }, @@ -436,7 +436,7 @@ func (b *DatabaseStatefulSetBuilder) buildVolumeMounts() []corev1.VolumeMount { volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: grpcTLSVolumeName, ReadOnly: true, - MountPath: "/tls/grpc", // fixme const + MountPath: grpcTLSVolumeMountPath, }) } @@ -444,7 +444,7 @@ func (b *DatabaseStatefulSetBuilder) buildVolumeMounts() []corev1.VolumeMount { volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: interconnectTLSVolumeName, ReadOnly: true, - MountPath: "/tls/interconnect", // fixme const + MountPath: interconnectTLSVolumeMountPath, }) } @@ -466,7 +466,7 @@ func (b *DatabaseStatefulSetBuilder) buildVolumeMounts() []corev1.VolumeMount { volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: datastreamsTLSVolumeName, ReadOnly: true, - MountPath: "/tls/datastreams", // fixme const + MountPath: datastreamsTLSVolumeMountPath, }) } } @@ -537,6 +537,18 @@ func (b *DatabaseStatefulSetBuilder) buildContainerArgs() ([]string, []string) { ) } + // hotfix KIKIMR-16728 + if b.Spec.Service.GRPC.TLSConfiguration.Enabled { + args = append(args, + "--grpc-cert", + fmt.Sprintf("%s/%s", grpcTLSVolumeMountPath, wellKnownNameForTLSCertificate), + "--grpc-key", + fmt.Sprintf("%s/%s", grpcTLSVolumeMountPath, wellKnownNameForTLSPrivateKey), + "--grpc-ca", + fmt.Sprintf("%s/%s", grpcTLSVolumeMountPath, wellKnownNameForTLSCertificateAuthority), + ) + } + for _, secret := range b.Spec.Secrets { exist, err := CheckSecretKey( context.Background(), diff --git a/internal/resources/resource.go b/internal/resources/resource.go index 29ccd4dd..0794ea94 100644 --- a/internal/resources/resource.go +++ b/internal/resources/resource.go @@ -52,9 +52,12 @@ const ( localCertsVolumeName = "init-main-shared-source-dir-volume" operatorTokenVolumeName = "operator-token-volume" - wellKnownDirForAdditionalSecrets = "/opt/ydb/secrets" - wellKnownDirForAdditionalVolumes = "/opt/ydb/volumes" - wellKnownNameForOperatorToken = "token-file" + wellKnownDirForAdditionalSecrets = "/opt/ydb/secrets" + wellKnownDirForAdditionalVolumes = "/opt/ydb/volumes" + wellKnownNameForOperatorToken = "token-file" + wellKnownNameForTLSCertificateAuthority = "ca.crt" + wellKnownNameForTLSCertificate = "tls.crt" + wellKnownNameForTLSPrivateKey = "tls.key" caBundleEnvName = "CA_BUNDLE" caBundleFileName = "userCABundle.crt" @@ -509,11 +512,11 @@ func buildCAStorePatchingCommandArgs( } if grpcService.TLSConfiguration.Enabled { - arg += fmt.Sprintf("cp %s/ca.crt %s/grpcRoot.crt && ", grpcTLSVolumeMountPath, localCertsDir) + arg += fmt.Sprintf("cp %s/%s %s/grpcRoot.crt && ", grpcTLSVolumeMountPath, wellKnownNameForTLSCertificateAuthority, localCertsDir) } if interconnectService.TLSConfiguration.Enabled { - arg += fmt.Sprintf("cp %s/ca.crt %s/interconnectRoot.crt && ", interconnectTLSVolumeMountPath, localCertsDir) + arg += fmt.Sprintf("cp %s/%s %s/interconnectRoot.crt && ", interconnectTLSVolumeMountPath, wellKnownNameForTLSCertificateAuthority, localCertsDir) } if arg != "" { diff --git a/internal/resources/storage_statefulset.go b/internal/resources/storage_statefulset.go index 228ce2c8..61fd1e5d 100644 --- a/internal/resources/storage_statefulset.go +++ b/internal/resources/storage_statefulset.go @@ -407,7 +407,7 @@ func (b *StorageStatefulSetBuilder) buildVolumeMounts() []corev1.VolumeMount { volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: grpcTLSVolumeName, ReadOnly: true, - MountPath: "/tls/grpc", // fixme const + MountPath: grpcTLSVolumeMountPath, }) } @@ -415,7 +415,7 @@ func (b *StorageStatefulSetBuilder) buildVolumeMounts() []corev1.VolumeMount { volumeMounts = append(volumeMounts, corev1.VolumeMount{ Name: interconnectTLSVolumeName, ReadOnly: true, - MountPath: "/tls/interconnect", // fixme const + MountPath: interconnectTLSVolumeMountPath, }) } From 11716fcb5d7af33973912afdfd504fea2c3e8128 Mon Sep 17 00:00:00 2001 From: kobzonega <122476665+kobzonega@users.noreply.github.com> Date: Tue, 14 May 2024 18:22:34 +0200 Subject: [PATCH 7/7] YDBOPS-9691 use global ca-certificates.crt for database (#208) --- deploy/ydb-operator/Chart.yaml | 4 ++-- internal/resources/database_statefulset.go | 2 +- internal/resources/resource.go | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/deploy/ydb-operator/Chart.yaml b/deploy/ydb-operator/Chart.yaml index e79ed180..7b8b2932 100644 --- a/deploy/ydb-operator/Chart.yaml +++ b/deploy/ydb-operator/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.5.11 +version: 0.5.12 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.5.11" +appVersion: "0.5.12" diff --git a/internal/resources/database_statefulset.go b/internal/resources/database_statefulset.go index 6d4b9919..1ad985a6 100644 --- a/internal/resources/database_statefulset.go +++ b/internal/resources/database_statefulset.go @@ -545,7 +545,7 @@ func (b *DatabaseStatefulSetBuilder) buildContainerArgs() ([]string, []string) { "--grpc-key", fmt.Sprintf("%s/%s", grpcTLSVolumeMountPath, wellKnownNameForTLSPrivateKey), "--grpc-ca", - fmt.Sprintf("%s/%s", grpcTLSVolumeMountPath, wellKnownNameForTLSCertificateAuthority), + fmt.Sprintf("%s/%s", systemCertsDir, caCertificatesFileName), ) } diff --git a/internal/resources/resource.go b/internal/resources/resource.go index 0794ea94..a9d60517 100644 --- a/internal/resources/resource.go +++ b/internal/resources/resource.go @@ -61,6 +61,7 @@ const ( caBundleEnvName = "CA_BUNDLE" caBundleFileName = "userCABundle.crt" + caCertificatesFileName = "ca-certificates.crt" updateCACertificatesBin = "update-ca-certificates" localCertsDir = "/usr/local/share/ca-certificates"