From 6f882ad43e83ba34bd9adb224d90816ed7dc0529 Mon Sep 17 00:00:00 2001 From: Carl Kyrillos Date: Tue, 25 Feb 2025 14:38:19 -0500 Subject: [PATCH] fix: addressed PR feedback --- pkg/cluster/cluster_config.go | 48 ++++-- pkg/cluster/cluster_config_test.go | 143 ++++++++++++++---- .../action_verify_supported_architectures.go | 30 ++-- ...ion_verify_supported_architectures_test.go | 140 ++++++++++++++--- pkg/metadata/labels/types.go | 1 + 5 files changed, 289 insertions(+), 73 deletions(-) diff --git a/pkg/cluster/cluster_config.go b/pkg/cluster/cluster_config.go index 06bd4d44116..a37ca6191e7 100644 --- a/pkg/cluster/cluster_config.go +++ b/pkg/cluster/cluster_config.go @@ -289,28 +289,52 @@ func IsDefaultAuthMethod(ctx context.Context, cli client.Client) (bool, error) { return authenticationobj.Spec.Type == configv1.AuthenticationTypeIntegratedOAuth || authenticationobj.Spec.Type == "", nil } -func GetNodeArchitectures(ctx context.Context, client client.Client) (map[string]struct{}, error) { +func GetReadyWorkerNodes(ctx context.Context, k8sclient client.Client) ([]corev1.Node, error) { nodeList := &corev1.NodeList{} - if err := client.List(ctx, nodeList); err != nil { - return nil, fmt.Errorf("failed to list nodes: %w", err) + + labelSelector := client.MatchingLabels{ + labels.WorkerNode: "", } - // Create a map to track unique architectures - nodeArchitectures := make(map[string]struct{}) + if err := k8sclient.List(ctx, nodeList, labelSelector); err != nil { + return nil, fmt.Errorf("failed to list worker nodes: %w", err) + } + var readyWorkerNodes []corev1.Node for _, node := range nodeList.Items { - if arch, exists := node.Labels[labels.NodeArch]; exists { - if node.Status.Conditions != nil { - // Only count nodes that are Ready - for _, condition := range node.Status.Conditions { - if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue { - nodeArchitectures[arch] = struct{}{} - } + if node.Status.Conditions != nil { + // Only count 
nodes that are Ready + for _, condition := range node.Status.Conditions { + if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue { + readyWorkerNodes = append(readyWorkerNodes, node) } } } } + if len(readyWorkerNodes) < 1 { + return nil, errors.New("no ready worker nodes found") + } + + return readyWorkerNodes, nil +} + +func GetNodeArchitectures(nodes []corev1.Node) ([]string, error) { + // Create a map to track unique architectures + nodeArchitecturesMap := make(map[string]struct{}) + + for _, node := range nodes { + if arch, exists := node.Labels[labels.NodeArch]; exists { + nodeArchitecturesMap[arch] = struct{}{} + } + } + + // Convert map to a slice now that any duplicate architectures have been removed + nodeArchitectures := make([]string, 0) + for arch := range nodeArchitecturesMap { + nodeArchitectures = append(nodeArchitectures, arch) + } + if len(nodeArchitectures) < 1 { return nil, errors.New("no valid architectures found") } diff --git a/pkg/cluster/cluster_config_test.go b/pkg/cluster/cluster_config_test.go index c74494c95b0..7afce937272 100644 --- a/pkg/cluster/cluster_config_test.go +++ b/pkg/cluster/cluster_config_test.go @@ -16,22 +16,12 @@ import ( ) func TestGetNodeArchitectures(t *testing.T) { - ctx := context.Background() - nodeTypeMeta := metav1.TypeMeta{ APIVersion: gvk.Node.GroupVersion().String(), Kind: gvk.Node.GroupVersion().String(), } - nodeStatusReady := corev1.NodeStatus{ - Conditions: []corev1.NodeCondition{ - { - Type: corev1.NodeReady, - Status: corev1.ConditionTrue, - }, - }, - } - amdNode := &corev1.Node{ + amdNode := corev1.Node{ TypeMeta: nodeTypeMeta, ObjectMeta: metav1.ObjectMeta{ Name: "amd-node", @@ -39,9 +29,8 @@ func TestGetNodeArchitectures(t *testing.T) { labels.NodeArch: "amd64", }, }, - Status: nodeStatusReady, } - powerNode := &corev1.Node{ + powerNode := corev1.Node{ TypeMeta: nodeTypeMeta, ObjectMeta: metav1.ObjectMeta{ Name: "power-node", @@ -49,52 +38,48 @@ func 
TestGetNodeArchitectures(t *testing.T) { labels.NodeArch: "ppc64le", }, }, - Status: nodeStatusReady, } - notReadyNode := &corev1.Node{ + unlabeledNode := corev1.Node{ TypeMeta: nodeTypeMeta, ObjectMeta: metav1.ObjectMeta{ - Name: "not-ready-node", - Labels: map[string]string{ - labels.NodeArch: "amd64", - }, + Name: "unlabeled-node", }, } type args struct { - client client.Client + nodes []corev1.Node } tests := []struct { name string args args - want map[string]struct{} + want []string wantErr bool }{ { - name: "Single-node cluster", + name: "Single-arch nodes", args: args{ - client: fake.NewFakeClient(amdNode), + nodes: []corev1.Node{amdNode}, }, - want: map[string]struct{}{ - "amd64": {}, + want: []string{ + "amd64", }, wantErr: false, }, { - name: "Multi-node cluster", + name: "Multi-arch nodes", args: args{ - client: fake.NewFakeClient(amdNode, powerNode), + nodes: []corev1.Node{amdNode, powerNode}, }, - want: map[string]struct{}{ - "amd64": {}, - "ppc64le": {}, + want: []string{ + "amd64", + "ppc64le", }, wantErr: false, }, { - name: "No ready nodes cluster", + name: "Unlabeled nodes", args: args{ - client: fake.NewFakeClient(notReadyNode), + nodes: []corev1.Node{unlabeledNode}, }, want: nil, wantErr: true, @@ -102,7 +87,7 @@ func TestGetNodeArchitectures(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := cluster.GetNodeArchitectures(ctx, tt.args.client) + got, err := cluster.GetNodeArchitectures(tt.args.nodes) if (err != nil) != tt.wantErr { t.Errorf("GetNodeArchitectures() error = %v, wantErr %v", err, tt.wantErr) return @@ -113,3 +98,95 @@ func TestGetNodeArchitectures(t *testing.T) { }) } } + +func TestGetReadyWorkerNodes(t *testing.T) { + ctx := context.Background() + + nodeTypeMeta := metav1.TypeMeta{ + APIVersion: gvk.Node.GroupVersion().String(), + Kind: gvk.Node.GroupVersion().String(), + } + + readyWorkerNode := corev1.Node{ + TypeMeta: nodeTypeMeta, + ObjectMeta: metav1.ObjectMeta{ + Name: 
"ready-worker-node", + Labels: map[string]string{ + labels.WorkerNode: "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + } + notReadyWorkerNode := corev1.Node{ + TypeMeta: nodeTypeMeta, + ObjectMeta: metav1.ObjectMeta{ + Name: "not-ready-worker-node", + Labels: map[string]string{ + labels.WorkerNode: "", + }, + }, + } + masterNode := corev1.Node{ + TypeMeta: nodeTypeMeta, + ObjectMeta: metav1.ObjectMeta{ + Name: "master-node", + Labels: map[string]string{ + "node-role.kubernetes.io/master": "", + }, + }, + } + + type args struct { + k8sclient client.Client + } + tests := []struct { + name string + args args + want []corev1.Node + wantErr bool + }{ + { + name: "Ready worker nodes", + args: args{ + k8sclient: fake.NewFakeClient(&readyWorkerNode, &masterNode, ¬ReadyWorkerNode), + }, + want: []corev1.Node{readyWorkerNode}, + wantErr: false, + }, + { + name: "No worker nodes", + args: args{ + k8sclient: fake.NewFakeClient(&masterNode), + }, + want: nil, + wantErr: true, + }, + { + name: "No ready worker nodes", + args: args{ + k8sclient: fake.NewFakeClient(¬ReadyWorkerNode), + }, + want: nil, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := cluster.GetReadyWorkerNodes(ctx, tt.args.k8sclient) + if (err != nil) != tt.wantErr { + t.Errorf("GetReadyWorkerNodes() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("GetReadyWorkerNodes() got = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/controller/actions/architecture/action_verify_supported_architectures.go b/pkg/controller/actions/architecture/action_verify_supported_architectures.go index 1bbf59493d1..478a7f24e6b 100644 --- a/pkg/controller/actions/architecture/action_verify_supported_architectures.go +++ b/pkg/controller/actions/architecture/action_verify_supported_architectures.go @@ 
-14,6 +14,10 @@ import ( odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types" ) +const ( + DefaultArchitecture = "amd64" +) + // VerifySupportedArchitectures determines whether a component can be enabled based on the architecture of each node. // // This is accomplished by doing the following: @@ -27,25 +31,29 @@ func VerifySupportedArchitectures(ctx context.Context, rr *odhtypes.Reconciliati } // Fetch the architecture(s) that the component supports - supportedArchitectures := make(map[string]struct{}) + var supportedArchitectures []string componentReleases := obj.GetReleaseStatus() if componentReleases == nil || len(*componentReleases) < 1 { return fmt.Errorf("instance %v has no releases", rr.Instance) } for _, release := range *componentReleases { - for _, arch := range release.SupportedArchitectures { - supportedArchitectures[arch] = struct{}{} - } + supportedArchitectures = append(supportedArchitectures, release.SupportedArchitectures...) } // TODO: Refactor after all components explicitly list supportedArchitectures in their component_metadata.yaml file // If supportedArchitectures is empty, assume the component only works on amd64 if len(supportedArchitectures) == 0 { - supportedArchitectures["amd64"] = struct{}{} + supportedArchitectures = append(supportedArchitectures, DefaultArchitecture) + } + + // Fetch the ready worker nodes + readyWorkerNodes, err := cluster.GetReadyWorkerNodes(ctx, rr.Client) + if err != nil { + return err } // Fetch the architecture(s) that the nodes are running on - nodeArchitectures, err := cluster.GetNodeArchitectures(ctx, rr.Client) + nodeArchitectures, err := cluster.GetNodeArchitectures(readyWorkerNodes) if err != nil { return err } @@ -71,10 +79,12 @@ func VerifySupportedArchitectures(ctx context.Context, rr *odhtypes.Reconciliati // HasCompatibleArchitecture Returns true if there's at least one architecture that's in both supportedArches and nodeArches. // Otherwise, it returns false. 
-func HasCompatibleArchitecture(supportedArches map[string]struct{}, nodeArches map[string]struct{}) bool { - for arch := range nodeArches { - if _, exists := supportedArches[arch]; exists { - return true +func HasCompatibleArchitecture(supportedArches []string, nodeArches []string) bool { + for _, nodeArch := range nodeArches { + for _, supportedArch := range supportedArches { + if nodeArch == supportedArch { + return true + } } } diff --git a/pkg/controller/actions/architecture/action_verify_supported_architectures_test.go b/pkg/controller/actions/architecture/action_verify_supported_architectures_test.go index 4f7d2d3ac2e..f0330c82da4 100644 --- a/pkg/controller/actions/architecture/action_verify_supported_architectures_test.go +++ b/pkg/controller/actions/architecture/action_verify_supported_architectures_test.go @@ -29,7 +29,7 @@ func TestVerifySupportedArchitectures(t *testing.T) { dsci := &dsciv1.DSCInitialization{Spec: dsciv1.DSCInitializationSpec{ApplicationsNamespace: ns}} release := common.Release{Name: cluster.OpenDataHub} - cl, err := fakeclient.New( + healthyClient, err := fakeclient.New( &corev1.Node{ TypeMeta: metav1.TypeMeta{ APIVersion: gvk.Node.GroupVersion().String(), @@ -38,7 +38,8 @@ func TestVerifySupportedArchitectures(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "amd64-node", Labels: map[string]string{ - labels.NodeArch: "amd64", + labels.NodeArch: "amd64", + labels.WorkerNode: "", }, }, Status: corev1.NodeStatus{ @@ -58,7 +59,32 @@ func TestVerifySupportedArchitectures(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "ppc64le-node", Labels: map[string]string{ - labels.NodeArch: "ppc64le", + labels.NodeArch: "ppc64le", + labels.WorkerNode: "", + }, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + { + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + ) + g.Expect(err).ShouldNot(HaveOccurred()) + + noWorkerNodeClient, err := fakeclient.New( + &corev1.Node{ + TypeMeta: 
metav1.TypeMeta{ + APIVersion: gvk.Node.GroupVersion().String(), + Kind: gvk.Node.GroupVersion().String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "amd64-node", + Labels: map[string]string{ + labels.NodeArch: "amd64", }, }, Status: corev1.NodeStatus{ @@ -73,8 +99,26 @@ func TestVerifySupportedArchitectures(t *testing.T) { ) g.Expect(err).ShouldNot(HaveOccurred()) + noReadyNodeClient, err := fakeclient.New( + &corev1.Node{ + TypeMeta: metav1.TypeMeta{ + APIVersion: gvk.Node.GroupVersion().String(), + Kind: gvk.Node.GroupVersion().String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "amd64-node", + Labels: map[string]string{ + labels.NodeArch: "amd64", + labels.WorkerNode: "", + }, + }, + }, + ) + g.Expect(err).ShouldNot(HaveOccurred()) + + // Should not have an error when the component doesn't have any supported architectures (since defaulting to amd64) err = architecture.VerifySupportedArchitectures(ctx, &types.ReconciliationRequest{ - Client: cl, + Client: healthyClient, Instance: &componentApi.CodeFlare{ ObjectMeta: metav1.ObjectMeta{ Name: "codeflare-no-arch", @@ -98,8 +142,9 @@ func TestVerifySupportedArchitectures(t *testing.T) { }) g.Expect(err).ShouldNot(HaveOccurred()) + // Should not have an error when the component and node share the same architecture err = architecture.VerifySupportedArchitectures(ctx, &types.ReconciliationRequest{ - Client: cl, + Client: healthyClient, Instance: &componentApi.CodeFlare{ ObjectMeta: metav1.ObjectMeta{ Name: "codeflare-ppc64le", @@ -126,8 +171,9 @@ func TestVerifySupportedArchitectures(t *testing.T) { }) g.Expect(err).ShouldNot(HaveOccurred()) + // Should have an error when the component and node don't share any common architecture err = architecture.VerifySupportedArchitectures(ctx, &types.ReconciliationRequest{ - Client: cl, + Client: healthyClient, Instance: &componentApi.CodeFlare{ ObjectMeta: metav1.ObjectMeta{ Name: "codeflare-ppc64le", @@ -153,12 +199,70 @@ func TestVerifySupportedArchitectures(t 
*testing.T) { Release: release, }) g.Expect(err).Should(HaveOccurred()) + + // Should have an error when none of the nodes are worker nodes + err = architecture.VerifySupportedArchitectures(ctx, &types.ReconciliationRequest{ + Client: noWorkerNodeClient, + Instance: &componentApi.CodeFlare{ + ObjectMeta: metav1.ObjectMeta{ + Name: "codeflare-ppc64le", + }, + Status: componentApi.CodeFlareStatus{ + CodeFlareCommonStatus: componentApi.CodeFlareCommonStatus{ + ComponentReleaseStatus: common.ComponentReleaseStatus{ + Releases: []common.ComponentRelease{ + { + Name: "CodeFlare operator", + Version: "1.15.0", + RepoURL: "https://github.com/project-codeflare/codeflare-operator", + SupportedArchitectures: []string{ + "ppc64le", + }, + }, + }, + }, + }, + }, + }, + DSCI: dsci, + Release: release, + }) + g.Expect(err).Should(HaveOccurred()) + + // Should have an error when none of the nodes are ready + err = architecture.VerifySupportedArchitectures(ctx, &types.ReconciliationRequest{ + Client: noReadyNodeClient, + Instance: &componentApi.CodeFlare{ + ObjectMeta: metav1.ObjectMeta{ + Name: "codeflare-ppc64le", + }, + Status: componentApi.CodeFlareStatus{ + CodeFlareCommonStatus: componentApi.CodeFlareCommonStatus{ + ComponentReleaseStatus: common.ComponentReleaseStatus{ + Releases: []common.ComponentRelease{ + { + Name: "CodeFlare operator", + Version: "1.15.0", + RepoURL: "https://github.com/project-codeflare/codeflare-operator", + SupportedArchitectures: []string{ + "ppc64le", + }, + }, + }, + }, + }, + }, + }, + DSCI: dsci, + Release: release, + }) + g.Expect(err).Should(HaveOccurred()) } func Test_hasCompatibleArchitecture(t *testing.T) { type args struct { - supportedArches map[string]struct{} - nodeArches map[string]struct{} + supportedArches []string + nodeArches []string } tests := []struct { name string args args @@ -168,12 +272,12 @@ func Test_hasCompatibleArchitecture(t *testing.T) { { name: "Common architecture exists", args: args{ - supportedArches: map[string]struct{}{ - 
"amd64": {}, - "ppc64le": {}, + supportedArches: []string{ + "amd64", + "ppc64le", }, - nodeArches: map[string]struct{}{ - "amd64": {}, + nodeArches: []string{ + "amd64", }, }, want: true, @@ -181,12 +285,12 @@ func Test_hasCompatibleArchitecture(t *testing.T) { { name: "No common architecture exists", args: args{ - supportedArches: map[string]struct{}{ - "amd64": {}, - "ppc64le": {}, + supportedArches: []string{ + "amd64", + "ppc64le", }, - nodeArches: map[string]struct{}{ - "s390x": {}, + nodeArches: []string{ + "s390x", }, }, want: false, diff --git a/pkg/metadata/labels/types.go b/pkg/metadata/labels/types.go index ce600aebb8d..78ba37fc020 100644 --- a/pkg/metadata/labels/types.go +++ b/pkg/metadata/labels/types.go @@ -5,6 +5,7 @@ const ( InjectTrustCA = "config.openshift.io/inject-trusted-cabundle" SecurityEnforce = "pod-security.kubernetes.io/enforce" NodeArch = "kubernetes.io/arch" + WorkerNode = "node-role.kubernetes.io/worker" ClusterMonitoring = "openshift.io/cluster-monitoring" PlatformPartOf = "platform.opendatahub.io/part-of" Platform = "platform"