Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Target Allocator implementation (Part 1 - OTEL Operator Enhancements) #351

Merged
merged 33 commits into from
Aug 13, 2021
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
130e69d
load balancer deployment implementation & controller & e2e tests
alexperez52 Jul 8, 2021
2835c16
Load balancer crd update, reconcile logic and e2e tests
Jul 8, 2021
6f81007
Reset last commit and only uploading changes to main.go
alexperez52 Jul 8, 2021
31f97fe
Updated bundle and api with new autogenerated resources
rsvarma95 Jul 8, 2021
47c5fe8
Updated code with lint fixes
Jul 9, 2021
8ef4cde
Updated code to include minor fixes
rsvarma95 Jul 12, 2021
c73470e
Update helper.go to add header
Jul 12, 2021
7482e13
Updated comment and bundles
rsvarma95 Jul 13, 2021
729d4ea
Updated lb reconcile helper function and its invocations
rsvarma95 Jul 13, 2021
2dcf8a1
Updated naming scheme and changed CR config
rsvarma95 Jul 16, 2021
a0d3f34
Updated bundle files
rsvarma95 Jul 16, 2021
492749e
Minor changes
rsvarma95 Jul 20, 2021
6a72424
Lint and default file fixes
rsvarma95 Jul 21, 2021
8657668
Added rolebinding to automate manual setting
rsvarma95 Jul 30, 2021
0528856
Update opentelemetry-operator.clusterserviceversion.yaml
Jul 30, 2021
1a921cb
Merge branch 'main' into load-balancer-implementation
Jul 30, 2021
bf1010b
Removed role/rolebinding files & Minor changes
Aug 3, 2021
38ca88a
Minor changes
Aug 3, 2021
be56aa7
Merge branch 'main' into load-balancer-implementation
Aug 3, 2021
e339eba
Added error check in configmap reconcile & spelling correction
Aug 3, 2021
6f39c47
Updated target allocator KUTTL tests & renamed folder
rsvarma95 Aug 3, 2021
367c623
Updated folder structure to reduce code duplication between collector…
rsvarma95 Aug 4, 2021
7dd75b3
Updated kuttl tests to use namespace-scope resources
rsvarma95 Aug 4, 2021
0ef5424
Merge branch 'main' into load-balancer-implementation
rsvarma95 Aug 5, 2021
db40373
Revert part of "Updated folder structure to reduce code duplication b…
Aug 5, 2021
67af909
Removed separate controller for target allocation
Aug 6, 2021
888ae31
Added additional label for collector pod selection and removed test-step
Aug 9, 2021
11d3c87
Minor changes
Aug 9, 2021
a7764b9
Added version management & Minor fixes
Aug 11, 2021
33cb042
Updated docker image to quay.io image
Aug 11, 2021
3d37148
Added validation in webhook to validate prometheus config
Aug 11, 2021
a496825
Update api/v1alpha1/opentelemetrycollector_webhook.go
Aug 11, 2021
98acb2f
Updated struct name & minor changes
Aug 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions api/v1alpha1/opentelemetrycollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ type OpenTelemetryCollectorSpec struct {
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
Image string `json:"image,omitempty"`

// TargetAllocator indicates a value which determines whether to spawn a target allocation resource or not.
// +optional
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
TargetAllocator OpenTelemetryTargetAllocator `json:"targetAllocator,omitempty"`

// Mode represents how the collector should be deployed (deployment, daemonset, statefulset or sidecar)
// +optional
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
Expand Down Expand Up @@ -116,6 +121,17 @@ type OpenTelemetryCollectorStatus struct {
Messages []string `json:"messages,omitempty"`
}

// OpenTelemetryTargetAllocator defines the configurations for the Prometheus target allocator.
type OpenTelemetryTargetAllocator struct {
jpkrohling marked this conversation as resolved.
Show resolved Hide resolved
// Enabled indicates whether to use a target allocation mechanism for Prometheus targets or not.
// +optional
Enabled bool `json:"enabled,omitempty"`

// Image indicates the container image to use for the OpenTelemetry TargetAllocator.
// +optional
Image string `json:"image,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:resource:shortName=otelcol;otelcols
// +kubebuilder:subresource:status
Expand Down
16 changes: 16 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ metadata:
containerImage: quay.io/opentelemetry/opentelemetry-operator
createdAt: "2020-12-16T13:37:00+00:00"
description: Provides the OpenTelemetry components, including the Collector
operators.operatorframework.io/builder: operator-sdk-v1.8.0+git
operators.operatorframework.io/builder: operator-sdk-v1.8.0
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved
operators.operatorframework.io/project_layout: go.kubebuilder.io/v2
repository: github.com/open-telemetry/opentelemetry-operator
support: OpenTelemetry Community
Expand Down Expand Up @@ -77,6 +77,14 @@ spec:
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
Expand Down Expand Up @@ -174,6 +182,26 @@ spec:
- get
- patch
- update
- apiGroups:
- rbac.authorization.k8s.io
resources:
- rolebindings
verbs:
- create
- delete
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
verbs:
- create
- delete
- get
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
Expand Down
13 changes: 13 additions & 0 deletions bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,19 @@ spec:
description: ServiceAccount indicates the name of an existing service
account to use with this instance.
type: string
targetAllocator:
description: TargetAllocator indicates a value which determines whether
to spawn a target allocation resource or not.
properties:
enabled:
description: Enabled indicates whether to use a target allocation
mechanism for Prometheus targets or not.
type: boolean
image:
description: Image indicates the container image to use for the
OpenTelemetry TargetAllocator.
type: string
type: object
tolerations:
description: Toleration to schedule OpenTelemetry Collector pods.
This is only relevant to daemonsets, statefulsets and deployments
Expand Down
13 changes: 13 additions & 0 deletions config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,19 @@ spec:
description: ServiceAccount indicates the name of an existing service
account to use with this instance.
type: string
targetAllocator:
description: TargetAllocator indicates a value which determines whether
to spawn a target allocation resource or not.
properties:
enabled:
description: Enabled indicates whether to use a target allocation
mechanism for Prometheus targets or not.
type: boolean
image:
description: Image indicates the container image to use for the
OpenTelemetry TargetAllocator.
type: string
type: object
tolerations:
description: Toleration to schedule OpenTelemetry Collector pods.
This is only relevant to daemonsets, statefulsets and deployments
Expand Down
28 changes: 28 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ rules:
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
Expand Down Expand Up @@ -129,3 +137,23 @@ rules:
- get
- patch
- update
- apiGroups:
- rbac.authorization.k8s.io
resources:
- rolebindings
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need to create role bindings?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main reason to create a rolebinding is to allow the target allocator pod to view the collector pods in the same namespace. I am basically automating the below command. If I dont create a appropriate rolebinding, it will not be able to query for the collector pods. The logic of this part is in Part2.

kubectl create clusterrolebinding default-view --clusterrole=view --serviceaccount={namespace}:default

Reference link

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The operator should definitely not be able to create cluster roles/bindings by default, as it would require the operator to have cluster-admin permissions. There should be a separate role/binding YAML file that users can opt in to use, giving the operator the extra permissions. At runtime, as part of the "auto detect" package, the operator can then detect if it has been given this extra permission and self-adjust, enabling/disabling this feature.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok this makes sense. I would remove that part then and add a check instead just to ensure the appropriate permissions are granted. This would remove the role/rolebinding files.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of having it in the auto detect during which we do not have access to the request namespace while initializing the auto detect, would it be fine to create a helper method in targetallocator/reconcile which would use the k8s client to check the service account for appropriate permissions and return an error if permissions are not available?

Another option is when the rest.inClusterConfig() is called inside the logic (Part of PR2), we can just return an error.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. The operator then should indeed provision the role/service account/binding. Are you sure it requires cluster roles, though?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually role and role bindings are enough but for the operator to create a role with the appropriate permissions, the operator needs to have these permissions thmeselves. Thus, the operator needs to have a cluster role defined since the collector namespaces are dynamic.

One solution can be just to let the user create the role and rolebindings as defined in the kuttl tests (latest update).

verbs:
- create
- delete
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved
verbs:
- create
- delete
- get
- list
- watch
156 changes: 156 additions & 0 deletions controllers/targetallocator_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package controllers contains the main controller, where the reconciliation starts.
package controllers

import (
"context"
"fmt"

ctrl "sigs.k8s.io/controller-runtime"
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved

"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/api/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/config"
"github.com/open-telemetry/opentelemetry-operator/pkg/targetallocator/reconcile"
)

// OpenTelemetryTargetAllocatorReconciler reconciles a OpenTelemetryTargetAllocator object.
type OpenTelemetryTargetAllocatorReconciler struct {
client.Client
log logr.Logger
scheme *runtime.Scheme
config config.Config
tasks []TgAlTask
}

// TgAlTask represents a reconciliation task to be executed by the reconciler.
type TgAlTask struct {
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved
Name string
Do func(context.Context, reconcile.Params) error
BailOnError bool
}

// TgAlParams is the set of options to build a new OpenTelemetryTargetAllocatorReconciler.
type TgAlParams struct {
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved
client.Client
Log logr.Logger
Scheme *runtime.Scheme
Config config.Config
Tasks []TgAlTask
}

// NewTgAlReconciler creates a new reconciler for OpenTelemetryTargetAllocator objects.
func NewTgAlReconciler(p TgAlParams) *OpenTelemetryTargetAllocatorReconciler {
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved
if len(p.Tasks) == 0 {
p.Tasks = []TgAlTask{
{
"roles",
reconcile.Roles,
true,
},
{
"role bindings",
reconcile.RoleBindings,
true,
},
{
"config maps",
reconcile.ConfigMaps,
true,
},
{
"deployments",
reconcile.Deployments,
true,
},
{
"services",
reconcile.Services,
true,
},
}
}

return &OpenTelemetryTargetAllocatorReconciler{
Client: p.Client,
log: p.Log,
scheme: p.Scheme,
config: p.Config,
tasks: p.Tasks,
}
}

// Reconcile the current state of an OpenTelemetry target allocation resource with the desired state.
// follows the same design as the opentelemetrycollector controller where an empty context is passed
// and a new context initialized inside.
func (r *OpenTelemetryTargetAllocatorReconciler) Reconcile(_ context.Context, req ctrl.Request) (ctrl.Result, error) {
ctx := context.Background()
log := r.log.WithValues("opentelemtrytargetallocator", req.NamespacedName)
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved

var instance v1alpha1.OpenTelemetryCollector
if err := r.Get(ctx, req.NamespacedName, &instance); err != nil {
if !apierrors.IsNotFound(err) {
log.Error(err, "unable to fetch OpenTelemetryTargetAllocator")
rsvarma95 marked this conversation as resolved.
Show resolved Hide resolved
}
return ctrl.Result{}, client.IgnoreNotFound(err)
}

params := reconcile.Params{
Config: r.config,
Client: r.Client,
Instance: instance,
Log: log,
Scheme: r.scheme,
}

if err := r.RunTasks(ctx, params); err != nil {
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
}

// RunTasks runs all the tasks associated with this reconciler.
func (r *OpenTelemetryTargetAllocatorReconciler) RunTasks(ctx context.Context, params reconcile.Params) error {
for _, task := range r.tasks {
if err := task.Do(ctx, params); err != nil {
r.log.Error(err, fmt.Sprintf("failed to reconcile %s", task.Name))
if task.BailOnError {
return err
}
}
}
return nil
}

// SetupWithManager tells the manager what our controller is interested in.
func (r *OpenTelemetryTargetAllocatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.OpenTelemetryCollector{}).
Owns(&rbacv1.Role{}).
Owns(&rbacv1.RoleBinding{}).
Owns(&corev1.ConfigMap{}).
Owns(&corev1.Service{}).
Owns(&appsv1.Deployment{}).
Complete(r)
}
Loading