From 77de9c60f0c23c6be9bc70804d2294151338db84 Mon Sep 17 00:00:00 2001 From: Matej Gera <38492574+matej-g@users.noreply.github.com> Date: Fri, 9 Feb 2024 16:35:36 +0100 Subject: [PATCH] [target-allocator] Introduce "per node" allocation strategy to target allocator (#2430) --- .chloggen/per-node-allocation-strategy.yaml | 16 ++ .github/workflows/e2e.yaml | 1 + Makefile | 5 + apis/v1alpha1/allocation_strategy.go | 5 +- apis/v1alpha1/collector_webhook.go | 66 +++-- apis/v1alpha1/collector_webhook_test.go | 13 + apis/v1alpha1/opentelemetrycollector_types.go | 3 +- ...ntelemetry.io_opentelemetrycollectors.yaml | 5 +- .../allocation/allocatortest.go | 6 +- .../allocation/consistent_hashing.go | 2 +- .../allocation/least_weighted.go | 2 +- cmd/otel-allocator/allocation/per_node.go | 242 ++++++++++++++++++ .../allocation/per_node_test.go | 128 +++++++++ cmd/otel-allocator/allocation/strategy.go | 13 +- .../allocation/strategy_test.go | 10 +- cmd/otel-allocator/collector/collector.go | 9 +- .../collector/collector_test.go | 24 +- cmd/otel-allocator/target/target.go | 19 ++ ...ntelemetry.io_opentelemetrycollectors.yaml | 5 +- docs/api.md | 4 +- kuttl-test-targetallocator.yaml | 6 + .../targetallocator-features/00-assert.yaml | 0 .../targetallocator-features/00-install.yaml | 0 .../targetallocator-features/01-assert.yaml | 0 .../targetallocator-features/01-liveness.yaml | 0 .../00-assert.yaml | 51 ++++ .../00-install.yaml | 133 ++++++++++ .../01-assert.yaml | 0 .../01-install.yaml | 49 ++++ .../check-daemonset.sh | 15 ++ .../00-assert.yaml | 0 .../00-install.yaml | 0 .../01-assert.yaml | 20 ++ .../01-install.yaml | 0 34 files changed, 801 insertions(+), 51 deletions(-) create mode 100755 .chloggen/per-node-allocation-strategy.yaml create mode 100644 cmd/otel-allocator/allocation/per_node.go create mode 100644 cmd/otel-allocator/allocation/per_node_test.go create mode 100644 kuttl-test-targetallocator.yaml rename tests/{e2e => e2e-targetallocator}/targetallocator-features/00-assert.yaml (100%) rename tests/{e2e => e2e-targetallocator}/targetallocator-features/00-install.yaml (100%) rename tests/{e2e => e2e-targetallocator}/targetallocator-features/01-assert.yaml (100%) rename tests/{e2e => e2e-targetallocator}/targetallocator-features/01-liveness.yaml (100%) create mode 100644 tests/e2e-targetallocator/targetallocator-kubernetessd/00-assert.yaml create mode 100644 tests/e2e-targetallocator/targetallocator-kubernetessd/00-install.yaml rename tests/{e2e/targetallocator-prometheuscr => e2e-targetallocator/targetallocator-kubernetessd}/01-assert.yaml (100%) create mode 100644 tests/e2e-targetallocator/targetallocator-kubernetessd/01-install.yaml create mode 100755 tests/e2e-targetallocator/targetallocator-kubernetessd/check-daemonset.sh rename tests/{e2e => e2e-targetallocator}/targetallocator-prometheuscr/00-assert.yaml (100%) rename tests/{e2e => e2e-targetallocator}/targetallocator-prometheuscr/00-install.yaml (100%) create mode 100644 tests/e2e-targetallocator/targetallocator-prometheuscr/01-assert.yaml rename tests/{e2e => e2e-targetallocator}/targetallocator-prometheuscr/01-install.yaml (100%) diff --git a/.chloggen/per-node-allocation-strategy.yaml b/.chloggen/per-node-allocation-strategy.yaml new file mode 100755 index 0000000000..e4a6637bfe --- /dev/null +++ b/.chloggen/per-node-allocation-strategy.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of 
concern, (e.g. operator, target allocator, github action)
+component: target allocator
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Add new "per node" allocation strategy to target allocator. This strategy will allocate targets to the nodes on which the given target resides. It should only be used in conjunction with the daemonset mode.
+
+# One or more tracking issues related to the change
+issues: [1828]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 190d94fc6d..3fc54126e3 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -30,6 +30,7 @@ jobs:
           - e2e-autoscale
           - e2e-pdb
           - e2e-opampbridge
+          - e2e-targetallocator
           - e2e-prometheuscr
           - e2e-multi-instrumentation
         include:
diff --git a/Makefile b/Makefile
index cd26d6eedb..ce3a9787b3 100644
--- a/Makefile
+++ b/Makefile
@@ -246,6 +246,11 @@ e2e-multi-instrumentation:
 e2e-opampbridge:
 	$(KUTTL) test --config kuttl-test-opampbridge.yaml
 
+# Target allocator end-to-end tests
+.PHONY: e2e-targetallocator
+e2e-targetallocator:
+	$(KUTTL) test --config kuttl-test-targetallocator.yaml
+
 .PHONY: prepare-e2e
 prepare-e2e: kuttl set-image-controller add-image-targetallocator add-image-opampbridge container container-target-allocator container-operator-opamp-bridge start-kind cert-manager install-metrics-server install-targetallocator-prometheus-crds load-image-all deploy
diff --git a/apis/v1alpha1/allocation_strategy.go b/apis/v1alpha1/allocation_strategy.go
index 49c7945171..9b65bd7e27 100644
--- a/apis/v1alpha1/allocation_strategy.go
+++ b/apis/v1alpha1/allocation_strategy.go
@@ -16,7 +16,7 @@ package v1alpha1
 
 type (
 	// OpenTelemetryTargetAllocatorAllocationStrategy represent which strategy to distribute target to each collector
-	// +kubebuilder:validation:Enum=least-weighted;consistent-hashing
+	// +kubebuilder:validation:Enum=least-weighted;consistent-hashing;per-node
 	OpenTelemetryTargetAllocatorAllocationStrategy string
 )
 
@@ -26,4 +26,7 @@ const (
 
 	// OpenTelemetryTargetAllocatorAllocationStrategyConsistentHashing targets will be consistently added to collectors, which allows a high-availability setup.
 	OpenTelemetryTargetAllocatorAllocationStrategyConsistentHashing OpenTelemetryTargetAllocatorAllocationStrategy = "consistent-hashing"
+
+	// OpenTelemetryTargetAllocatorAllocationStrategyPerNode targets will be assigned to the collector on the node they reside on (use only with daemon set).
+ OpenTelemetryTargetAllocatorAllocationStrategyPerNode OpenTelemetryTargetAllocatorAllocationStrategy = "per-node" ) diff --git a/apis/v1alpha1/collector_webhook.go b/apis/v1alpha1/collector_webhook.go index fbd5c165bb..97ee9cc3a4 100644 --- a/apis/v1alpha1/collector_webhook.go +++ b/apis/v1alpha1/collector_webhook.go @@ -228,32 +228,14 @@ func (c CollectorWebhook) validate(ctx context.Context, r *OpenTelemetryCollecto return warnings, fmt.Errorf("the OpenTelemetry Collector mode is set to %s, which does not support the attribute 'AdditionalContainers'", r.Spec.Mode) } - // validate target allocation - if r.Spec.TargetAllocator.Enabled && r.Spec.Mode != ModeStatefulSet { - return warnings, fmt.Errorf("the OpenTelemetry Collector mode is set to %s, which does not support the target allocation deployment", r.Spec.Mode) - } - - // validate Prometheus config for target allocation + // validate target allocator configs if r.Spec.TargetAllocator.Enabled { - promCfg, err := ta.ConfigToPromConfig(r.Spec.Config) - if err != nil { - return warnings, fmt.Errorf("the OpenTelemetry Spec Prometheus configuration is incorrect, %w", err) - } - err = ta.ValidatePromConfig(promCfg, r.Spec.TargetAllocator.Enabled, featuregate.EnableTargetAllocatorRewrite.IsEnabled()) - if err != nil { - return warnings, fmt.Errorf("the OpenTelemetry Spec Prometheus configuration is incorrect, %w", err) + taWarnings, err := c.validateTargetAllocatorConfig(ctx, r) + if taWarnings != nil { + warnings = append(warnings, taWarnings...) } - err = ta.ValidateTargetAllocatorConfig(r.Spec.TargetAllocator.PrometheusCR.Enabled, promCfg) if err != nil { - return warnings, fmt.Errorf("the OpenTelemetry Spec Prometheus configuration is incorrect, %w", err) - } - // if the prometheusCR is enabled, it needs a suite of permissions to function - if r.Spec.TargetAllocator.PrometheusCR.Enabled { - if subjectAccessReviews, err := c.reviewer.CheckPolicyRules(ctx, r.GetNamespace(), r.Spec.TargetAllocator.ServiceAccount, targetAllocatorCRPolicyRules...); err != nil { - return warnings, fmt.Errorf("unable to check rbac rules %w", err) - } else if allowed, deniedReviews := rbac.AllSubjectAccessReviewsAllowed(subjectAccessReviews); !allowed { - warnings = append(warnings, warningsGroupedByResource(deniedReviews)...) 
- } + return warnings, err } } @@ -365,6 +347,44 @@ func (c CollectorWebhook) validate(ctx context.Context, r *OpenTelemetryCollecto return warnings, nil } +func (c CollectorWebhook) validateTargetAllocatorConfig(ctx context.Context, r *OpenTelemetryCollector) (admission.Warnings, error) { + if r.Spec.Mode != ModeStatefulSet && r.Spec.Mode != ModeDaemonSet { + return nil, fmt.Errorf("the OpenTelemetry Collector mode is set to %s, which does not support the target allocation deployment", r.Spec.Mode) + } + + if r.Spec.Mode == ModeDaemonSet && r.Spec.TargetAllocator.AllocationStrategy != OpenTelemetryTargetAllocatorAllocationStrategyPerNode { + return nil, fmt.Errorf("the OpenTelemetry Collector mode is set to %s, which must be used with target allocation strategy %s ", r.Spec.Mode, OpenTelemetryTargetAllocatorAllocationStrategyPerNode) + } + + if r.Spec.TargetAllocator.AllocationStrategy == OpenTelemetryTargetAllocatorAllocationStrategyPerNode && r.Spec.Mode != ModeDaemonSet { + return nil, fmt.Errorf("target allocation strategy %s is only supported in OpenTelemetry Collector mode %s", OpenTelemetryTargetAllocatorAllocationStrategyPerNode, ModeDaemonSet) + } + + // validate Prometheus config for target allocation + promCfg, err := ta.ConfigToPromConfig(r.Spec.Config) + if err != nil { + return nil, fmt.Errorf("the OpenTelemetry Spec Prometheus configuration is incorrect, %w", err) + } + err = ta.ValidatePromConfig(promCfg, r.Spec.TargetAllocator.Enabled, featuregate.EnableTargetAllocatorRewrite.IsEnabled()) + if err != nil { + return nil, fmt.Errorf("the OpenTelemetry Spec Prometheus configuration is incorrect, %w", err) + } + err = ta.ValidateTargetAllocatorConfig(r.Spec.TargetAllocator.PrometheusCR.Enabled, promCfg) + if err != nil { + return nil, fmt.Errorf("the OpenTelemetry Spec Prometheus configuration is incorrect, %w", err) + } + // if the prometheusCR is enabled, it needs a suite of permissions to function + if r.Spec.TargetAllocator.PrometheusCR.Enabled { + if subjectAccessReviews, err := c.reviewer.CheckPolicyRules(ctx, r.GetNamespace(), r.Spec.TargetAllocator.ServiceAccount, targetAllocatorCRPolicyRules...); err != nil { + return nil, fmt.Errorf("unable to check rbac rules %w", err) + } else if allowed, deniedReviews := rbac.AllSubjectAccessReviewsAllowed(subjectAccessReviews); !allowed { + return warningsGroupedByResource(deniedReviews), nil + } + } + + return nil, nil +} + func checkAutoscalerSpec(autoscaler *AutoscalerSpec) error { if autoscaler.Behavior != nil { if autoscaler.Behavior.ScaleDown != nil && autoscaler.Behavior.ScaleDown.StabilizationWindowSeconds != nil && diff --git a/apis/v1alpha1/collector_webhook_test.go b/apis/v1alpha1/collector_webhook_test.go index b6d3effca9..d54e9577c4 100644 --- a/apis/v1alpha1/collector_webhook_test.go +++ b/apis/v1alpha1/collector_webhook_test.go @@ -670,6 +670,19 @@ func TestOTELColValidatingWebhook(t *testing.T) { }, expectedErr: "the OpenTelemetry Spec Prometheus configuration is incorrect", }, + { + name: "invalid target allocation strategy", + otelcol: OpenTelemetryCollector{ + Spec: OpenTelemetryCollectorSpec{ + Mode: ModeDaemonSet, + TargetAllocator: OpenTelemetryTargetAllocator{ + Enabled: true, + AllocationStrategy: OpenTelemetryTargetAllocatorAllocationStrategyLeastWeighted, + }, + }, + }, + expectedErr: "mode is set to daemonset, which must be used with target allocation strategy per-node", + }, { name: "invalid port name", otelcol: OpenTelemetryCollector{ diff --git a/apis/v1alpha1/opentelemetrycollector_types.go 
b/apis/v1alpha1/opentelemetrycollector_types.go index 48bad7dd53..1647ca54a9 100644 --- a/apis/v1alpha1/opentelemetrycollector_types.go +++ b/apis/v1alpha1/opentelemetrycollector_types.go @@ -305,7 +305,8 @@ type OpenTelemetryTargetAllocator struct { // +optional Resources v1.ResourceRequirements `json:"resources,omitempty"` // AllocationStrategy determines which strategy the target allocator should use for allocation. - // The current options are least-weighted and consistent-hashing. The default option is consistent-hashing + // The current options are least-weighted, consistent-hashing and per-node. The default is + // consistent-hashing. // +optional // +kubebuilder:default:=consistent-hashing AllocationStrategy OpenTelemetryTargetAllocatorAllocationStrategy `json:"allocationStrategy,omitempty"` diff --git a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml index c143aa9318..cf15404595 100644 --- a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml +++ b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml @@ -5067,11 +5067,12 @@ spec: default: consistent-hashing description: AllocationStrategy determines which strategy the target allocator should use for allocation. The current options - are least-weighted and consistent-hashing. The default option - is consistent-hashing + are least-weighted, consistent-hashing and per-node. The default + is consistent-hashing. enum: - least-weighted - consistent-hashing + - per-node type: string enabled: description: Enabled indicates whether to use a target allocation diff --git a/cmd/otel-allocator/allocation/allocatortest.go b/cmd/otel-allocator/allocation/allocatortest.go index 74dc8a44e1..c47f5976ce 100644 --- a/cmd/otel-allocator/allocation/allocatortest.go +++ b/cmd/otel-allocator/allocation/allocatortest.go @@ -51,6 +51,7 @@ func MakeNCollectors(n int, startingIndex int) map[string]*Collector { toReturn[collector] = &Collector{ Name: collector, NumTargets: 0, + NodeName: fmt.Sprintf("node-%d", i), } } return toReturn @@ -60,8 +61,9 @@ func MakeNNewTargetsWithEmptyCollectors(n int, startingIndex int) map[string]*ta toReturn := map[string]*target.Item{} for i := startingIndex; i < n+startingIndex; i++ { label := model.LabelSet{ - "i": model.LabelValue(strconv.Itoa(i)), - "total": model.LabelValue(strconv.Itoa(n + startingIndex)), + "i": model.LabelValue(strconv.Itoa(i)), + "total": model.LabelValue(strconv.Itoa(n + startingIndex)), + "__meta_kubernetes_pod_node_name": model.LabelValue("node-0"), } newTarget := target.NewItem(fmt.Sprintf("test-job-%d", i), fmt.Sprintf("test-url-%d", i), label, "") toReturn[newTarget.Hash()] = newTarget diff --git a/cmd/otel-allocator/allocation/consistent_hashing.go b/cmd/otel-allocator/allocation/consistent_hashing.go index d98aca72f6..f69a2f25d2 100644 --- a/cmd/otel-allocator/allocation/consistent_hashing.go +++ b/cmd/otel-allocator/allocation/consistent_hashing.go @@ -161,7 +161,7 @@ func (c *consistentHashingAllocator) handleCollectors(diff diff.Changes[*Collect } // Insert the new collectors for _, i := range diff.Additions() { - c.collectors[i.Name] = NewCollector(i.Name) + c.collectors[i.Name] = NewCollector(i.Name, i.NodeName) c.consistentHasher.Add(c.collectors[i.Name]) } diff --git a/cmd/otel-allocator/allocation/least_weighted.go b/cmd/otel-allocator/allocation/least_weighted.go index 6ae9c5eb2b..729dc85680 100644 --- a/cmd/otel-allocator/allocation/least_weighted.go +++ 
b/cmd/otel-allocator/allocation/least_weighted.go @@ -191,7 +191,7 @@ func (allocator *leastWeightedAllocator) handleCollectors(diff diff.Changes[*Col } // Insert the new collectors for _, i := range diff.Additions() { - allocator.collectors[i.Name] = NewCollector(i.Name) + allocator.collectors[i.Name] = NewCollector(i.Name, i.NodeName) } if allocateTargets { for _, item := range allocator.targetItems { diff --git a/cmd/otel-allocator/allocation/per_node.go b/cmd/otel-allocator/allocation/per_node.go new file mode 100644 index 0000000000..7820f0093c --- /dev/null +++ b/cmd/otel-allocator/allocation/per_node.go @@ -0,0 +1,242 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package allocation + +import ( + "sync" + + "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/diff" + "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/target" + + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" +) + +var _ Allocator = &perNodeAllocator{} + +const perNodeStrategyName = "per-node" + +// perNodeAllocator makes decisions to distribute work among +// a number of OpenTelemetry collectors based on the node on which +// the collector is running. This allocator should be used only when +// collectors are running as daemon set (agent) on each node. +// Users need to call SetTargets when they have new targets in their +// clusters and call SetCollectors when the collectors have changed. +type perNodeAllocator struct { + // m protects collectors and targetItems for concurrent use. + m sync.RWMutex + // collectors is a map from a Collector's node name to a Collector instance + collectors map[string]*Collector + // targetItems is a map from a target item's hash to the target items allocated state + targetItems map[string]*target.Item + + // collectorKey -> job -> target item hash -> true + targetItemsPerJobPerCollector map[string]map[string]map[string]bool + + log logr.Logger + + filter Filter +} + +// SetCollectors sets the set of collectors with key=collectorName, value=Collector object. +// This method is called when Collectors are added or removed. 
+func (allocator *perNodeAllocator) SetCollectors(collectors map[string]*Collector) { + timer := prometheus.NewTimer(TimeToAssign.WithLabelValues("SetCollectors", perNodeStrategyName)) + defer timer.ObserveDuration() + + CollectorsAllocatable.WithLabelValues(perNodeStrategyName).Set(float64(len(collectors))) + if len(collectors) == 0 { + allocator.log.Info("No collector instances present") + return + } + + allocator.m.Lock() + defer allocator.m.Unlock() + + // Check for collector changes + collectorsDiff := diff.Maps(allocator.collectors, collectors) + if len(collectorsDiff.Additions()) != 0 || len(collectorsDiff.Removals()) != 0 { + for _, k := range allocator.collectors { + delete(allocator.collectors, k.NodeName) + delete(allocator.targetItemsPerJobPerCollector, k.Name) + TargetsPerCollector.WithLabelValues(k.Name, perNodeStrategyName).Set(0) + } + + for _, k := range collectors { + allocator.collectors[k.NodeName] = NewCollector(k.Name, k.NodeName) + } + + // Re-allocate any already existing targets. + for _, item := range allocator.targetItems { + allocator.addTargetToTargetItems(item) + } + } +} + +// SetTargets accepts a list of targets that will be used to make +// load balancing decisions. This method should be called when there are +// new targets discovered or existing targets are shutdown. +func (allocator *perNodeAllocator) SetTargets(targets map[string]*target.Item) { + timer := prometheus.NewTimer(TimeToAssign.WithLabelValues("SetTargets", perNodeStrategyName)) + defer timer.ObserveDuration() + + if allocator.filter != nil { + targets = allocator.filter.Apply(targets) + } + RecordTargetsKept(targets) + + allocator.m.Lock() + defer allocator.m.Unlock() + + // Check for target changes + targetsDiff := diff.Maps(allocator.targetItems, targets) + // If there are any additions or removals + if len(targetsDiff.Additions()) != 0 || len(targetsDiff.Removals()) != 0 { + allocator.handleTargets(targetsDiff) + } +} + +// handleTargets receives the new and removed targets and reconciles the current state. +// Any removals are removed from the allocator's targetItems and unassigned from the corresponding collector. +// Any net-new additions are assigned to the collector on the same node as the target. 
+func (allocator *perNodeAllocator) handleTargets(diff diff.Changes[*target.Item]) { + // Check for removals + for k, item := range allocator.targetItems { + // if the current item is in the removals list + if _, ok := diff.Removals()[k]; ok { + c, ok := allocator.collectors[item.GetNodeName()] + if ok { + c.NumTargets-- + TargetsPerCollector.WithLabelValues(item.CollectorName, perNodeStrategyName).Set(float64(c.NumTargets)) + } + delete(allocator.targetItems, k) + delete(allocator.targetItemsPerJobPerCollector[item.CollectorName][item.JobName], item.Hash()) + } + } + + // Check for additions + var unassignedTargets int + for k, item := range diff.Additions() { + // Do nothing if the item is already there + if _, ok := allocator.targetItems[k]; ok { + continue + } else { + // Add item to item pool and assign a collector + collectorAssigned := allocator.addTargetToTargetItems(item) + if !collectorAssigned { + unassignedTargets++ + } + } + } + + // Check for unassigned targets + if unassignedTargets > 0 { + allocator.log.Info("Could not assign targets for some jobs due to missing node labels", "targets", unassignedTargets) + TargetsUnassigned.Set(float64(unassignedTargets)) + } +} + +// addTargetToTargetItems assigns a target to the collector and adds it to the allocator's targetItems +// This method is called from within SetTargets and SetCollectors, which acquire the needed lock. +// This is only called after the collectors are cleared or when a new target has been found in the tempTargetMap. +// Also, any targets that cannot be assigned to a collector, due to no matching node name, will remain unassigned. These +// targets are still "silently" added to the targetItems map, to make sure they exist if collector for a node is added +// later and to prevent them from being reported as unassigned on each new target items setting. +func (allocator *perNodeAllocator) addTargetToTargetItems(tg *target.Item) bool { + allocator.targetItems[tg.Hash()] = tg + chosenCollector, ok := allocator.collectors[tg.GetNodeName()] + if !ok { + allocator.log.V(2).Info("Couldn't find a collector for the target item", "item", tg, "collectors", allocator.collectors) + return false + } + tg.CollectorName = chosenCollector.Name + allocator.addCollectorTargetItemMapping(tg) + chosenCollector.NumTargets++ + TargetsPerCollector.WithLabelValues(chosenCollector.Name, perNodeStrategyName).Set(float64(chosenCollector.NumTargets)) + return true +} + +// addCollectorTargetItemMapping keeps track of which collector has which jobs and targets +// this allows the allocator to respond without any extra allocations to http calls. The caller of this method +// has to acquire a lock. +func (allocator *perNodeAllocator) addCollectorTargetItemMapping(tg *target.Item) { + if allocator.targetItemsPerJobPerCollector[tg.CollectorName] == nil { + allocator.targetItemsPerJobPerCollector[tg.CollectorName] = make(map[string]map[string]bool) + } + if allocator.targetItemsPerJobPerCollector[tg.CollectorName][tg.JobName] == nil { + allocator.targetItemsPerJobPerCollector[tg.CollectorName][tg.JobName] = make(map[string]bool) + } + allocator.targetItemsPerJobPerCollector[tg.CollectorName][tg.JobName][tg.Hash()] = true +} + +// TargetItems returns a shallow copy of the targetItems map. 
+func (allocator *perNodeAllocator) TargetItems() map[string]*target.Item {
+	allocator.m.RLock()
+	defer allocator.m.RUnlock()
+	targetItemsCopy := make(map[string]*target.Item)
+	for k, v := range allocator.targetItems {
+		targetItemsCopy[k] = v
+	}
+	return targetItemsCopy
+}
+
+// Collectors returns a shallow copy of the collectors map.
+func (allocator *perNodeAllocator) Collectors() map[string]*Collector {
+	allocator.m.RLock()
+	defer allocator.m.RUnlock()
+	collectorsCopy := make(map[string]*Collector)
+	for k, v := range allocator.collectors {
+		collectorsCopy[k] = v
+	}
+	return collectorsCopy
+}
+
+func (allocator *perNodeAllocator) GetTargetsForCollectorAndJob(collector string, job string) []*target.Item {
+	allocator.m.RLock()
+	defer allocator.m.RUnlock()
+	if _, ok := allocator.targetItemsPerJobPerCollector[collector]; !ok {
+		return []*target.Item{}
+	}
+	if _, ok := allocator.targetItemsPerJobPerCollector[collector][job]; !ok {
+		return []*target.Item{}
+	}
+	targetItemsCopy := make([]*target.Item, len(allocator.targetItemsPerJobPerCollector[collector][job]))
+	index := 0
+	for targetHash := range allocator.targetItemsPerJobPerCollector[collector][job] {
+		targetItemsCopy[index] = allocator.targetItems[targetHash]
+		index++
+	}
+	return targetItemsCopy
+}
+
+// SetFilter sets the filtering hook to use.
+func (allocator *perNodeAllocator) SetFilter(filter Filter) {
+	allocator.filter = filter
+}
+
+func newPerNodeAllocator(log logr.Logger, opts ...AllocationOption) Allocator {
+	pnAllocator := &perNodeAllocator{
+		log:                           log,
+		collectors:                    make(map[string]*Collector),
+		targetItems:                   make(map[string]*target.Item),
+		targetItemsPerJobPerCollector: make(map[string]map[string]map[string]bool),
+	}
+
+	for _, opt := range opts {
+		opt(pnAllocator)
+	}
+
+	return pnAllocator
+}
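As an illustration of how the new allocator composes with the APIs touched in this patch, here is a minimal sketch (not part of the patch itself; collector and node names such as "col-1"/"node-1" are hypothetical, and the production wiring lives in the allocator server rather than a main function):

    // Minimal sketch of the per-node flow, using only APIs from this patch.
    package main

    import (
    	"fmt"

    	"github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/allocation"
    	"github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/target"

    	"github.com/prometheus/common/model"
    	logf "sigs.k8s.io/controller-runtime/pkg/log"
    )

    func main() {
    	log := logf.Log.WithName("per-node-example")
    	allocator, _ := allocation.New("per-node", log)

    	// One collector pod per node, as a daemonset would produce.
    	allocator.SetCollectors(map[string]*allocation.Collector{
    		"col-1": allocation.NewCollector("col-1", "node-1"),
    	})

    	// A discovered target carrying one of the node labels consulted by GetNodeName.
    	labels := model.LabelSet{"__meta_kubernetes_pod_node_name": "node-1"}
    	item := target.NewItem("kubelet", "10.0.0.1:10250", labels, "")
    	allocator.SetTargets(map[string]*target.Item{item.Hash(): item})

    	// The target lands on the collector running on node-1.
    	assigned := allocator.GetTargetsForCollectorAndJob("col-1", "kubelet")
    	fmt.Printf("targets assigned to col-1/kubelet: %d\n", len(assigned))
    }

Note that collectors arrive keyed by pod name but are re-keyed by node name internally, which is what makes the per-target node lookup constant time.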
diff --git a/cmd/otel-allocator/allocation/per_node_test.go b/cmd/otel-allocator/allocation/per_node_test.go
new file mode 100644
index 0000000000..a7d695bba7
--- /dev/null
+++ b/cmd/otel-allocator/allocation/per_node_test.go
@@ -0,0 +1,128 @@
+// Copyright The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package allocation
+
+import (
+	"testing"
+
+	"github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/target"
+
+	"github.com/prometheus/common/model"
+	"github.com/stretchr/testify/assert"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+var loggerPerNode = logf.Log.WithName("unit-tests")
+
+// Tests that targets are allocated to the collector running on the same node, and that
+// a target without a node label is still added but remains unassigned.
+func TestAllocationPerNode(t *testing.T) {
+	// prepare allocator with initial targets and collectors
+	s, _ := New("per-node", loggerPerNode)
+
+	cols := MakeNCollectors(3, 0)
+	s.SetCollectors(cols)
+	firstLabels := model.LabelSet{
+		"test":                            "test1",
+		"__meta_kubernetes_pod_node_name": "node-0",
+	}
+	secondLabels := model.LabelSet{
+		"test":                        "test2",
+		"__meta_kubernetes_node_name": "node-1",
+	}
+	// no label, should be skipped
+	thirdLabels := model.LabelSet{
+		"test": "test3",
+	}
+	firstTarget := target.NewItem("sample-name", "0.0.0.0:8000", firstLabels, "")
+	secondTarget := target.NewItem("sample-name", "0.0.0.0:8000", secondLabels, "")
+	thirdTarget := target.NewItem("sample-name", "0.0.0.0:8000", thirdLabels, "")
+
+	targetList := map[string]*target.Item{
+		firstTarget.Hash():  firstTarget,
+		secondTarget.Hash(): secondTarget,
+		thirdTarget.Hash():  thirdTarget,
+	}
+
+	// test that targets and collectors are added properly
+	s.SetTargets(targetList)
+
+	// verify length
+	actualItems := s.TargetItems()
+
+	// one target should be skipped
+	expectedTargetLen := len(targetList)
+	assert.Len(t, actualItems, expectedTargetLen)
+
+	// verify allocation to nodes
+	for targetHash, item := range targetList {
+		actualItem, found := actualItems[targetHash]
+		// if third target, should be skipped
+		assert.True(t, found, "target with hash %s not found", item.Hash())
+
+		// only the first two targets should be allocated
+		itemsForCollector := s.GetTargetsForCollectorAndJob(actualItem.CollectorName, actualItem.JobName)
+
+		// first two should be assigned one to each collector; if third target, should not be assigned
+		if targetHash == thirdTarget.Hash() {
+			assert.Len(t, itemsForCollector, 0)
+			continue
+		}
+		assert.Len(t, itemsForCollector, 1)
+		assert.Equal(t, actualItem, itemsForCollector[0])
+	}
+}
+
+func TestTargetsWithNoCollectorsPerNode(t *testing.T) {
+	// prepare allocator with initial targets and collectors
+	c, _ := New("per-node", loggerPerNode)
+
+	// Adding 10 new targets
+	numItems := 10
+	c.SetTargets(MakeNNewTargetsWithEmptyCollectors(numItems, 0))
+	actualTargetItems := c.TargetItems()
+	assert.Len(t, actualTargetItems, numItems)
+
+	// Adding 5 new targets, and removing the old 10 targets
+	numItemsUpdate := 5
+	c.SetTargets(MakeNNewTargetsWithEmptyCollectors(numItemsUpdate, 10))
+	actualTargetItemsUpdated := c.TargetItems()
+	assert.Len(t, actualTargetItemsUpdated, numItemsUpdate)
+
+	// Adding 5 new targets, and one existing target
+	numItemsUpdate = 6
+	c.SetTargets(MakeNNewTargetsWithEmptyCollectors(numItemsUpdate, 14))
+	actualTargetItemsUpdated = c.TargetItems()
+	assert.Len(t, actualTargetItemsUpdated, numItemsUpdate)
+
+	// Adding collectors to test allocation
+	numCols := 2
+	cols := MakeNCollectors(2, 0)
+	c.SetCollectors(cols)
+
+	// Checking to see that there is no change to number of targets
+	actualTargetItems = c.TargetItems()
+	assert.Len(t, actualTargetItems, numItemsUpdate)
+	// Checking to see collectors are added correctly
+	actualCollectors := c.Collectors()
+	assert.Len(t, actualCollectors, numCols)
+	// Based on the label, all targets should be assigned to node-0
+	for name, ac := range actualCollectors {
+		if name == "node-0" {
+			assert.Equal(t, 6, ac.NumTargets)
+		} else {
+			assert.Equal(t, 0, ac.NumTargets)
+		}
+	}
+}
diff --git a/cmd/otel-allocator/allocation/strategy.go b/cmd/otel-allocator/allocation/strategy.go
index b994557732..b61313bd1f 100644
--- a/cmd/otel-allocator/allocation/strategy.go
+++ b/cmd/otel-allocator/allocation/strategy.go
@@
-49,6 +49,10 @@ var ( Name: "opentelemetry_allocator_targets_remaining", Help: "Number of targets kept after filtering.", }) + TargetsUnassigned = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "opentelemetry_allocator_targets_unassigned", + Help: "Number of targets that could not be assigned due to missing node label.", + }) ) type AllocationOption func(Allocator) @@ -106,6 +110,7 @@ var _ consistent.Member = Collector{} // This struct can be extended with information like annotations and labels in the future. type Collector struct { Name string + NodeName string NumTargets int } @@ -117,8 +122,8 @@ func (c Collector) String() string { return c.Name } -func NewCollector(name string) *Collector { - return &Collector{Name: name} +func NewCollector(name, node string) *Collector { + return &Collector{Name: name, NodeName: node} } func init() { @@ -130,4 +135,8 @@ func init() { if err != nil { panic(err) } + err = Register(perNodeStrategyName, newPerNodeAllocator) + if err != nil { + panic(err) + } } diff --git a/cmd/otel-allocator/allocation/strategy_test.go b/cmd/otel-allocator/allocation/strategy_test.go index c12529d8d8..10c61f5365 100644 --- a/cmd/otel-allocator/allocation/strategy_test.go +++ b/cmd/otel-allocator/allocation/strategy_test.go @@ -89,11 +89,11 @@ func Benchmark_Setting(b *testing.B) { } func TestCollectorDiff(t *testing.T) { - collector0 := NewCollector("collector-0") - collector1 := NewCollector("collector-1") - collector2 := NewCollector("collector-2") - collector3 := NewCollector("collector-3") - collector4 := NewCollector("collector-4") + collector0 := NewCollector("collector-0", "") + collector1 := NewCollector("collector-1", "") + collector2 := NewCollector("collector-2", "") + collector3 := NewCollector("collector-3", "") + collector4 := NewCollector("collector-4", "") type args struct { current map[string]*Collector new map[string]*Collector diff --git a/cmd/otel-allocator/collector/collector.go b/cmd/otel-allocator/collector/collector.go index a48797f587..68d8db6038 100644 --- a/cmd/otel-allocator/collector/collector.go +++ b/cmd/otel-allocator/collector/collector.go @@ -80,7 +80,7 @@ func (k *Client) Watch(ctx context.Context, labelSelector *metav1.LabelSelector, for i := range pods.Items { pod := pods.Items[i] if pod.GetObjectMeta().GetDeletionTimestamp() == nil { - collectorMap[pod.Name] = allocation.NewCollector(pod.Name) + collectorMap[pod.Name] = allocation.NewCollector(pod.Name, pod.Spec.NodeName) } } @@ -131,9 +131,14 @@ func runWatch(ctx context.Context, k *Client, c <-chan watch.Event, collectorMap return "" } + if pod.Spec.NodeName == "" { + k.log.Info("Node name is missing from the spec. 
Restarting watch routine") + return "" + } + switch event.Type { //nolint:exhaustive case watch.Added: - collectorMap[pod.Name] = allocation.NewCollector(pod.Name) + collectorMap[pod.Name] = allocation.NewCollector(pod.Name, pod.Spec.NodeName) case watch.Deleted: delete(collectorMap, pod.Name) } diff --git a/cmd/otel-allocator/collector/collector_test.go b/cmd/otel-allocator/collector/collector_test.go index 3a652d491b..77d84f7352 100644 --- a/cmd/otel-allocator/collector/collector_test.go +++ b/cmd/otel-allocator/collector/collector_test.go @@ -71,6 +71,9 @@ func pod(name string) *v1.Pod { Namespace: "test-ns", Labels: labelSet, }, + Spec: v1.PodSpec{ + NodeName: "test-node", + }, } } @@ -99,13 +102,16 @@ func Test_runWatch(t *testing.T) { }, want: map[string]*allocation.Collector{ "test-pod1": { - Name: "test-pod1", + Name: "test-pod1", + NodeName: "test-node", }, "test-pod2": { - Name: "test-pod2", + Name: "test-pod2", + NodeName: "test-node", }, "test-pod3": { - Name: "test-pod3", + Name: "test-pod3", + NodeName: "test-node", }, }, }, @@ -121,19 +127,23 @@ func Test_runWatch(t *testing.T) { }, collectorMap: map[string]*allocation.Collector{ "test-pod1": { - Name: "test-pod1", + Name: "test-pod1", + NodeName: "test-node", }, "test-pod2": { - Name: "test-pod2", + Name: "test-pod2", + NodeName: "test-node", }, "test-pod3": { - Name: "test-pod3", + Name: "test-pod3", + NodeName: "test-node", }, }, }, want: map[string]*allocation.Collector{ "test-pod1": { - Name: "test-pod1", + Name: "test-pod1", + NodeName: "test-node", }, }, }, diff --git a/cmd/otel-allocator/target/target.go b/cmd/otel-allocator/target/target.go index ee5d58cf95..a73e40b6a0 100644 --- a/cmd/otel-allocator/target/target.go +++ b/cmd/otel-allocator/target/target.go @@ -21,6 +21,15 @@ import ( "github.com/prometheus/common/model" ) +// nodeLabels are labels that are used to identify the node on which the given +// target is residing. To learn more about these labels, please refer to: +// https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config +var nodeLabels = []model.LabelName{ + "__meta_kubernetes_pod_node_name", + "__meta_kubernetes_node_name", + "__meta_kubernetes_endpoint_node_name", +} + // LinkJSON This package contains common structs and methods that relate to scrape targets. type LinkJSON struct { Link string `json:"_link"` @@ -39,6 +48,16 @@ func (t *Item) Hash() string { return t.hash } +func (t *Item) GetNodeName() string { + for _, label := range nodeLabels { + if val, ok := t.Labels[label]; ok { + return string(val) + } + } + + return "" +} + // NewItem Creates a new target item. // INVARIANTS: // * Item fields must not be modified after creation. diff --git a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml index 40cc76ab39..5ed95c6145 100644 --- a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml +++ b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml @@ -5064,11 +5064,12 @@ spec: default: consistent-hashing description: AllocationStrategy determines which strategy the target allocator should use for allocation. The current options - are least-weighted and consistent-hashing. The default option - is consistent-hashing + are least-weighted, consistent-hashing and per-node. The default + is consistent-hashing. 
enum: - least-weighted - consistent-hashing + - per-node type: string enabled: description: Enabled indicates whether to use a target allocation diff --git a/docs/api.md b/docs/api.md index 8fb054e25b..a586f024e8 100644 --- a/docs/api.md +++ b/docs/api.md @@ -18555,9 +18555,9 @@ TargetAllocator indicates a value which determines whether to spawn a target all allocationStrategy enum - AllocationStrategy determines which strategy the target allocator should use for allocation. The current options are least-weighted and consistent-hashing. The default option is consistent-hashing
+          AllocationStrategy determines which strategy the target allocator should use for allocation. The current options are least-weighted, consistent-hashing and per-node. The default is consistent-hashing.<br/>
           <br/>
-            <i>Enum</i>: least-weighted, consistent-hashing<br/>
+            <i>Enum</i>: least-weighted, consistent-hashing, per-node<br/>
             <i>Default</i>: consistent-hashing<br/>
false diff --git a/kuttl-test-targetallocator.yaml b/kuttl-test-targetallocator.yaml new file mode 100644 index 0000000000..eca8f5336e --- /dev/null +++ b/kuttl-test-targetallocator.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestSuite +artifactsDir: ./tests/_build/artifacts/ +testDirs: + - ./tests/e2e-targetallocator/ +timeout: 600 diff --git a/tests/e2e/targetallocator-features/00-assert.yaml b/tests/e2e-targetallocator/targetallocator-features/00-assert.yaml similarity index 100% rename from tests/e2e/targetallocator-features/00-assert.yaml rename to tests/e2e-targetallocator/targetallocator-features/00-assert.yaml diff --git a/tests/e2e/targetallocator-features/00-install.yaml b/tests/e2e-targetallocator/targetallocator-features/00-install.yaml similarity index 100% rename from tests/e2e/targetallocator-features/00-install.yaml rename to tests/e2e-targetallocator/targetallocator-features/00-install.yaml diff --git a/tests/e2e/targetallocator-features/01-assert.yaml b/tests/e2e-targetallocator/targetallocator-features/01-assert.yaml similarity index 100% rename from tests/e2e/targetallocator-features/01-assert.yaml rename to tests/e2e-targetallocator/targetallocator-features/01-assert.yaml diff --git a/tests/e2e/targetallocator-features/01-liveness.yaml b/tests/e2e-targetallocator/targetallocator-features/01-liveness.yaml similarity index 100% rename from tests/e2e/targetallocator-features/01-liveness.yaml rename to tests/e2e-targetallocator/targetallocator-features/01-liveness.yaml diff --git a/tests/e2e-targetallocator/targetallocator-kubernetessd/00-assert.yaml b/tests/e2e-targetallocator/targetallocator-kubernetessd/00-assert.yaml new file mode 100644 index 0000000000..3875ffb539 --- /dev/null +++ b/tests/e2e-targetallocator/targetallocator-kubernetessd/00-assert.yaml @@ -0,0 +1,51 @@ +# This KUTTL assert uses the check-daemonset.sh script to ensure the number of ready pods in a daemonset matches the desired count, retrying until successful or a timeout occurs. The script is needed as the number of Kubernetes cluster nodes can vary and we cannot statically set desiredNumberScheduled and numberReady in the assert for daemonset status. 
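Purely as an illustration of what the script asserts — comparing desiredNumberScheduled against numberReady rather than a fixed pod count — an equivalent check written against client-go might look like the following sketch (the in-cluster config, namespace, and error handling are assumptions, not part of this test suite):

    // Illustrative only: what check-daemonset.sh verifies, expressed with client-go.
    package main

    import (
    	"context"
    	"fmt"

    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    	"k8s.io/client-go/kubernetes"
    	"k8s.io/client-go/rest"
    )

    func main() {
    	cfg, err := rest.InClusterConfig()
    	if err != nil {
    		panic(err)
    	}
    	client := kubernetes.NewForConfigOrDie(cfg)

    	ds, err := client.AppsV1().DaemonSets("default").
    		Get(context.Background(), "prometheus-kubernetessd-collector", metav1.GetOptions{})
    	if err != nil {
    		panic(err)
    	}

    	// The desired pod count varies with cluster size, so compare the two
    	// status fields instead of asserting a fixed number.
    	if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled {
    		panic(fmt.Sprintf("daemonset not ready: %d/%d",
    			ds.Status.NumberReady, ds.Status.DesiredNumberScheduled))
    	}
    	fmt.Println("daemonset ready")
    }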
+
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+- script: ./tests/e2e-targetallocator/targetallocator-kubernetessd/check-daemonset.sh
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus-kubernetessd-targetallocator
+status:
+  replicas: 1
+  readyReplicas: 1
+  observedGeneration: 1
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-kubernetessd-targetallocator
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-kubernetessd-collector
+data:
+  collector.yaml: |
+    exporters:
+      prometheus:
+        endpoint: 0.0.0.0:9090
+    receivers:
+      prometheus:
+        config: {}
+        target_allocator:
+          collector_id: ${POD_NAME}
+          endpoint: http://prometheus-kubernetessd-targetallocator:80
+          interval: 30s
+    service:
+      pipelines:
+        metrics:
+          exporters:
+          - prometheus
+          processors: []
+          receivers:
+          - prometheus
+---
+# Print TA pod logs if test fails
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+collectors:
+  - selector: app.kubernetes.io/managed-by=opentelemetry-operator
diff --git a/tests/e2e-targetallocator/targetallocator-kubernetessd/00-install.yaml b/tests/e2e-targetallocator/targetallocator-kubernetessd/00-install.yaml
new file mode 100644
index 0000000000..9442b4023f
--- /dev/null
+++ b/tests/e2e-targetallocator/targetallocator-kubernetessd/00-install.yaml
@@ -0,0 +1,133 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: ta
+automountServiceAccountToken: true
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: collector
+automountServiceAccountToken: true
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: ta
+rules:
+- apiGroups: [""]
+  resources:
+  - pods
+  - nodes
+  - services
+  - endpoints
+  - configmaps
+  - secrets
+  - namespaces
+  verbs:
+  - get
+  - watch
+  - list
+- apiGroups: ["apps"]
+  resources:
+  - statefulsets
+  - daemonsets
+  - services
+  - endpoints
+  verbs:
+  - get
+  - watch
+  - list
+- apiGroups: ["discovery.k8s.io"]
+  resources:
+  - endpointslices
+  verbs:
+  - get
+  - watch
+  - list
+- apiGroups: ["networking.k8s.io"]
+  resources:
+  - ingresses
+  verbs:
+  - get
+  - watch
+  - list
+- nonResourceURLs: ["/metrics"]
+  verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: collector
+rules:
+- apiGroups: [""]
+  resources:
+  - pods
+  - nodes
+  - nodes/metrics
+  - services
+  - endpoints
+  verbs:
+  - get
+  - watch
+  - list
+- apiGroups: ["networking.k8s.io"]
+  resources:
+  - ingresses
+  verbs:
+  - get
+  - watch
+  - list
+- nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
+  verbs: ["get"]
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - command: kubectl create clusterrolebinding ta-$NAMESPACE --clusterrole=ta --serviceaccount=$NAMESPACE:ta
+  - command: kubectl create clusterrolebinding collector-$NAMESPACE --clusterrole=collector --serviceaccount=$NAMESPACE:collector
+---
+apiVersion: opentelemetry.io/v1alpha1
+kind: OpenTelemetryCollector
+metadata:
+  name: prometheus-kubernetessd
+spec:
+  mode: daemonset
+  serviceAccount: collector
+  targetAllocator:
+    enabled: true
+    allocationStrategy: "per-node"
+    serviceAccount: ta
+    prometheusCR:
+      enabled: false
+  config: |
+    receivers:
+      prometheus:
+        config:
+          scrape_configs:
+          - job_name: kubelet
+            scheme: https
+            authorization:
+              credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+            tls_config:
+              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+              insecure_skip_verify: true
+            honor_labels: true
+            kubernetes_sd_configs:
+            - role: node
+            metric_relabel_configs:
+            - action: keep
+              regex: "kubelet_running_pods"
+              source_labels: [__name__]
+
+    processors:
+
+    exporters:
+      prometheus:
+        endpoint: 0.0.0.0:9090
+    service:
+      pipelines:
+        metrics:
+          receivers: [prometheus]
+          processors: []
+          exporters: [prometheus]
diff --git a/tests/e2e/targetallocator-prometheuscr/01-assert.yaml b/tests/e2e-targetallocator/targetallocator-kubernetessd/01-assert.yaml
similarity index 100%
rename from tests/e2e/targetallocator-prometheuscr/01-assert.yaml
rename to tests/e2e-targetallocator/targetallocator-kubernetessd/01-assert.yaml
diff --git a/tests/e2e-targetallocator/targetallocator-kubernetessd/01-install.yaml b/tests/e2e-targetallocator/targetallocator-kubernetessd/01-install.yaml
new file mode 100644
index 0000000000..5090dd5c89
--- /dev/null
+++ b/tests/e2e-targetallocator/targetallocator-kubernetessd/01-install.yaml
@@ -0,0 +1,49 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-metrics
+spec:
+  template:
+    spec:
+      restartPolicy: OnFailure
+      containers:
+      - name: check-metrics
+        image: curlimages/curl
+        args:
+        - /bin/sh
+        - -c
+        - curl -s http://prometheus-kubernetessd-collector:9090/metrics | grep "kubelet_running_pods{"
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-ta-jobs
+spec:
+  template:
+    spec:
+      restartPolicy: OnFailure
+      containers:
+      - name: check-metrics
+        image: curlimages/curl
+        args:
+        - /bin/sh
+        - -c
+        - curl -s http://prometheus-kubernetessd-targetallocator/scrape_configs | grep "kubelet"
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-ta-scrape-configs
+spec:
+  template:
+    spec:
+      restartPolicy: OnFailure
+      containers:
+      - name: check-metrics
+        image: curlimages/curl
+        args:
+        - /bin/sh
+        - -c
+        # First get the collector pod name, subsequently check that the targets for the collector include the node name label.
+        - curl -s http://prometheus-kubernetessd-targetallocator/jobs/kubelet/targets?collector_id=$(curl -s http://prometheus-kubernetessd-targetallocator/jobs/kubelet/targets | grep -oE "prometheus-kubernetessd-collector-.{5}") | grep "__meta_kubernetes_node_name"
\ No newline at end of file
diff --git a/tests/e2e-targetallocator/targetallocator-kubernetessd/check-daemonset.sh b/tests/e2e-targetallocator/targetallocator-kubernetessd/check-daemonset.sh
new file mode 100755
index 0000000000..ae9c64b8d9
--- /dev/null
+++ b/tests/e2e-targetallocator/targetallocator-kubernetessd/check-daemonset.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Name of the daemonset to check
+DAEMONSET_NAME="prometheus-kubernetessd-collector"
+
+# Get the desired and ready pod counts for the daemonset
+read DESIRED READY <<< $(kubectl get daemonset -n $NAMESPACE $DAEMONSET_NAME -o custom-columns=:status.desiredNumberScheduled,:status.numberReady --no-headers)
+
+# Check if the desired count matches the ready count
+if [ "$DESIRED" -eq "$READY" ]; then
+  echo "Desired count ($DESIRED) matches the ready count ($READY) for $DAEMONSET_NAME."
+else
+  echo "Desired count ($DESIRED) does not match the ready count ($READY) for $DAEMONSET_NAME."
+  exit 1
+fi
diff --git a/tests/e2e/targetallocator-prometheuscr/00-assert.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/00-assert.yaml
similarity index 100%
rename from tests/e2e/targetallocator-prometheuscr/00-assert.yaml
rename to tests/e2e-targetallocator/targetallocator-prometheuscr/00-assert.yaml
diff --git a/tests/e2e/targetallocator-prometheuscr/00-install.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/00-install.yaml
similarity index 100%
rename from tests/e2e/targetallocator-prometheuscr/00-install.yaml
rename to tests/e2e-targetallocator/targetallocator-prometheuscr/00-install.yaml
diff --git a/tests/e2e-targetallocator/targetallocator-prometheuscr/01-assert.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/01-assert.yaml
new file mode 100644
index 0000000000..b3b95bf022
--- /dev/null
+++ b/tests/e2e-targetallocator/targetallocator-prometheuscr/01-assert.yaml
@@ -0,0 +1,20 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-metrics
+status:
+  succeeded: 1
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-ta-jobs
+status:
+  succeeded: 1
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-ta-scrape-configs
+status:
+  succeeded: 1
\ No newline at end of file
diff --git a/tests/e2e/targetallocator-prometheuscr/01-install.yaml b/tests/e2e-targetallocator/targetallocator-prometheuscr/01-install.yaml
similarity index 100%
rename from tests/e2e/targetallocator-prometheuscr/01-install.yaml
rename to tests/e2e-targetallocator/targetallocator-prometheuscr/01-install.yaml
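The curl probes above exercise the target allocator's HTTP API (/scrape_configs and /jobs/<job>/targets?collector_id=...). For readers who prefer Go over shell, a minimal sketch of the same two calls follows; the service URL matches this test's environment, while the collector pod name suffix is a placeholder that would normally be discovered at runtime:

    // Illustrative client for the target allocator HTTP API used by the e2e jobs above.
    package main

    import (
    	"fmt"
    	"io"
    	"net/http"
    )

    func main() {
    	base := "http://prometheus-kubernetessd-targetallocator"

    	// List scrape configs, as the check-ta-jobs job does.
    	resp, err := http.Get(base + "/scrape_configs")
    	if err != nil {
    		panic(err)
    	}
    	body, _ := io.ReadAll(resp.Body)
    	resp.Body.Close()
    	fmt.Println(string(body))

    	// Fetch the targets assigned to one collector for the "kubelet" job, as the
    	// check-ta-scrape-configs job does; with the per-node strategy each returned
    	// target carries the node label that drove the assignment. The pod name
    	// suffix "abcde" below is a placeholder.
    	resp, err = http.Get(base + "/jobs/kubelet/targets?collector_id=prometheus-kubernetessd-collector-abcde")
    	if err != nil {
    		panic(err)
    	}
    	body, _ = io.ReadAll(resp.Body)
    	resp.Body.Close()
    	fmt.Println(string(body))
    }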