diff --git a/.chloggen/3216-ta-retry-namespace-informer-creation.yaml b/.chloggen/3216-ta-retry-namespace-informer-creation.yaml new file mode 100644 index 0000000000..2db25c9caa --- /dev/null +++ b/.chloggen/3216-ta-retry-namespace-informer-creation.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: target allocator + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Retry failed namespace informer creation in promOperator CRD watcher, then exit if the creation issue cannot be resolved" + +# One or more tracking issues related to the change +issues: [3216] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: diff --git a/cmd/otel-allocator/watcher/promOperator.go b/cmd/otel-allocator/watcher/promOperator.go index aa066a7468..ae2ddcb68e 100644 --- a/cmd/otel-allocator/watcher/promOperator.go +++ b/cmd/otel-allocator/watcher/promOperator.go @@ -42,6 +42,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/retry" allocatorconfig "github.com/open-telemetry/opentelemetry-operator/cmd/otel-allocator/config" ) @@ -101,14 +102,23 @@ func NewPrometheusCRWatcher(ctx context.Context, logger logr.Logger, cfg allocat eventRecorderFactory := operator.NewEventRecorderFactory(false) eventRecorder := eventRecorderFactory(clientset, "target-allocator") - nsMonInf, err := getNamespaceInformer(ctx, map[string]struct{}{v1.NamespaceAll: {}}, promOperatorLogger, clientset, operatorMetrics) + var nsMonInf cache.SharedIndexInformer + getNamespaceInformerErr := retry.OnError(retry.DefaultRetry, + func(err error) bool { + logger.Error(err, "Retrying namespace informer creation in promOperator CRD watcher") + return true + }, func() error { + nsMonInf, err = getNamespaceInformer(ctx, map[string]struct{}{v1.NamespaceAll: {}}, promOperatorLogger, clientset, operatorMetrics) + return err + }) + if getNamespaceInformerErr != nil { + logger.Error(getNamespaceInformerErr, "Failed to create namespace informer in promOperator CRD watcher") + return nil, getNamespaceInformerErr + } + + resourceSelector, err = prometheus.NewResourceSelector(promOperatorSlogLogger, prom, store, nsMonInf, operatorMetrics, eventRecorder) if err != nil { - logger.Error(err, "Failed to create namespace informer in promOperator CRD watcher") - } else { - resourceSelector, err = prometheus.NewResourceSelector(promOperatorSlogLogger, prom, store, nsMonInf, operatorMetrics, eventRecorder) - if err != nil { - logger.Error(err, "Failed to create resource selector in promOperator CRD watcher") - } + logger.Error(err, "Failed to 
create resource selector in promOperator CRD watcher") } return &PrometheusCRWatcher{