fix: fetch secrets on-demand to fix controller boot for large clusters (#829)

A list cache of secrets is unnecessary: the controller never needs to react
to Secret changes, and it can fetch the individual secrets required for
analysis arguments on demand. On large Kubernetes clusters with thousands of
secrets, caching every Secret made the argo-rollouts controller unbootable.

Co-authored-by: Jason Yanowitz <jason.yanowitz@getbraintree.com>
Signed-off-by: Eric Mrak <emrak@paypal.com>
mrak and yanowitz authored Nov 9, 2020
1 parent 71428ce commit 0eb4df0
Showing 6 changed files with 9 additions and 26 deletions.
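
The core of the change is in analysis/analysis.go below: secret-backed analysis arguments were previously resolved through a lister served by a cluster-wide Secret informer cache; they are now fetched with a single GET per secret, when needed. A minimal sketch of the on-demand pattern, assuming a generic client-go clientset (the fetchSecretValue helper and its signature are illustrative, not the commit's code):

package secretfetch

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// fetchSecretValue reads one key from one Secret with a single API call,
// instead of listing and watching every Secret in the cluster up front.
func fetchSecretValue(ctx context.Context, client kubernetes.Interface, namespace, name, key string) (string, error) {
	secret, err := client.CoreV1().Secrets(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return "", err
	}
	value, ok := secret.Data[key]
	if !ok {
		return "", fmt.Errorf("key %q not found in secret %q", key, name)
	}
	return string(value), nil
}

The trade-off is one API round-trip per argument resolution in exchange for dropping the list/watch on every Secret in the cluster, which is the right trade when the controller never reacts to Secret events.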
3 changes: 2 additions & 1 deletion analysis/analysis.go
@@ -1,6 +1,7 @@
 package analysis
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"strings"
@@ -223,7 +224,7 @@ func (c *Controller) resolveArgs(tasks []metricTask, args []v1alpha1.Argument, n
 			return nil, nil, err
 		}
 		name := arg.ValueFrom.SecretKeyRef.Name
-		secret, err := c.secretLister.Secrets(namespace).Get(name)
+		secret, err := c.kubeclientset.CoreV1().Secrets(namespace).Get(context.TODO(), name, metav1.GetOptions{})
 		if err != nil {
 			return nil, nil, err
 		}
11 changes: 6 additions & 5 deletions analysis/analysis_test.go
@@ -2,6 +2,7 @@ package analysis
 
 import (
 	"bytes"
+	"context"
 	"fmt"
 	"strings"
 	"testing"
@@ -1150,8 +1151,8 @@ func TestSecretContentReferenceSuccess(t *testing.T) {
 		},
 	}
 	defer f.Close()
-	f.secretRunLister = append(f.secretRunLister, secret)
 	c, _, _ := f.newController(noResyncPeriodFunc)
+	f.kubeclient.CoreV1().Secrets(metav1.NamespaceDefault).Create(context.TODO(), secret, metav1.CreateOptions{})
 	argName := "apikey"
 	run := &v1alpha1.AnalysisRun{
 		ObjectMeta: metav1.ObjectMeta{
@@ -1202,8 +1203,8 @@ func TestSecretContentReferenceProviderError(t *testing.T) {
 		},
 	}
 	defer f.Close()
-	f.secretRunLister = append(f.secretRunLister, secret)
 	c, _, _ := f.newController(noResyncPeriodFunc)
+	f.kubeclient.CoreV1().Secrets(metav1.NamespaceDefault).Create(context.TODO(), secret, metav1.CreateOptions{})
 	run := &v1alpha1.AnalysisRun{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: metav1.NamespaceDefault,
@@ -1268,8 +1269,8 @@ func TestSecretContentReferenceAndMultipleArgResolutionSuccess(t *testing.T) {
 		},
 	}
 	defer f.Close()
-	f.secretRunLister = append(f.secretRunLister, secret)
 	c, _, _ := f.newController(noResyncPeriodFunc)
+	f.kubeclient.CoreV1().Secrets(metav1.NamespaceDefault).Create(context.TODO(), secret, metav1.CreateOptions{})
 	run := &v1alpha1.AnalysisRun{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: metav1.NamespaceDefault,
@@ -1332,7 +1333,7 @@ func TestSecretNotFound(t *testing.T) {
 		incompleteMeasurement: nil,
 	}}
 	_, _, err := c.resolveArgs(tasks, args, metav1.NamespaceDefault)
-	assert.Equal(t, "secret \"secret-does-not-exist\" not found", err.Error())
+	assert.Equal(t, "secrets \"secret-does-not-exist\" not found", err.Error())
 }
 
 func TestArgDoesNotContainSecretRefError(t *testing.T) {
@@ -1366,8 +1367,8 @@ func TestKeyNotInSecret(t *testing.T) {
 		},
 	}
 	defer f.Close()
-	f.secretRunLister = append(f.secretRunLister, secret)
 	c, _, _ := f.newController(noResyncPeriodFunc)
+	f.kubeclient.CoreV1().Secrets(metav1.NamespaceDefault).Create(context.TODO(), secret, metav1.CreateOptions{})
 
 	args := []v1alpha1.Argument{{
 		Name: "secret-wrong-key",
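
The test changes above mirror the fix: instead of seeding a secret informer indexer through the removed secretRunLister fixture field, each test now creates its secret through the fake clientset so the controller's direct Get finds it. A standalone sketch of that seeding pattern with client-go's fake clientset (the test name and values are illustrative):

package secretfetch

import (
	"context"
	"testing"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	k8sfake "k8s.io/client-go/kubernetes/fake"
)

// TestFakeSecretGet shows that objects created through the fake clientset
// are served back by later Get calls, so no informer cache is involved.
func TestFakeSecretGet(t *testing.T) {
	client := k8sfake.NewSimpleClientset()
	secret := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{Name: "example", Namespace: metav1.NamespaceDefault},
		Data:       map[string][]byte{"apikey": []byte("abc123")},
	}
	if _, err := client.CoreV1().Secrets(metav1.NamespaceDefault).Create(context.TODO(), secret, metav1.CreateOptions{}); err != nil {
		t.Fatal(err)
	}
	got, err := client.CoreV1().Secrets(metav1.NamespaceDefault).Get(context.TODO(), "example", metav1.GetOptions{})
	if err != nil {
		t.Fatal(err)
	}
	if string(got.Data["apikey"]) != "abc123" {
		t.Fatalf("unexpected secret data: %q", got.Data["apikey"])
	}
}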
6 changes: 0 additions & 6 deletions analysis/controller.go
@@ -8,9 +8,7 @@ import (
 	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/util/wait"
 	batchinformers "k8s.io/client-go/informers/batch/v1"
-	coreinformers "k8s.io/client-go/informers/core/v1"
 	"k8s.io/client-go/kubernetes"
-	corelisters "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/workqueue"
@@ -33,8 +31,6 @@ type Controller struct {
 	// analysisclientset is a clientset for our own API group
 	argoProjClientset clientset.Interface
 
-	secretLister corelisters.SecretLister
-
 	analysisRunLister listers.AnalysisRunLister
 
 	analysisRunSynced cache.InformerSynced
@@ -66,7 +62,6 @@ type ControllerConfig struct {
 	KubeClientSet        kubernetes.Interface
 	ArgoProjClientset    clientset.Interface
 	AnalysisRunInformer  informers.AnalysisRunInformer
-	SecretInformer       coreinformers.SecretInformer
 	JobInformer          batchinformers.JobInformer
 	ResyncPeriod         time.Duration
 	AnalysisRunWorkQueue workqueue.RateLimitingInterface
@@ -83,7 +78,6 @@ func NewController(cfg ControllerConfig) *Controller {
 		analysisRunLister:    cfg.AnalysisRunInformer.Lister(),
 		metricsServer:        cfg.MetricsServer,
 		analysisRunWorkQueue: cfg.AnalysisRunWorkQueue,
-		secretLister:         cfg.SecretInformer.Lister(),
 		jobInformer:          cfg.JobInformer,
 		analysisRunSynced:    cfg.AnalysisRunInformer.Informer().HasSynced,
 		recorder:             cfg.Recorder,
8 changes: 0 additions & 8 deletions analysis/controller_test.go
@@ -9,7 +9,6 @@ import (
 	log "github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 	"github.com/undefinedlabs/go-mpatch"
-	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -40,8 +39,6 @@ type fixture struct {
 	client     *fake.Clientset
 	kubeclient *k8sfake.Clientset
 
-	// Secrets to put in the store.
-	secretRunLister []*corev1.Secret
 	// Objects to put in the store.
 	analysisRunLister []*v1alpha1.AnalysisRun
 	// Actions expected to happen on the client.
@@ -101,7 +98,6 @@ func (f *fixture) newController(resync resyncFunc) (*Controller, informers.Share
 		KubeClientSet:        f.kubeclient,
 		ArgoProjClientset:    f.client,
 		AnalysisRunInformer:  i.Argoproj().V1alpha1().AnalysisRuns(),
-		SecretInformer:       k8sI.Core().V1().Secrets(),
 		JobInformer:          k8sI.Batch().V1().Jobs(),
 		ResyncPeriod:         resync(),
 		AnalysisRunWorkQueue: analysisRunWorkqueue,
@@ -135,10 +131,6 @@ func (f *fixture) newController(resync resyncFunc) (*Controller, informers.Share
 		i.Argoproj().V1alpha1().AnalysisRuns().Informer().GetIndexer().Add(ar)
 	}
 
-	for _, s := range f.secretRunLister {
-		k8sI.Core().V1().Secrets().Informer().GetIndexer().Add(s)
-	}
-
 	return c, i, k8sI
 }

1 change: 0 additions & 1 deletion cmd/rollouts-controller/main.go
@@ -129,7 +129,6 @@ func newCommand() *cobra.Command {
 			kubeInformerFactory.Apps().V1().ReplicaSets(),
 			kubeInformerFactory.Core().V1().Services(),
 			kubeInformerFactory.Extensions().V1beta1().Ingresses(),
-			kubeInformerFactory.Core().V1().Secrets(),
 			jobInformerFactory.Batch().V1().Jobs(),
 			tolerantinformer.NewTolerantRolloutInformer(dynamicInformerFactory),
 			tolerantinformer.NewTolerantExperimentInformer(dynamicInformerFactory),
6 changes: 1 addition & 5 deletions controller/controller.go
@@ -75,7 +75,6 @@ type Manager struct {
 	analysisRunSynced             cache.InformerSynced
 	analysisTemplateSynced        cache.InformerSynced
 	clusterAnalysisTemplateSynced cache.InformerSynced
-	secretSynced                  cache.InformerSynced
 	serviceSynced                 cache.InformerSynced
 	ingressSynced                 cache.InformerSynced
 	jobSynced                     cache.InformerSynced
@@ -106,7 +105,6 @@ func NewManager(
 	replicaSetInformer appsinformers.ReplicaSetInformer,
 	servicesInformer coreinformers.ServiceInformer,
 	ingressesInformer extensionsinformers.IngressInformer,
-	secretInformer coreinformers.SecretInformer,
 	jobInformer batchinformers.JobInformer,
 	rolloutsInformer informers.RolloutInformer,
 	experimentsInformer informers.ExperimentInformer,
@@ -193,7 +191,6 @@ func NewManager(
 		KubeClientSet:        kubeclientset,
 		ArgoProjClientset:    argoprojclientset,
 		AnalysisRunInformer: analysisRunInformer,
-		SecretInformer:       secretInformer,
 		JobInformer:          jobInformer,
 		ResyncPeriod:         resyncPeriod,
 		AnalysisRunWorkQueue: analysisRunWorkqueue,
@@ -231,7 +228,6 @@ func NewManager(
 		rolloutSynced:           rolloutsInformer.Informer().HasSynced,
 		serviceSynced:           servicesInformer.Informer().HasSynced,
 		ingressSynced:           ingressesInformer.Informer().HasSynced,
-		secretSynced:            secretInformer.Informer().HasSynced,
 		jobSynced:               jobInformer.Informer().HasSynced,
 		experimentSynced:        experimentsInformer.Informer().HasSynced,
 		analysisRunSynced:       analysisRunInformer.Informer().HasSynced,
@@ -271,7 +267,7 @@ func (c *Manager) Run(rolloutThreadiness, serviceThreadiness, ingressThreadiness
 	defer c.analysisRunWorkqueue.ShutDown()
 	// Wait for the caches to be synced before starting workers
 	log.Info("Waiting for controller's informer caches to sync")
-	if ok := cache.WaitForCacheSync(stopCh, c.serviceSynced, c.ingressSynced, c.secretSynced, c.jobSynced, c.rolloutSynced, c.experimentSynced, c.analysisRunSynced, c.analysisTemplateSynced, c.replicasSetSynced); !ok {
+	if ok := cache.WaitForCacheSync(stopCh, c.serviceSynced, c.ingressSynced, c.jobSynced, c.rolloutSynced, c.experimentSynced, c.analysisRunSynced, c.analysisTemplateSynced, c.replicasSetSynced); !ok {
 		return fmt.Errorf("failed to wait for caches to sync")
 	}
 	// only wait for cluster scoped informers to sync if we are running in cluster-wide mode
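
Dropping c.secretSynced from the sync gate above is what actually unblocks startup: cache.WaitForCacheSync returns only after every listed informer finishes its initial List, and per the commit message it was the cluster-wide Secret cache that made boot fail on clusters with thousands of secrets. A standalone sketch of the mechanism (the fake clientset and throwaway main are illustrative, not the manager's actual wiring):

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/informers"
	k8sfake "k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/tools/cache"
)

func main() {
	client := k8sfake.NewSimpleClientset()
	factory := informers.NewSharedInformerFactory(client, 30*time.Second)

	// Requesting a type's informer registers it with the factory; startup
	// must then wait for an initial List of every object of that type
	// before WaitForCacheSync returns.
	secretsSynced := factory.Core().V1().Secrets().Informer().HasSynced

	stopCh := make(chan struct{})
	defer close(stopCh)
	factory.Start(stopCh)

	if ok := cache.WaitForCacheSync(stopCh, secretsSynced); !ok {
		fmt.Println("failed to wait for caches to sync")
		return
	}
	fmt.Println("caches synced")
}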
