diff --git a/controllers/remote/cluster_cache_tracker.go b/controllers/remote/cluster_cache_tracker.go index b9ac6973f9a4..5192890fa4e3 100644 --- a/controllers/remote/cluster_cache_tracker.go +++ b/controllers/remote/cluster_cache_tracker.go @@ -347,6 +347,13 @@ func (t *ClusterCacheTracker) newClusterAccessor(ctx context.Context, cluster cl return nil, err } + // Wrap the client with a client that sets timeouts on all Get and List calls + // If we don't set timeouts here Get and List calls can get stuck if they lazily create a new informer + // and the informer than doesn't sync because the workload cluster apiserver is not reachable. + // An alternative would be to set timeouts in the contexts we pass into all Get and List calls. + // It should be reasonable to have Get and List calls timeout within the duration configured in the restConfig. + delegatingClient = newClientWithTimeout(delegatingClient, config.Timeout) + // Generating a new private key to be used for generating temporary certificates to connect to // etcd on the target cluster. // NOTE: Generating a private key is an expensive operation, so we store it in the cluster accessor. @@ -595,3 +602,33 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health t.deleteAccessor(ctx, in.cluster) } } + +// newClientWithTimeout returns a new client which sets the specified timeout on all Get and List calls. +// If we don't set timeouts here Get and List calls can get stuck if they lazily create a new informer +// and the informer than doesn't sync because the workload cluster apiserver is not reachable. +// An alternative would be to set timeouts in the contexts we pass into all Get and List calls. +func newClientWithTimeout(client client.Client, timeout time.Duration) client.Client { + return clientWithTimeout{ + Client: client, + timeout: timeout, + } +} + +type clientWithTimeout struct { + client.Client + timeout time.Duration +} + +var _ client.Client = &clientWithTimeout{} + +func (c clientWithTimeout) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + ctx, cancel := context.WithTimeout(ctx, c.timeout) + defer cancel() + return c.Client.Get(ctx, key, obj, opts...) +} + +func (c clientWithTimeout) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + ctx, cancel := context.WithTimeout(ctx, c.timeout) + defer cancel() + return c.Client.List(ctx, list, opts...) +}