Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve race condition between CARM ConfigMap and reconciler for annotated namespaces #138

Merged
merged 1 commit into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions pkg/runtime/adoption_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package runtime

import (
"context"
"fmt"

"github.com/go-logr/logr"
"github.com/pkg/errors"
Expand Down Expand Up @@ -107,10 +108,28 @@ func (r *adoptionReconciler) reconcile(ctx context.Context, req ctrlrt.Request)
return ackerr.NotAdoptable
}

targetDescriptor := rmf.ResourceDescriptor()
acctID := r.getOwnerAccountID(res)
// If a user has specified a namespace that is annotated with the
// an owner account ID, we need an appropriate role ARN to assume
// in order to perform the reconciliation. The roles ARN are typically
// stored in a ConfigMap in the ACK system namespace.
// If the ConfigMap is not created, or not populated with an
// accountID to roleARN mapping, we need to properly requeue with a
// helpful message to the user.
var roleARN ackv1alpha1.AWSResourceName
acctID, needCARMLookup := r.getOwnerAccountID(res)
if needCARMLookup {
// This means that the user is specifying a namespace that is
// annotated with an owner account ID. We need to retrieve the
// roleARN from the ConfigMap and properly requeue if the roleARN
// is not available.
roleARN, err = r.getRoleARN(acctID)
if err != nil {
// r.getRoleARN errors are not terminal, we should requeue.
return requeue.NeededAfter(err, roleARNNotAvailableRequeueDelay)
}
}
region := r.getRegion(res)
roleARN := r.getRoleARN(acctID)
targetDescriptor := rmf.ResourceDescriptor()
endpointURL := r.getEndpointURL(res)
gvk := targetDescriptor.GroupVersionKind()

Expand Down Expand Up @@ -428,16 +447,16 @@ func (r *adoptionReconciler) handleReconcileError(err error) (ctrlrt.Result, err
// that the service controller is in.
func (r *adoptionReconciler) getOwnerAccountID(
res *ackv1alpha1.AdoptedResource,
) ackv1alpha1.AWSAccountID {
) (ackv1alpha1.AWSAccountID, bool) {
// look for owner account id in the namespace annotations
namespace := res.GetNamespace()
accID, ok := r.cache.Namespaces.GetOwnerAccountID(namespace)
if ok {
return ackv1alpha1.AWSAccountID(accID)
return ackv1alpha1.AWSAccountID(accID), true
}

// use controller configuration
return ackv1alpha1.AWSAccountID(r.cfg.AccountID)
return ackv1alpha1.AWSAccountID(r.cfg.AccountID), false
}

// getEndpointURL returns the AWS account that owns the supplied resource.
Expand All @@ -462,9 +481,12 @@ func (r *adoptionReconciler) getEndpointURL(
// the resources.
func (r *adoptionReconciler) getRoleARN(
acctID ackv1alpha1.AWSAccountID,
) ackv1alpha1.AWSResourceName {
roleARN, _ := r.cache.Accounts.GetAccountRoleARN(string(acctID))
return ackv1alpha1.AWSResourceName(roleARN)
) (ackv1alpha1.AWSResourceName, error) {
roleARN, err := r.cache.Accounts.GetAccountRoleARN(string(acctID))
if err != nil {
return "", fmt.Errorf("unable to retrieve role ARN for account %s: %v", acctID, err)
}
return ackv1alpha1.AWSResourceName(roleARN), nil
}

// getRegion returns the AWS region that the given resource is in or should be
Expand Down
59 changes: 48 additions & 11 deletions pkg/runtime/cache/account.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package cache

import (
"errors"
"sync"

"github.com/go-logr/logr"
Expand All @@ -23,6 +24,18 @@ import (
k8scache "k8s.io/client-go/tools/cache"
)

var (
// ErrCARMConfigMapNotFound is an error that is returned when the CARM
// configmap is not found.
ErrCARMConfigMapNotFound = errors.New("CARM configmap not found")
// ErrAccountIDNotFound is an error that is returned when the account ID
// is not found in the CARM configmap.
ErrAccountIDNotFound = errors.New("account ID not found in CARM configmap")
// ErrEmptyRoleARN is an error that is returned when the role ARN is empty
// in the CARM configmap.
ErrEmptyRoleARN = errors.New("role ARN is empty in CARM configmap")
)

const (
// ACKRoleAccountMap is the name of the configmap map object storing
// all the AWS Account IDs associated with their AWS Role ARNs.
Expand All @@ -34,15 +47,17 @@ const (
// make the changes accordingly.
type AccountCache struct {
sync.RWMutex
log logr.Logger
roleARNs map[string]string
log logr.Logger
roleARNs map[string]string
configMapCreated bool
}

// NewAccountCache instanciate a new AccountCache.
func NewAccountCache(log logr.Logger) *AccountCache {
return &AccountCache{
log: log.WithName("cache.account"),
roleARNs: make(map[string]string),
log: log.WithName("cache.account"),
roleARNs: make(map[string]string),
configMapCreated: false,
}
}

Expand All @@ -55,6 +70,7 @@ func resourceMatchACKRoleAccountsConfigMap(raw interface{}) bool {

// Run instantiate a new SharedInformer for ConfigMaps and runs it to begin processing items.
func (c *AccountCache) Run(clientSet kubernetes.Interface, stopCh <-chan struct{}) {
c.log.V(1).Info("Starting shared informer for accounts cache", "targetConfigMap", ACKRoleAccountMap)
informer := informersv1.NewConfigMapInformer(
clientSet,
ackSystemNamespace,
Expand All @@ -66,7 +82,10 @@ func (c *AccountCache) Run(clientSet kubernetes.Interface, stopCh <-chan struct{
if resourceMatchACKRoleAccountsConfigMap(obj) {
cm := obj.(*corev1.ConfigMap)
object := cm.DeepCopy()
c.updateAccountRoleData(object.Data)
// To avoid multiple mutex locks, we are updating the cache
// and the configmap existence flag in the same function.
configMapCreated := true
c.updateAccountRoleData(configMapCreated, object.Data)
c.log.V(1).Info("created account config map", "name", cm.ObjectMeta.Name)
}
},
Expand All @@ -75,15 +94,18 @@ func (c *AccountCache) Run(clientSet kubernetes.Interface, stopCh <-chan struct{
cm := desired.(*corev1.ConfigMap)
object := cm.DeepCopy()
//TODO(a-hilaly): compare data checksum before updating the cache
c.updateAccountRoleData(object.Data)
c.updateAccountRoleData(true, object.Data)
c.log.V(1).Info("updated account config map", "name", cm.ObjectMeta.Name)
}
},
DeleteFunc: func(obj interface{}) {
if resourceMatchACKRoleAccountsConfigMap(obj) {
cm := obj.(*corev1.ConfigMap)
newMap := make(map[string]string)
c.updateAccountRoleData(newMap)
// To avoid multiple mutex locks, we are updating the cache
// and the configmap existence flag in the same function.
configMapCreated := false
c.updateAccountRoleData(configMapCreated, newMap)
c.log.V(1).Info("deleted account config map", "name", cm.ObjectMeta.Name)
}
},
Expand All @@ -92,17 +114,32 @@ func (c *AccountCache) Run(clientSet kubernetes.Interface, stopCh <-chan struct{
}

// GetAccountRoleARN queries the AWS accountID associated Role ARN
// from the cached CARM configmap. This function is thread safe.
func (c *AccountCache) GetAccountRoleARN(accountID string) (string, bool) {
// from the cached CARM configmap. It will return an error if the
// configmap is not found, the accountID is not found or the role ARN
// is empty.
//
// This function is thread safe.
func (c *AccountCache) GetAccountRoleARN(accountID string) (string, error) {
c.RLock()
defer c.RUnlock()

if !c.configMapCreated {
return "", ErrCARMConfigMapNotFound
}
roleARN, ok := c.roleARNs[accountID]
return roleARN, ok && roleARN != ""
if !ok {
return "", ErrAccountIDNotFound
}
if roleARN == "" {
return "", ErrEmptyRoleARN
}
return roleARN, nil
}

// updateAccountRoleData updates the CARM map. This function is thread safe.
func (c *AccountCache) updateAccountRoleData(data map[string]string) {
func (c *AccountCache) updateAccountRoleData(exist bool, data map[string]string) {
c.Lock()
defer c.Unlock()
c.roleARNs = data
c.configMapCreated = exist
}
74 changes: 58 additions & 16 deletions pkg/runtime/cache/account_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,14 @@ const (
testAccountARN1 = "arn:aws:iam::012345678912:role/S3Access"
testAccount2 = "219876543210"
testAccountARN2 = "arn:aws:iam::012345678912:role/root"
testAccount3 = "321987654321"
testAccountARN3 = ""
)

func TestAccountCache(t *testing.T) {
accountsMap1 := map[string]string{
testAccount1: testAccountARN1,
testAccount3: testAccountARN3,
}

accountsMap2 := map[string]string{
Expand All @@ -65,8 +68,14 @@ func TestAccountCache(t *testing.T) {
stopCh := make(chan struct{})
accountCache.Run(k8sClient, stopCh)

// Before creating the configmap, the accountCache should error for any
// GetAccountRoleARN call.
_, err := accountCache.GetAccountRoleARN(testAccount1)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrCARMConfigMapNotFound)

// Test create events
_, err := k8sClient.CoreV1().ConfigMaps(testNamespace).Create(
_, err = k8sClient.CoreV1().ConfigMaps(testNamespace).Create(
context.Background(),
&corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -80,8 +89,15 @@ func TestAccountCache(t *testing.T) {

time.Sleep(time.Second)

_, ok := accountCache.GetAccountRoleARN("random-account")
require.False(t, ok)
// Test with non existing account
_, err = accountCache.GetAccountRoleARN("random-account-not-exist")
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrCARMConfigMapNotFound)

// Test with existing account
_, err = accountCache.GetAccountRoleARN(testAccount1)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrCARMConfigMapNotFound)

k8sClient.CoreV1().ConfigMaps(testNamespace).Create(
context.Background(),
Expand All @@ -97,12 +113,20 @@ func TestAccountCache(t *testing.T) {

time.Sleep(time.Second)

roleARN, ok := accountCache.GetAccountRoleARN(testAccount1)
require.True(t, ok)
require.Equal(t, roleARN, testAccountARN1)
// Test with non existing account
_, err = accountCache.GetAccountRoleARN("random-account-not-exist")
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrAccountIDNotFound)

_, ok = accountCache.GetAccountRoleARN(testAccount2)
require.False(t, ok)
// Test with existing account - but role ARN is empty
_, err = accountCache.GetAccountRoleARN(testAccount3)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrEmptyRoleARN)

// Test with existing account
roleARN, err := accountCache.GetAccountRoleARN(testAccount1)
require.Nil(t, err)
require.Equal(t, roleARN, testAccountARN1)

// Test update events
k8sClient.CoreV1().ConfigMaps("ack-system").Update(
Expand All @@ -119,12 +143,23 @@ func TestAccountCache(t *testing.T) {

time.Sleep(time.Second)

roleARN, ok = accountCache.GetAccountRoleARN(testAccount1)
require.True(t, ok)
// Test with non existing account
_, err = accountCache.GetAccountRoleARN("random-account-not-exist")
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrAccountIDNotFound)

// Test that account was removed
_, err = accountCache.GetAccountRoleARN(testAccount3)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrAccountIDNotFound)

// Test with existing account
roleARN, err = accountCache.GetAccountRoleARN(testAccount1)
require.Nil(t, err)
require.Equal(t, roleARN, testAccountARN1)

roleARN, ok = accountCache.GetAccountRoleARN(testAccount2)
require.True(t, ok)
roleARN, err = accountCache.GetAccountRoleARN(testAccount2)
require.Nil(t, err)
require.Equal(t, roleARN, testAccountARN2)

// Test delete events
Expand All @@ -136,9 +171,16 @@ func TestAccountCache(t *testing.T) {

time.Sleep(time.Second)

_, ok = accountCache.GetAccountRoleARN(testAccount1)
require.False(t, ok)
_, ok = accountCache.GetAccountRoleARN(testAccount2)
require.False(t, ok)
// Test that accounts ware removed
_, err = accountCache.GetAccountRoleARN(testAccount1)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrCARMConfigMapNotFound)

_, err = accountCache.GetAccountRoleARN(testAccount2)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrCARMConfigMapNotFound)

_, err = accountCache.GetAccountRoleARN(testAccount3)
require.NotNil(t, err)
require.Equal(t, err, ackrtcache.ErrCARMConfigMapNotFound)
}
Loading