Skip to content

Commit

Permalink
Fix race in GN pod for out-of-order RemoteEndpoint events
Browse files Browse the repository at this point in the history
When a gateway migrates in a remote cluster, or when a stale endpoint
associated with the remote cluster exists on the Broker, RemoteEndpoint
events may arrive out of order, which can cause issues for datapath
connectivity. This PR adds the necessary checks to the Globalnet pod
so that it ignores any stale events.

Related to: submariner-io#2399
Signed-off-by: Sridhar Gaddam <sgaddam@redhat.com>
  • Loading branch information
sridhargaddam committed Jun 5, 2023
1 parent 28f674a commit 09ea569
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
32 changes: 27 additions & 5 deletions pkg/globalnet/controllers/gateway_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/submariner-io/submariner/pkg/netlink"
routeAgent "github.com/submariner-io/submariner/pkg/routeagent_driver/constants"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/dynamic"
Expand All @@ -47,11 +48,12 @@ import (
func NewGatewayMonitor(spec Specification, localCIDRs []string, config *watcher.Config) (Interface, error) {
// We'll panic if config is nil, this is intentional
gatewayMonitor := &gatewayMonitor{
baseController: newBaseController(),
spec: spec,
isGatewayNode: atomic.Bool{},
localSubnets: sets.New(localCIDRs...).UnsortedList(),
remoteSubnets: sets.New[string](),
baseController: newBaseController(),
spec: spec,
isGatewayNode: atomic.Bool{},
localSubnets: sets.New(localCIDRs...).UnsortedList(),
remoteSubnets: sets.New[string](),
remoteEndpointTimeStamp: map[string]metav1.Time{},
}

var err error
Expand Down Expand Up @@ -143,6 +145,14 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, numR
logger.V(log.DEBUG).Infof("In processNextEndpoint, endpoint info: %+v", endpoint)

if endpoint.Spec.ClusterID != g.spec.ClusterID {
lastProcessedTime, ok := g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID]

if ok && lastProcessedTime.After(endpoint.CreationTimestamp.Time) {
logger.Infof("Ignoring new remote %#v since a later endpoint was already"+
"processed", endpoint)
return false
}

logger.V(log.DEBUG).Infof("Endpoint %q, host: %q belongs to a remote cluster",
endpoint.Spec.ClusterID, endpoint.Spec.Hostname)

Expand All @@ -168,6 +178,8 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, numR
}
}

g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID] = endpoint.CreationTimestamp

return false
}

Expand Down Expand Up @@ -206,6 +218,16 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, numR
func (g *gatewayMonitor) handleRemovedEndpoint(obj runtime.Object, numRequeues int) bool {
endpoint := obj.(*v1.Endpoint)

lastProcessedTime, ok := g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID]

if ok && lastProcessedTime.After(endpoint.CreationTimestamp.Time) {
logger.Infof("Ignoring deleted remote %#v since a later endpoint was already"+
"processed", endpoint)
return false
}

delete(g.remoteEndpointTimeStamp, endpoint.Spec.ClusterID)

logger.V(log.DEBUG).Infof("Informed of removed endpoint for gateway monitor: %v", endpoint)

hostname, err := os.Hostname()
Expand Down
21 changes: 11 additions & 10 deletions pkg/globalnet/controllers/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,17 @@ type baseController struct {

type gatewayMonitor struct {
*baseController
syncerConfig *syncer.ResourceSyncerConfig
endpointWatcher watcher.Interface
spec Specification
ipt iptables.Interface
isGatewayNode atomic.Bool
nodeName string
localSubnets []string
remoteSubnets sets.Set[string]
controllersMutex sync.Mutex // Protects controllers
controllers []Interface
syncerConfig *syncer.ResourceSyncerConfig
endpointWatcher watcher.Interface
remoteEndpointTimeStamp map[string]metav1.Time
spec Specification
ipt iptables.Interface
isGatewayNode atomic.Bool
nodeName string
localSubnets []string
remoteSubnets sets.Set[string]
controllersMutex sync.Mutex // Protects controllers
controllers []Interface
}

type baseSyncerController struct {
Expand Down

0 comments on commit 09ea569

Please sign in to comment.