Skip to content

Commit

Permalink
Fix race in GN pod for out-of-order RemoteEndpoint events
Browse files Browse the repository at this point in the history
When there is a gateway migration in a remote cluster or if there
is any stale endpoint on the Broker associated with the remoteCluster,
the events might come in out of order which can create issues for
datapath connectivity. This PR includes the necessary checks in
Globalnet pod to ignore any stale events.

Related to: #2399
Signed-off-by: Sridhar Gaddam <sgaddam@redhat.com>
  • Loading branch information
sridhargaddam authored and tpantelis committed Jun 5, 2023
1 parent 5ec40ef commit 4f06a48
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
32 changes: 27 additions & 5 deletions pkg/globalnet/controllers/gateway_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/submariner-io/submariner/pkg/netlink"
routeAgent "github.com/submariner-io/submariner/pkg/routeagent_driver/constants"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/kubernetes/scheme"
Expand All @@ -45,11 +46,12 @@ import (
func NewGatewayMonitor(spec Specification, localCIDRs []string, config *watcher.Config) (Interface, error) {
// We'll panic if config is nil, this is intentional
gatewayMonitor := &gatewayMonitor{
baseController: newBaseController(),
spec: spec,
isGatewayNode: false,
localSubnets: stringset.New(localCIDRs...).Elements(),
remoteSubnets: stringset.NewSynchronized(),
baseController: newBaseController(),
spec: spec,
isGatewayNode: false,
localSubnets: stringset.New(localCIDRs...).Elements(),
remoteSubnets: stringset.NewSynchronized(),
remoteEndpointTimeStamp: map[string]metav1.Time{},
}

var err error
Expand Down Expand Up @@ -143,6 +145,14 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, numR
logger.V(log.DEBUG).Infof("In processNextEndpoint, endpoint info: %+v", endpoint)

if endpoint.Spec.ClusterID != g.spec.ClusterID {
lastProcessedTime, ok := g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID]

if ok && lastProcessedTime.After(endpoint.CreationTimestamp.Time) {
logger.Infof("Ignoring new remote %#v since a later endpoint was already"+
"processed", endpoint)
return false
}

logger.V(log.DEBUG).Infof("Endpoint %q, host: %q belongs to a remote cluster",
endpoint.Spec.ClusterID, endpoint.Spec.Hostname)

Expand All @@ -166,6 +176,8 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, numR
g.markRemoteClusterTraffic(remoteSubnet, AddRules)
}

g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID] = endpoint.CreationTimestamp

return false
}

Expand Down Expand Up @@ -211,6 +223,16 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, numR
func (g *gatewayMonitor) handleRemovedEndpoint(obj runtime.Object, numRequeues int) bool {
endpoint := obj.(*v1.Endpoint)

lastProcessedTime, ok := g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID]

if ok && lastProcessedTime.After(endpoint.CreationTimestamp.Time) {
logger.Infof("Ignoring deleted remote %#v since a later endpoint was already"+
"processed", endpoint)
return false
}

delete(g.remoteEndpointTimeStamp, endpoint.Spec.ClusterID)

logger.V(log.DEBUG).Infof("Informed of removed endpoint for gateway monitor: %v", endpoint)

hostname, err := os.Hostname()
Expand Down
21 changes: 11 additions & 10 deletions pkg/globalnet/controllers/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,17 @@ type baseController struct {

type gatewayMonitor struct {
*baseController
syncerConfig *syncer.ResourceSyncerConfig
endpointWatcher watcher.Interface
spec Specification
ipt iptables.Interface
isGatewayNode bool
nodeName string
syncMutex sync.Mutex
localSubnets []string
remoteSubnets stringset.Interface
controllers []Interface
syncerConfig *syncer.ResourceSyncerConfig
endpointWatcher watcher.Interface
remoteEndpointTimeStamp map[string]metav1.Time
spec Specification
ipt iptables.Interface
isGatewayNode bool
nodeName string
syncMutex sync.Mutex
localSubnets []string
remoteSubnets stringset.Interface
controllers []Interface
}

type baseSyncerController struct {
Expand Down

0 comments on commit 4f06a48

Please sign in to comment.