Skip to content

Commit

Permalink
Fix race in GN pod for out-of-order RemoteEndpoint events
Browse files Browse the repository at this point in the history
When there is a gateway migration in a remote cluster, or if there
is any stale endpoint on the Broker associated with the remote cluster,
the events might arrive out of order, which can create issues for
datapath connectivity. This PR includes the necessary checks in the
Globalnet pod to ignore any stale events.

Related to: submariner-io#2399
Signed-off-by: Sridhar Gaddam <sgaddam@redhat.com>
  • Loading branch information
sridhargaddam committed Jun 5, 2023
1 parent e803302 commit 823aec5
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
32 changes: 27 additions & 5 deletions pkg/globalnet/controllers/gateway_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/submariner-io/submariner/pkg/netlink"
routeAgent "github.com/submariner-io/submariner/pkg/routeagent_driver/constants"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/dynamic"
Expand All @@ -47,11 +48,12 @@ import (
func NewGatewayMonitor(spec Specification, localCIDRs []string, config *watcher.Config) (Interface, error) {
// We'll panic if config is nil, this is intentional
gatewayMonitor := &gatewayMonitor{
baseController: newBaseController(),
spec: spec,
isGatewayNode: atomic.Bool{},
localSubnets: sets.New(localCIDRs...).UnsortedList(),
remoteSubnets: sets.New[string](),
baseController: newBaseController(),
spec: spec,
isGatewayNode: atomic.Bool{},
localSubnets: sets.New(localCIDRs...).UnsortedList(),
remoteSubnets: sets.New[string](),
remoteEndpointTimeStamp: map[string]metav1.Time{},
}

var err error
Expand Down Expand Up @@ -143,6 +145,14 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, _ in
logger.V(log.DEBUG).Infof("In processNextEndpoint, endpoint info: %+v", endpoint)

if endpoint.Spec.ClusterID != g.spec.ClusterID {
lastProcessedTime, ok := g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID]

if ok && lastProcessedTime.After(endpoint.CreationTimestamp.Time) {
logger.Infof("Ignoring new remote %#v since a later endpoint was already"+
"processed", endpoint)
return false
}

logger.V(log.DEBUG).Infof("Endpoint %q, host: %q belongs to a remote cluster",
endpoint.Spec.ClusterID, endpoint.Spec.Hostname)

Expand All @@ -168,6 +178,8 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, _ in
}
}

g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID] = endpoint.CreationTimestamp

return false
}

Expand Down Expand Up @@ -206,6 +218,16 @@ func (g *gatewayMonitor) handleCreatedOrUpdatedEndpoint(obj runtime.Object, _ in
func (g *gatewayMonitor) handleRemovedEndpoint(obj runtime.Object, _ int) bool {
endpoint := obj.(*v1.Endpoint)

lastProcessedTime, ok := g.remoteEndpointTimeStamp[endpoint.Spec.ClusterID]

if ok && lastProcessedTime.After(endpoint.CreationTimestamp.Time) {
logger.Infof("Ignoring deleted remote %#v since a later endpoint was already"+
"processed", endpoint)
return false
}

delete(g.remoteEndpointTimeStamp, endpoint.Spec.ClusterID)

logger.V(log.DEBUG).Infof("Informed of removed endpoint for gateway monitor: %v", endpoint)

hostname, err := os.Hostname()
Expand Down
21 changes: 11 additions & 10 deletions pkg/globalnet/controllers/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,17 @@ type baseController struct {

type gatewayMonitor struct {
*baseController
syncerConfig *syncer.ResourceSyncerConfig
endpointWatcher watcher.Interface
spec Specification
ipt iptables.Interface
isGatewayNode atomic.Bool
nodeName string
localSubnets []string
remoteSubnets sets.Set[string]
controllersMutex sync.Mutex // Protects controllers
controllers []Interface
syncerConfig *syncer.ResourceSyncerConfig
endpointWatcher watcher.Interface
remoteEndpointTimeStamp map[string]metav1.Time
spec Specification
ipt iptables.Interface
isGatewayNode atomic.Bool
nodeName string
localSubnets []string
remoteSubnets sets.Set[string]
controllersMutex sync.Mutex // Protects controllers
controllers []Interface
}

type baseSyncerController struct {
Expand Down

0 comments on commit 823aec5

Please sign in to comment.