Skip to content

Commit

Permalink
Improved recovery for policy base routing
Browse files Browse the repository at this point in the history
Related issue: networkservicemesh/deployments-k8s/9463

Fix for faulty ip rules after forwarder restart
Using proper key in internal Map for routing table IDs;
Ensure that the routing table belongs to the link handled by NSM;
At policy recovery do not flush the routing table if more than one rule reffering to it,
   deleting the last rule with reference to a given routing table will flush the table.

Signed-off-by: Laszlo Kiraly <laszlo.kiraly@est.tech>
  • Loading branch information
ljkiraly committed Jul 5, 2023
1 parent a23442d commit 8a9f7c9
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/networkservicemesh/sdk/pkg/tools/log"

link "github.com/networkservicemesh/sdk-kernel/pkg/kernel"
"github.com/networkservicemesh/sdk-kernel/pkg/kernel/tools/nshandle"
)

func create(ctx context.Context, conn *networkservice.Connection, tableIDs *Map, nsRTableNextIDToConnID *NetnsRTableNextIDToConnMap) error {
Expand Down Expand Up @@ -70,14 +71,20 @@ func create(ctx context.Context, conn *networkservice.Connection, tableIDs *Map,

// Remove no longer existing policies
for tableID, policy := range toRemove {
if errRule := delRule(ctx, netlinkHandle, policy, tableID); errRule != nil {
if errRule := delRule(ctx, netlinkHandle, policy, tableID, l.Attrs().Index); errRule != nil {
return errRule
}
delete(ps, tableID)
tableIDs.Store(connID, ps)
}

// Get netns for key to namespace to routing tableID map
netNS, err := nshandle.FromURL(mechanism.GetNetNSURL())
if err != nil {
return err
}

// Add new policies
netNSInode := mechanism.GetNetNSInode()
for _, policy := range toAdd {
var tableID int
var nsrtid *NetnsRTableNextID
Expand All @@ -87,8 +94,12 @@ func create(ctx context.Context, conn *networkservice.Connection, tableIDs *Map,
if err != nil {
return err
}
nsrtid = NewNetnsRTableNextID(netNSInode, tableID)
nsrtid = NewNetnsRTableNextID(netNS.UniqueId(), tableID)
storedConnID, _ := nsRTableNextIDToConnID.LoadOrStore(*nsrtid, connID)
log.FromContext(ctx).
WithField("nsrtid", *nsrtid).
WithField("ConnID", storedConnID).
Debug("storedTableID")
if connID == storedConnID {
// No other connection adding policy using this free routing table ID
break
Expand Down Expand Up @@ -272,15 +283,23 @@ func del(ctx context.Context, conn *networkservice.Connection, tableIDs *Map, ns
return err
}
defer netlinkHandle.Close()
ifName := mechanism.GetInterfaceName()
l, err := netlinkHandle.LinkByName(ifName)
if err != nil {
return errors.Wrapf(err, "failed to find link %s", ifName)
}
ps, ok := tableIDs.LoadAndDelete(conn.GetId())
if ok {
netNSInode := mechanism.GetNetNSInode()
netNS, err := nshandle.FromURL(mechanism.GetNetNSURL())
if err != nil {
return err
}
for tableID := range ps {
nsrtid := NewNetnsRTableNextID(netNSInode, tableID)
nsrtid := NewNetnsRTableNextID(netNS.UniqueId(), tableID)
nsRTableNextIDToConnID.Delete(*nsrtid)
}
for tableID, policy := range ps {
if err := delRule(ctx, netlinkHandle, policy, tableID); err != nil {
if err := delRule(ctx, netlinkHandle, policy, tableID, l.Attrs().Index); err != nil {
return err
}
}
Expand All @@ -289,16 +308,11 @@ func del(ctx context.Context, conn *networkservice.Connection, tableIDs *Map, ns
return nil
}

func delRule(ctx context.Context, handle *netlink.Handle, policy *networkservice.PolicyRoute, tableID int) error {
func delRuleOnly(ctx context.Context, handle *netlink.Handle, policy *networkservice.PolicyRoute) error {
rule, err := policyToRule(policy)
if err != nil {
return err
}

if err := flushTable(ctx, handle, tableID); err != nil {
return err
}

now := time.Now()
if err := handle.RuleDel(rule); err != nil {
log.FromContext(ctx).
Expand All @@ -320,10 +334,20 @@ func delRule(ctx context.Context, handle *netlink.Handle, policy *networkservice
return nil
}

func flushTable(ctx context.Context, handle *netlink.Handle, tableID int) error {
func delRule(ctx context.Context, handle *netlink.Handle, policy *networkservice.PolicyRoute, tableID, linkIndex int) error {
if err := flushTable(ctx, handle, tableID, linkIndex); err != nil {
return err
}
if err := delRuleOnly(ctx, handle, policy); err != nil {
return err
}
return nil
}
func flushTable(ctx context.Context, handle *netlink.Handle, tableID, linkIndex int) error {
routes, err := handle.RouteListFiltered(netlink.FAMILY_ALL,
&netlink.Route{
Table: tableID,
Table: tableID,
LinkIndex: linkIndex,
},
netlink.RT_FILTER_TABLE)
if err != nil {
Expand All @@ -334,6 +358,9 @@ func flushTable(ctx context.Context, handle *netlink.Handle, tableID int) error
if err != nil {
return errors.Wrapf(err, "failed to delete route")
}
log.FromContext(ctx).
WithField("Route", &routes[i]).
WithField("netlink", "RouteDel").Debug("completed")
}
log.FromContext(ctx).
WithField("tableID", tableID).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,18 @@ func recoverTableIDs(ctx context.Context, conn *networkservice.Connection, table
}
defer netlinkHandle.Close()

ifName := mechanism.GetInterfaceName()
l, err := netlinkHandle.LinkByName(ifName)
if err != nil {
return errors.Wrapf(err, "failed to find link %s", ifName)
}

podRules, err := netlinkHandle.RuleList(netlink.FAMILY_ALL)
if err != nil {
return errors.Wrap(err, "failed to get list of rules")
}

tableIDtoPolicyMap := make(map[int]*networkservice.PolicyRoute)
// try to find the corresponding missing policies in the network namespace of the pod
for _, policy := range conn.Context.IpContext.Policies {
policyRule, err := policyToRule(policy)
Expand All @@ -65,20 +72,42 @@ func recoverTableIDs(ctx context.Context, conn *networkservice.Connection, table
}
for i := range podRules {
if ruleEquals(&podRules[i], policyRule) {
tableIDtoPolicyMap[podRules[i].Table] = policy
log.FromContext(ctx).
WithField("From", policy.From).
WithField("IPProto", policy.Proto).
WithField("DstPort", policy.DstPort).
WithField("SrcPort", policy.SrcPort).
WithField("Table", podRules[i].Table).Debug("policy recovered")
err := delRule(ctx, netlinkHandle, policy, podRules[i].Table)
if err != nil {
return err
}
break
}
}
}

return deleteRemainders(ctx, netlinkHandle, tableIDtoPolicyMap, podRules, l)
}
return nil
}

func deleteRemainders(ctx context.Context, netlinkHandle *netlink.Handle, tableIDtoPolicyMap map[int]*networkservice.PolicyRoute, podRules []netlink.Rule, l netlink.Link) error {
for tableID, policy := range tableIDtoPolicyMap {
usage := 0
for i := range podRules {
if podRules[i].Table == tableID {
usage++
}
}
if usage == 1 {
err := delRule(ctx, netlinkHandle, policy, tableID, l.Attrs().Index)
if err != nil {
return err
}
} else {
err := delRuleOnly(ctx, netlinkHandle, policy)
if err != nil {
return err
}
}
}
return nil
}
Expand Down

0 comments on commit 8a9f7c9

Please sign in to comment.