Skip to content

Commit

Permalink
when IPAMD connection fails, try to teardown pod network using prevRe…
Browse files Browse the repository at this point in the history
…sult (#2145)
  • Loading branch information
jdn5126 authored Dec 8, 2022
1 parent 261af6c commit 320153f
Show file tree
Hide file tree
Showing 2 changed files with 401 additions and 35 deletions.
119 changes: 88 additions & 31 deletions cmd/routed-eni-cni-plugin/cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import (

const ipamdAddress = "127.0.0.1:50051"

const dummyVlanInterfacePrefix = "dummy"
const dummyInterfacePrefix = "dummy"

var version string

Expand Down Expand Up @@ -187,11 +187,11 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
log.Infof("Received add network response from ipamd for container %s interface %s: %+v",
args.ContainerID, args.IfName, r)

//We will let the values in result struct guide us in terms of IP Address Family configured.
// We will let the values in result struct guide us in terms of IP Address Family configured.
var v4Addr, v6Addr, addr *net.IPNet
var addrFamily string

//We don't support dual stack mode currently so it has to be either v4 or v6 mode.
// We don't support dual stack mode currently so it has to be either v4 or v6 mode.
if r.IPv4Addr != "" {
v4Addr = &net.IPNet{
IP: net.ParseIP(r.IPv4Addr),
Expand All @@ -209,31 +209,28 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
}

var hostVethName string
var dummyVlanInterface *current.Interface
var dummyInterface *current.Interface

// The dummy interface is purely virtual and is stored in the prevResult struct to assist in cleanup during the DEL command.
dummyInterfaceName := networkutils.GeneratePodHostVethName(dummyInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))

// Non-zero value means pods are using branch ENI
if r.PodVlanId != 0 {
hostVethNamePrefix := sgpp.BuildHostVethNamePrefix(conf.VethPrefix, conf.PodSGEnforcingMode)
hostVethName = networkutils.GeneratePodHostVethName(hostVethNamePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupBranchENIPodNetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.PodVlanId), r.PodENIMAC,
r.PodENISubnetGW, int(r.ParentIfIndex), mtu, conf.PodSGEnforcingMode, log)

// This is a dummyVlanInterfaceName generated to identify dummyVlanInterface
// which will be created for PPSG scenario to pass along the vlanId information
// as a part of the ADD cmd Result struct
// The podVlanId is used by DEL cmd, fetched from the prevResult struct to cleanup the pod network
dummyVlanInterfaceName := networkutils.GeneratePodHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))

// The dummyVlanInterface is purely virtual and relevent only for ppsg, so we decided to keep it separate
// and not overload the already available hostVethInterface
dummyVlanInterface = &current.Interface{Name: dummyVlanInterfaceName, Mac: fmt.Sprint(r.PodVlanId)}
log.Debugf("Using dummy vlanInterface: %v", dummyVlanInterface)
// For branch ENI mode, the pod VLAN ID is packed in Interface.Mac
dummyInterface = &current.Interface{Name: dummyInterfaceName, Mac: fmt.Sprint(r.PodVlanId)}
} else {
// build hostVethName
// Note: the maximum length for linux interface name is 15
hostVethName = networkutils.GeneratePodHostVethName(conf.VethPrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupPodNetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.DeviceNumber), mtu, log)
// For non-branch ENI, the pod VLAN ID value of 0 is packed in Interface.Mac, while the interface device number is packed in Interface.Sandbox
dummyInterface = &current.Interface{Name: dummyInterfaceName, Mac: fmt.Sprint(0), Sandbox: fmt.Sprint(r.DeviceNumber)}
}
log.Debugf("Using dummy interface: %v", dummyInterface)

if err != nil {
log.Errorf("Failed SetupPodNetwork for container %s: %v",
Expand Down Expand Up @@ -280,10 +277,8 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
},
}

// We append dummyVlanInterface only for pods using branch ENI
if dummyVlanInterface != nil {
result.Interfaces = append(result.Interfaces, dummyVlanInterface)
}
// dummy interface is appended to PrevResult for use during cleanup
result.Interfaces = append(result.Interfaces, dummyInterface)

return cniTypes.PrintResult(result, conf.CNIVersion)
}
Expand Down Expand Up @@ -311,6 +306,7 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return errors.Wrap(err, "del cmd: failed to load k8s config from args")
}

// For pods using branch ENI, try to delete using previous result
handled, err := tryDelWithPrevResult(driverClient, conf, k8sArgs, args.IfName, args.Netns, log)
if err != nil {
return errors.Wrap(err, "del cmd: failed to delete with prevResult")
Expand All @@ -328,6 +324,13 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
log.Errorf("Failed to connect to backend server for container %s: %v",
args.ContainerID, err)

// When IPAMD is unreachable, try to teardown pod network using previous result. This action prevents rules from leaking while IPAMD is unreachable.
// Note that connection error is still returned to kubelet so that delete retries until IPAMD can be notified.
if teardownPodNetworkWithPrevResult(driverClient, conf, k8sArgs, args.IfName, log) {
log.Infof("Handled pod teardown using prevResult: ContainerID(%s) Netns(%s) IfName(%s) PodNamespace(%s) PodName(%s)",
args.ContainerID, args.Netns, args.IfName, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
}

return errors.Wrap(err, "del cmd: failed to connect to backend server")
}
defer conn.Close()
Expand Down Expand Up @@ -355,6 +358,12 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
}
log.Errorf("Error received from DelNetwork gRPC call for container %s: %v",
args.ContainerID, err)

// DelNetworkRequest may return a connection error, so try to delete using PrevResult whenever an error is returned.
if teardownPodNetworkWithPrevResult(driverClient, conf, k8sArgs, args.IfName, log) {
log.Infof("Handled pod teardown using prevResult: ContainerID(%s) Netns(%s) IfName(%s) PodNamespace(%s) PodName(%s)",
args.ContainerID, args.Netns, args.IfName, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
}
return errors.Wrap(err, "del cmd: error received from DelNetwork gRPC call")
}

Expand Down Expand Up @@ -405,6 +414,18 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return nil
}

func getContainerIP(prevResult *current.Result, contVethName string) (net.IPNet, error) {
containerIfaceIndex, _, found := cniutils.FindInterfaceByName(prevResult.Interfaces, contVethName)
if !found {
return net.IPNet{}, errors.Errorf("cannot find contVethName %s in prevResult", contVethName)
}
containerIPs := cniutils.FindIPConfigsByIfaceIndex(prevResult.IPs, containerIfaceIndex)
if len(containerIPs) != 1 {
return net.IPNet{}, errors.Errorf("found %d containerIPs for %v in prevResult", len(containerIPs), contVethName)
}
return containerIPs[0].Address, nil
}

// tryDelWithPrevResult will try to process CNI delete request without IPAMD.
// returns true if the del request is handled.
func tryDelWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArgs K8sArgs, contVethName string, netNS string, log logger.Logger) (bool, error) {
Expand All @@ -414,36 +435,72 @@ func tryDelWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArg
return false, nil
}

dummyIfaceName := networkutils.GeneratePodHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
dummyIfaceName := networkutils.GeneratePodHostVethName(dummyInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
_, dummyIface, found := cniutils.FindInterfaceByName(prevResult.Interfaces, dummyIfaceName)
if !found {
return false, nil
}
podVlanID, err := strconv.Atoi(dummyIface.Mac)
if err != nil || podVlanID == 0 {
return true, errors.Errorf("malformed vlanID in prevResult: %s", dummyIface.Mac)
if err != nil {
return false, errors.Errorf("malformed vlanID in prevResult: %s", dummyIface.Mac)
}
// For non-branch ENI pods, deletion requires handshake with IPAMD
if podVlanID == 0 {
return false, nil
}
if isNetnsEmpty(netNS) {
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod:%s namespace: %s containerID:%s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod: %s namespace: %s containerID: %s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
return true, nil
}

containerIfaceIndex, _, found := cniutils.FindInterfaceByName(prevResult.Interfaces, contVethName)
if !found {
return false, errors.Errorf("cannot find contVethName %s in prevResult", contVethName)
}
containerIPs := cniutils.FindIPConfigsByIfaceIndex(prevResult.IPs, containerIfaceIndex)
if len(containerIPs) != 1 {
return false, errors.Errorf("found %d containerIP for %v in prevResult", len(containerIPs), contVethName)
containerIP, err := getContainerIP(prevResult, contVethName)
if err != nil {
return false, err
}
containerIP := containerIPs[0].Address

if err := driverClient.TeardownBranchENIPodNetwork(&containerIP, podVlanID, conf.PodSGEnforcingMode, log); err != nil {
return true, err
}
return true, nil
}

// teardownPodNetworkWithPrevResult will try to process CNI delete for non-branch ENIs without IPAMD.
// Returns true if pod network is torn down
func teardownPodNetworkWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArgs K8sArgs, contVethName string, log logger.Logger) bool {
// For non-branch ENI, prevResult is only available in v1.13.0+
prevResult, ok := conf.PrevResult.(*current.Result)
if !ok {
return false
}
dummyIfaceName := networkutils.GeneratePodHostVethName(dummyInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
_, dummyIface, found := cniutils.FindInterfaceByName(prevResult.Interfaces, dummyIfaceName)
if !found {
return false
}
// For non-branch ENI, VLAN ID of 0 is encoded in Mac and device number is encoded in Sandbox
podVlanID, err := strconv.Atoi(dummyIface.Mac)
if err != nil || podVlanID != 0 {
log.Errorf("Invalid VLAN ID for non-branch ENI pod: %s", dummyIface.Mac)
return false
}
deviceNumber, err := strconv.Atoi(dummyIface.Sandbox)
if err != nil {
log.Errorf("Invalid device number for pod: %s", dummyIface.Sandbox)
return false
}
containerIP, err := getContainerIP(prevResult, contVethName)
if err != nil {
log.Errorf("Failed to get container IP: %v", err)
return false
}

if err := driverClient.TeardownPodNetwork(&containerIP, deviceNumber, log); err != nil {
log.Errorf("Failed to teardown pod network: %v", err)
return false
}
return true
}

// Scope usage of this function to only SG pods scenario
// Don't process deletes when NetNS is empty
// as it implies that veth for this request is already deleted
Expand Down
Loading

0 comments on commit 320153f

Please sign in to comment.