Skip to content

Commit

Permalink
support new SGPP standard mode
Browse files Browse the repository at this point in the history
  • Loading branch information
M00nF1sh committed Mar 31, 2022
1 parent bef587e commit a9f9a1d
Show file tree
Hide file tree
Showing 17 changed files with 5,750 additions and 839 deletions.
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

# amazon-vpc-cni-k8s

Networking plugin for pod networking in [Kubernetes](https://kubernetes.io/) using [Elastic Network Interfaces](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-eni.html) on AWS.
Expand Down Expand Up @@ -421,6 +422,31 @@ Any of the WARM targets do not impact the scale of the branch ENI pods so you wi

**NOTE!** Toggling `ENABLE_POD_ENI` from `true` to `false` will not detach the Trunk ENI from instance. To delete/detach the Trunk ENI from instance, you need recycle the instance.


---

#### `POD_SECURITY_GROUP_ENFORCING_MODE` (v1.11.0+)

Type: String

Default: `strict`

Valid Values: `strict`, `standard`

Once `ENABLE_POD_ENI` is set to `true`, this value controls how the traffic of pods with security group behaves.

* `strict` mode: all inbound/outbound traffic from pod with security group will be enforced by security group rules. This is the **default** mode if POD_SECURITY_GROUP_ENFORCING_MODE is not set.

* `standard` mode: the traffic of pod with security group behaves same as pods without security group, except that each pod occupies a dedicated branch ENI.
* inbound traffic to pod with security group from another host will be enforced by security group rules.
* outbound traffic from pod with security group to another host in same VPC will be enforced by security group rules.
* inbound/outbound traffic from another pod on same host or another service on same host(such as kubelet/nodeLocalDNS) won't be enforced by security group rules.
* outbound traffic from pod with security group to IP address outside VPC
* if externalSNAT enabled, traffic won't be SNATed, thus will be enforced by security group rules.
* if externalSNAT disabled, traffic will be SNATed via eth0, thus will only be enforced by security group associated with eth0.

**NOTE!**: To make new behavior be in effect after switching the mode, existing pods with security group must be recycled. Alternatively you can restart the nodes as well.

---

#### `DISABLE_TCP_EARLY_DEMUX` (v1.7.3+)
Expand Down
113 changes: 61 additions & 52 deletions cmd/routed-eni-cni-plugin/cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ import (
"strconv"
"strings"

"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/cniutils"

"github.com/aws/amazon-vpc-cni-k8s/pkg/sgpp"

"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
"github.com/containernetworking/cni/pkg/types/current"
Expand All @@ -45,7 +49,6 @@ import (

const ipamdAddress = "127.0.0.1:50051"

const vlanInterfacePrefix = "vlan"
const dummyVlanInterfacePrefix = "dummy"

var version string
Expand All @@ -62,6 +65,9 @@ type NetConf struct {
// MTU for eth0
MTU string `json:"mtu"`

// PodSGEnforcingMode is the enforcing mode for Security groups for pods feature
PodSGEnforcingMode sgpp.EnforcingMode `json:"podSGEnforcingMode"`

PluginLogFile string `json:"pluginLogFile"`

PluginLogLevel string `json:"pluginLogLevel"`
Expand Down Expand Up @@ -91,8 +97,9 @@ func init() {
func LoadNetConf(bytes []byte) (*NetConf, logger.Logger, error) {
// Default config
conf := NetConf{
MTU: "9001",
VethPrefix: "eni",
MTU: "9001",
VethPrefix: "eni",
PodSGEnforcingMode: sgpp.DefaultEnforcingMode,
}

if err := json.Unmarshal(bytes, &conf); err != nil {
Expand Down Expand Up @@ -208,9 +215,10 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap

// Non-zero value means pods are using branch ENI
if r.PodVlanId != 0 {
hostVethName = generateHostVethName(vlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupPodENINetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.PodVlanId), r.PodENIMAC,
r.PodENISubnetGW, int(r.ParentIfIndex), mtu, log)
hostVethNamePrefix := sgpp.BuildHostVethNamePrefix(conf.VethPrefix, conf.PodSGEnforcingMode)
hostVethName = generateHostVethName(hostVethNamePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupBranchENIPodNetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.PodVlanId), r.PodENIMAC,
r.PodENISubnetGW, int(r.ParentIfIndex), mtu, conf.PodSGEnforcingMode, log)

// This is a dummyVlanInterfaceName generated to identify dummyVlanInterface
// which will be created for PPSG scenario to pass along the vlanId information
Expand All @@ -226,7 +234,7 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
// build hostVethName
// Note: the maximum length for linux interface name is 15
hostVethName = generateHostVethName(conf.VethPrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupNS(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.DeviceNumber), r.VPCv4CIDRs, r.UseExternalSNAT, mtu, log)
err = driverClient.SetupPodNetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.DeviceNumber), mtu, log)
}

if err != nil {
Expand Down Expand Up @@ -314,42 +322,14 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return errors.Wrap(err, "del cmd: failed to load k8s config from args")
}

prevResult, ok := conf.PrevResult.(*current.Result)

// Try to use prevResult if available
// prevResult might not be availabe, if we are still using older cni spec < 0.4.0.
// So we should fallback to the old clean up method
if ok {
dummyVlanInterfaceName := generateHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
for _, iface := range prevResult.Interfaces {
if iface.Name == dummyVlanInterfaceName {
podVlanId, err := strconv.Atoi(iface.Mac)
if err != nil {
log.Errorf("Failed to parse vlanId from prevResult: %v", err)
return errors.Wrap(err, "del cmd: failed to parse vlanId from prevResult")
}

// podVlanID can not be 0 as we add dummyVlanInterface only for ppsg
// if it is 0 then we should return an error
if podVlanId == 0 {
log.Errorf("Found SG pod:%s namespace:%s with 0 vlanID", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE)
return errors.Wrap(err, "del cmd: found Incorrect 0 vlandId for ppsg")
}

if isNetnsEmpty(args.Netns) {
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod:%s namespace: %s containerID:%s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
return nil
}

err = cleanUpPodENI(podVlanId, log, args.ContainerID, driverClient)
if err != nil {
return err
}
log.Infof("Received del network response for pod %s namespace %s sandbox %s with vlanId: %v", string(k8sArgs.K8S_POD_NAME),
string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_INFRA_CONTAINER_ID), podVlanId)
return nil
}
}
handled, err := tryDelWithPrevResult(driverClient, conf, k8sArgs, args.IfName, args.Netns, log)
if err != nil {
return errors.Wrap(err, "del cmd: failed to delete with prevResult")
}
if handled {
log.Infof("Handled CNI del request with prevResult: ContainerID(%s) Netns(%s) IfName(%s) PodNamespace(%s) PodName(%s)",
args.ContainerID, args.Netns, args.IfName, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
return nil
}

// notify local IP address manager to free secondary IP
Expand Down Expand Up @@ -420,9 +400,9 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod:%s namespace: %s containerID:%s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
return nil
}
err = driverClient.TeardownPodENINetwork(int(r.PodVlanId), log)
err = driverClient.TeardownBranchENIPodNetwork(addr, int(r.PodVlanId), conf.PodSGEnforcingMode, log)
} else {
err = driverClient.TeardownNS(addr, int(r.DeviceNumber), log)
err = driverClient.TeardownPodNetwork(addr, int(r.DeviceNumber), log)
}

if err != nil {
Expand All @@ -436,14 +416,43 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return nil
}

func cleanUpPodENI(podVlanId int, log logger.Logger, containerId string, driverClient driver.NetworkAPIs) error {
err := driverClient.TeardownPodENINetwork(podVlanId, log)
if err != nil {
log.Errorf("Failed on TeardownPodNetwork for container ID %s: %v",
containerId, err)
return errors.Wrap(err, "del cmd: failed on tear down pod network")
// tryDelWithPrevResult will try to process CNI delete request without IPAMD.
// returns true if the del request is handled.
func tryDelWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArgs K8sArgs, contVethName string, netNS string, log logger.Logger) (bool, error) {
// prevResult might not be available, if we are still using older cni spec < 0.4.0.
prevResult, ok := conf.PrevResult.(*current.Result)
if !ok {
return false, nil
}
return nil

dummyIfaceName := generateHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
_, dummyIface, found := cniutils.FindInterfaceByName(prevResult.Interfaces, dummyIfaceName)
if !found {
return false, nil
}
podVlanID, err := strconv.Atoi(dummyIface.Mac)
if err != nil || podVlanID == 0 {
return true, errors.Errorf("malformed vlanID in prevResult: %s", dummyIface.Mac)
}
if isNetnsEmpty(netNS) {
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod:%s namespace: %s containerID:%s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
return true, nil
}

containerIfaceIndex, _, found := cniutils.FindInterfaceByName(prevResult.Interfaces, contVethName)
if !found {
return false, errors.Errorf("cannot find contVethName %s in prevResult", contVethName)
}
containerIPs := cniutils.FindIPConfigsByIfaceIndex(prevResult.IPs, containerIfaceIndex)
if len(containerIPs) != 1 {
return false, errors.Errorf("found %d containerIP for %v in prevResult", len(containerIPs), contVethName)
}
containerIP := containerIPs[0].Address

if err := driverClient.TeardownBranchENIPodNetwork(&containerIP, podVlanID, conf.PodSGEnforcingMode, log); err != nil {
return true, err
}
return true, nil
}

// Scope usage of this function to only SG pods scenario (https://harbinger.amazon.com/notices/65203)
Expand Down
Loading

0 comments on commit a9f9a1d

Please sign in to comment.