Skip to content

Commit

Permalink
add env var to enable nftables
Browse files Browse the repository at this point in the history
  • Loading branch information
jdn5126 committed Dec 8, 2022
1 parent 320153f commit d1cdfd4
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 77 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,17 @@ configured to operate in IPv6 mode. Prefix delegation is only supported on nitro

---

#### `ENABLE_NFTABLES` (v1.13.0+)

Type: Boolean as a String

Default: `false`

VPC CNI uses `iptables-legacy` by default. Setting `ENABLE_NFTABLES` to `true` will update VPC CNI to use `iptables-nft`.

**Note:** VPC CNI image contains `iptables-legacy` and `iptables-nft`. Switching between them is done via `update-alternatives`. It is *strongly* recommended that the iptables mode matches that which is used by the base OS and `kube-proxy`.
Switching modes while pods are running or rules are installed will not trigger reconciliation. It is recommended that rules are manually updated, or that nodes are cordoned and drained, before updating. If reloading a node, ensure that previous rules are not set to be persisted.

### VPC CNI Feature Matrix

IP Mode | Secondary IP Mode | Prefix Delegation | Security Groups Per Pod | WARM & MIN IP/Prefix Targets | External SNAT
Expand Down
46 changes: 23 additions & 23 deletions cmd/aws-vpc-cni-init/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ package main
import (
"os"

"github.com/aws/amazon-vpc-cni-k8s/pkg/procsyswrapper"
"github.com/aws/amazon-vpc-cni-k8s/utils/cp"
"github.com/aws/amazon-vpc-cni-k8s/utils/imds"
"github.com/aws/amazon-vpc-cni-k8s/utils/sysctl"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
Expand Down Expand Up @@ -68,68 +68,68 @@ func getNodePrimaryIF() (string, error) {
return primaryIF, nil
}

func configureSystemParams(sysctlUtil sysctl.Interface, primaryIF string) error {
func configureSystemParams(procSys procsyswrapper.ProcSys, primaryIF string) error {
var err error
// Configure rp_filter in loose mode
entry := "net/ipv4/conf/" + primaryIF + "/rp_filter"
err = sysctlUtil.Set(entry, 2)
err = procSys.Set(entry, "2")
if err != nil {
return errors.Wrapf(err, "Failed to set rp_filter for %s", primaryIF)
}
val, _ := sysctlUtil.Get(entry)
log.Infof("Updated %s to %d", entry, val)
val, _ := procSys.Get(entry)
log.Infof("Updated %s to %s", entry, val)

// Enable or disable TCP early demux based on environment variable
// Note that older kernels may not support tcp_early_demux, so we must first check that it exists.
entry = "net/ipv4/tcp_early_demux"
if _, err := sysctlUtil.Get(entry); err == nil {
if _, err := procSys.Get(entry); err == nil {
disableIPv4EarlyDemux := getEnv(envDisableIPv4TcpEarlyDemux, "false")
if disableIPv4EarlyDemux == "true" {
err = sysctlUtil.Set(entry, 0)
err = procSys.Set(entry, "0")
if err != nil {
return errors.Wrap(err, "Failed to disable tcp_early_demux")
}
} else {
err = sysctlUtil.Set(entry, 1)
err = procSys.Set(entry, "1")
if err != nil {
return errors.Wrap(err, "Failed to enable tcp_early_demux")
}
}
val, _ = sysctlUtil.Get(entry)
log.Infof("Updated %s to %d", entry, val)
val, _ = procSys.Get(entry)
log.Infof("Updated %s to %s", entry, val)
}
return nil
}

func configureIPv6Settings(sysctlUtil sysctl.Interface, primaryIF string) error {
func configureIPv6Settings(procSys procsyswrapper.ProcSys, primaryIF string) error {
var err error
// Enable IPv6 when environment variable is set
// Note that IPv6 is not disabled when environment variable is unset. This is omitted to preserve default host semantics.
enableIPv6 := getEnv(envEnableIPv6, "false")
if enableIPv6 == "true" {
entry := "net/ipv6/conf/all/disable_ipv6"
err = sysctlUtil.Set(entry, 0)
err = procSys.Set(entry, "0")
if err != nil {
return errors.Wrap(err, "Failed to set disable_ipv6 to 0")
}
val, _ := sysctlUtil.Get(entry)
log.Infof("Updated %s to %d", entry, val)
val, _ := procSys.Get(entry)
log.Infof("Updated %s to %s", entry, val)

entry = "net/ipv6/conf/all/forwarding"
err = sysctlUtil.Set(entry, 1)
err = procSys.Set(entry, "1")
if err != nil {
return errors.Wrap(err, "Failed to enable ipv6 forwarding")
}
val, _ = sysctlUtil.Get(entry)
log.Infof("Updated %s to %d", entry, val)
val, _ = procSys.Get(entry)
log.Infof("Updated %s to %s", entry, val)

entry = "net/ipv6/conf/" + primaryIF + "/accept_ra"
err = sysctlUtil.Set(entry, 2)
err = procSys.Set(entry, "2")
if err != nil {
return errors.Wrap(err, "Failed to enable ipv6 accept_ra")
}
val, _ = sysctlUtil.Get(entry)
log.Infof("Updated %s to %d", entry, val)
val, _ = procSys.Get(entry)
log.Infof("Updated %s to %s", entry, val)
}
return nil
}
Expand Down Expand Up @@ -166,14 +166,14 @@ func _main() int {
}
log.Infof("Found primaryIF %s", primaryIF)

sysctlUtil := sysctl.New()
err = configureSystemParams(sysctlUtil, primaryIF)
procSys := procsyswrapper.NewProcSys()
err = configureSystemParams(procSys, primaryIF)
if err != nil {
log.WithError(err).Errorf("Failed to configure system parameters")
return 1
}

err = configureIPv6Settings(sysctlUtil, primaryIF)
err = configureIPv6Settings(procSys, primaryIF)
if err != nil {
log.WithError(err).Errorf("Failed to configure IPv6 settings")
return 1
Expand Down
30 changes: 27 additions & 3 deletions cmd/aws-vpc-cni/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ const (
defaultPluginLogLevel = "Debug"
defaultEnableIPv6 = "false"
defaultRandomizeSNAT = "prng"
defaultEnableNftables = "false"
awsConflistFile = "/10-aws.conflist"
vpcCniInitDonePath = "/vpc-cni-init/done"

Expand All @@ -88,6 +89,7 @@ const (
envEnBandwidthPlugin = "ENABLE_BANDWIDTH_PLUGIN"
envEnIPv6 = "ENABLE_IPv6"
envRandomizeSNAT = "AWS_VPC_K8S_CNI_RANDOMIZESNAT"
envEnableNftables = "ENABLE_NFTABLES"
)

func getEnv(env, defaultVal string) string {
Expand Down Expand Up @@ -209,8 +211,6 @@ func getNodePrimaryV4Address() (string, error) {
if hostIP != "" {
return hostIP, nil
}

time.Sleep(1 * time.Second)
}
}

Expand Down Expand Up @@ -324,6 +324,26 @@ func validateEnvVars() bool {
return true
}

// configureNftablesIfEnabled switches the container's iptables and ip6tables
// alternatives to the nft backend when the ENABLE_NFTABLES env var is "true".
//
// By default the VPC CNI container uses iptables-legacy, and nothing is changed
// unless the env var is set. It returns a wrapped error if either
// update-alternatives invocation fails; logging the failure is left to the
// caller so the same error is not reported twice.
func configureNftablesIfEnabled() error {
	if getEnv(envEnableNftables, defaultEnableNftables) != "true" {
		return nil
	}

	log.Infof("Updating iptables mode to nft")
	for _, name := range []string{"iptables", "ip6tables"} {
		cmd := exec.Command("update-alternatives", "--set", name, "/usr/sbin/"+name+"-nft")
		// os/exec discards the child's output when Stdout/Stderr are nil, so
		// forward both streams explicitly to make the selected iptables mode
		// visible in the container log.
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		if err := cmd.Run(); err != nil {
			return errors.Wrapf(err, "Failed to use %s-nft", name)
		}
	}
	return nil
}

// main runs _main and terminates the process with the exit code it returns.
func main() {
	exitCode := _main()
	os.Exit(exitCode)
}
Expand All @@ -334,11 +354,15 @@ func _main() int {
return 1
}

if err := configureNftablesIfEnabled(); err != nil {
log.WithError(err).Error("Failed to enable nftables")
}

pluginBins := []string{"aws-cni", "egress-v4-cni"}
hostCNIBinPath := getEnv(envHostCniBinPath, defaultHostCNIBinPath)
err := cp.InstallBinaries(pluginBins, hostCNIBinPath)
if err != nil {
log.WithError(err).Errorf("Failed to install CNI binaries")
log.WithError(err).Error("Failed to install CNI binaries")
return 1
}

Expand Down
7 changes: 5 additions & 2 deletions docs/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,10 @@ kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/releas

The [CNI image](../scripts/dockerfiles/Dockerfile.release) built for the `aws-node` manifest uses Amazon Linux 2 as the base image. Support for other Linux distributions (custom AMIs) is best-effort. Known issues with other Linux distributions are captured here:

- **iptables** - iptables is installed by default in `aws-node` container images. Newer distributions of RHEL (RHEL 8.x+), Ubuntu (Ubuntu 20.x+), etc. have moved to using `nftables`. This leads to issues such as [this](https://github.com/aws/amazon-vpc-cni-k8s/issues/1847) when running IPAMD.
- **iptables**
Prior to v1.13.0, the VPC CNI image only contained `iptables-legacy`. Newer distributions of RHEL (RHEL 8.x+), Ubuntu (Ubuntu 21.x+), etc. have moved to using `nftables`. This leads to issues such as [this](https://github.com/aws/amazon-vpc-cni-k8s/issues/1847) when running IPAMD.

To resolve this issue on distributions that use `nftables`, there are currently two options:
To resolve this issue in versions before v1.13.0, there are currently two options:
1. Uninstall `nftables` and install `iptables-legacy` in base distribution
2. Build a custom CNI image based on `nftables`, such as:
```
Expand All @@ -235,6 +236,8 @@ The [CNI image](../scripts/dockerfiles/Dockerfile.release) built for the `aws-no
run cd /usr/sbin && rm iptables && ln -s xtables-nft-multi iptables
```

In v1.13.0+, `iptables-legacy` and `iptables-nft` are present in the VPC CNI container image. Setting the `ENABLE_NFTABLES` environment variable to `true` instructs VPC CNI to use `iptables-nft`. By default, `iptables-legacy` is used.

## cni-metrics-helper

See the [cni-metrics-helper README](../cmd/cni-metrics-helper/README.md).
Expand Down
49 changes: 0 additions & 49 deletions utils/sysctl/sysctl.go

This file was deleted.

0 comments on commit d1cdfd4

Please sign in to comment.