diff --git a/pkg/utils/conntrack.go b/pkg/utils/conntrack.go new file mode 100644 index 000000000..d07a4592d --- /dev/null +++ b/pkg/utils/conntrack.go @@ -0,0 +1,84 @@ +// Copyright 2020 CNI authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "fmt" + "log" + "net" + + "github.com/vishvananda/netlink" + "golang.org/x/sys/unix" +) + +// Assigned Internet Protocol Numbers +// https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml +const ( + PROTOCOL_TCP = 6 + PROTOCOL_UDP = 17 + PROTOCOL_SCTP = 132 +) + +// getNetlinkFamily returns the Netlink IP family constant +func getNetlinkFamily(isIPv6 bool) netlink.InetFamily { + if isIPv6 { + return unix.AF_INET6 + } + return unix.AF_INET +} + +// DeleteConntrackEntriesForDstIP delete the conntrack entries for the connections +// specified by the given destination IP and protocol +func DeleteConntrackEntriesForDstIP(dstIP string, protocol uint8) error { + ip := net.ParseIP(dstIP) + if ip == nil { + return fmt.Errorf("error deleting connection tracking state, bad IP %s", ip) + } + family := getNetlinkFamily(ip.To4() == nil) + + filter := &netlink.ConntrackFilter{} + filter.AddIP(netlink.ConntrackOrigDstIP, ip) + filter.AddProtocol(protocol) + + n, err := netlink.ConntrackDeleteFilter(netlink.ConntrackTable, family, filter) + if err != nil { + // TODO: Better handling for deletion failure. When failure occur, stale udp connection may not get flushed. + // These stale udp connection will keep black hole traffic. Making this a best effort operation for now. + return fmt.Errorf("error deleting connection tracking state for protocol: %d IP: %s, error: %v", protocol, ip, err) + } + if n == 0 { + log.Printf("no entries found for protocol: %d IP: %s", protocol, ip) + } + return nil +} + +// DeleteConntrackEntriesForDstPort delete the conntrack entries for the connections specified +// by the given destination port, protocol and IP family +func DeleteConntrackEntriesForDstPort(port uint16, protocol uint8, family netlink.InetFamily) error { + filter := &netlink.ConntrackFilter{} + filter.AddPort(netlink.ConntrackOrigDstPort, port) + filter.AddProtocol(protocol) + + n, err := netlink.ConntrackDeleteFilter(netlink.ConntrackTable, family, filter) + if err != nil { + // TODO: Better handling for deletion failure. When failure occur, stale udp connection may not get flushed. + // These stale udp connection will keep black hole traffic. Making this a best effort operation for now. + return fmt.Errorf("error deleting connection tracking state for protocol: %d Port: %d, error: %v", protocol, port, err) + } + if n == 0 { + log.Printf("no entries found for protocol: %d Port: %d", protocol, port) + } + return nil +} diff --git a/plugins/meta/portmap/main.go b/plugins/meta/portmap/main.go index 9d1559162..6e71c60a7 100644 --- a/plugins/meta/portmap/main.go +++ b/plugins/meta/portmap/main.go @@ -34,6 +34,7 @@ import ( "github.com/containernetworking/cni/pkg/types" "github.com/containernetworking/cni/pkg/types/current" "github.com/containernetworking/cni/pkg/version" + "golang.org/x/sys/unix" bv "github.com/containernetworking/plugins/pkg/utils/buildversion" ) @@ -89,12 +90,24 @@ func cmdAdd(args *skel.CmdArgs) error { if err := forwardPorts(netConf, netConf.ContIPv4); err != nil { return err } + // Delete conntrack entries for UDP to avoid conntrack blackholing traffic + // due to stale connections. We do that after the iptables rules are set, so + // the new traffic picks them + if err := deletePortmapStaleConnections(netConf.RuntimeConfig.PortMaps, unix.AF_INET); err != nil { + return err + } } if netConf.ContIPv6.IP != nil { if err := forwardPorts(netConf, netConf.ContIPv6); err != nil { return err } + // Delete conntrack entries for UDP to avoid conntrack blackholing traffic + // due to stale connections. We do that after the iptables rules are set, so + // the new traffic picks them + if err := deletePortmapStaleConnections(netConf.RuntimeConfig.PortMaps, unix.AF_INET6); err != nil { + return err + } } // Pass through the previous result diff --git a/plugins/meta/portmap/portmap.go b/plugins/meta/portmap/portmap.go index f4e287745..d35136f0d 100644 --- a/plugins/meta/portmap/portmap.go +++ b/plugins/meta/portmap/portmap.go @@ -19,10 +19,12 @@ import ( "net" "sort" "strconv" + "strings" "github.com/containernetworking/plugins/pkg/utils" "github.com/containernetworking/plugins/pkg/utils/sysctl" "github.com/coreos/go-iptables/iptables" + "github.com/vishvananda/netlink" ) // This creates the chains to be added to iptables. The basic structure is @@ -42,10 +44,12 @@ import ( // The names of the top-level summary chains. // These should never be changed, or else upgrading will require manual // intervention. -const TopLevelDNATChainName = "CNI-HOSTPORT-DNAT" -const SetMarkChainName = "CNI-HOSTPORT-SETMARK" -const MarkMasqChainName = "CNI-HOSTPORT-MASQ" -const OldTopLevelSNATChainName = "CNI-HOSTPORT-SNAT" +const ( + TopLevelDNATChainName = "CNI-HOSTPORT-DNAT" + SetMarkChainName = "CNI-HOSTPORT-SETMARK" + MarkMasqChainName = "CNI-HOSTPORT-MASQ" + OldTopLevelSNATChainName = "CNI-HOSTPORT-SNAT" +) // forwardPorts establishes port forwarding to a given container IP. // containerNet.IP can be either v4 or v6. @@ -113,7 +117,6 @@ func forwardPorts(config *PortMapConf, containerNet net.IPNet) error { } func checkPorts(config *PortMapConf, containerNet net.IPNet) error { - dnatChain := genDnatChain(config.Name, config.ContainerID) fillDnatRules(&dnatChain, config, containerNet) @@ -189,7 +192,7 @@ func fillDnatRules(c *chain, config *PortMapConf, containerNet net.IPNet) { setMarkChainName = *config.ExternalSetMarkChain } - //Generate the dnat entry rules. We'll use multiport, but it ony accepts + // Generate the dnat entry rules. We'll use multiport, but it ony accepts // up to 15 rules, so partition the list if needed. // Do it in a stable order for testing protoPorts := groupByProto(entries) @@ -243,7 +246,8 @@ func fillDnatRules(c *chain, config *PortMapConf, containerNet net.IPNet) { ruleBase := []string{ "-p", entry.Protocol, - "--dport", strconv.Itoa(entry.HostPort)} + "--dport", strconv.Itoa(entry.HostPort), + } if addRuleBaseDst { ruleBase = append(ruleBase, "-d", entry.HostIP) @@ -406,3 +410,19 @@ func maybeGetIptables(isV6 bool) *iptables.IPTables { return ipt } + +// deletePortmapStaleConnections delete the UDP conntrack entries on the specified IP family +// from the ports mapped to the container +func deletePortmapStaleConnections(portMappings []PortMapEntry, family netlink.InetFamily) error { + for _, pm := range portMappings { + // skip if is not UDP + if strings.ToLower(pm.Protocol) != "udp" { + continue + } + err := utils.DeleteConntrackEntriesForDstPort(uint16(pm.HostPort), utils.PROTOCOL_UDP, family) + if err != nil { + return err + } + } + return nil +}