diff --git a/src/cloud-api-adaptor/pkg/podnetwork/podnetwork_test.go b/src/cloud-api-adaptor/pkg/podnetwork/podnetwork_test.go index 3c3a62856..78dbf1618 100644 --- a/src/cloud-api-adaptor/pkg/podnetwork/podnetwork_test.go +++ b/src/cloud-api-adaptor/pkg/podnetwork/podnetwork_test.go @@ -99,10 +99,13 @@ func TestWorkerNode(t *testing.T) { require.Equal(t, expected.workerNodeIP, config.WorkerNodeIP.String(), "hostInterface=%q", hostInterface) require.Equal(t, mockTunnelType, config.TunnelType, "hostInterface=%q", hostInterface) - require.Equal(t, len(config.Routes), 1, "hostInterface=%q", hostInterface) + require.Equal(t, len(config.Routes), 2, "hostInterface=%q", hostInterface) require.Equal(t, config.Routes[0].Dst.String(), "0.0.0.0/0", "hostInterface=%q", hostInterface) require.Equal(t, config.Routes[0].GW.String(), "172.16.0.1", "hostInterface=%q", hostInterface) require.Equal(t, config.Routes[0].Dev, "eth0", "hostInterface=%q", hostInterface) + require.Equal(t, config.Routes[1].Dst.String(), "172.16.0.0/24", "hostInterface=%q", hostInterface) + require.Equal(t, config.Routes[1].GW.IsValid(), false, "hostInterface=%q", hostInterface) + require.Equal(t, config.Routes[1].Dev, "eth0", "hostInterface=%q", hostInterface) err = workerNode.Teardown(workerPodNS.Path(), config) require.Nil(t, err, "hostInterface=%q", hostInterface) @@ -128,9 +131,6 @@ func TestPodNode(t *testing.T) { tuntest.AddrAdd(t, podNodeNS, "ens1", "192.168.1.3/24") tuntest.RouteAdd(t, podNodeNS, "", "192.168.0.1", "ens0") - podNS := tuntest.NewNamedNS(t, "test-pod") - defer tuntest.DeleteNamedNS(t, podNS) - for hostInterface, expected := range map[string]struct { podNodeIP string workerNodeIP string @@ -149,35 +149,48 @@ func TestPodNode(t *testing.T) { }, } { - err := podNodeNS.Run(func() error { - - config := &tunneler.Config{ - PodIP: netip.MustParsePrefix("172.16.0.2/24"), - Routes: []*tunneler.Route{ - { - GW: netip.MustParseAddr("172.16.0.1"), - Dev: "eth0", + podNS := tuntest.NewNamedNS(t, 
"test-pod") + func() { + defer tuntest.DeleteNamedNS(t, podNS) + + tuntest.BridgeAdd(t, podNS, "eth0") + tuntest.AddrAdd(t, podNS, "eth0", "172.16.0.2/24") + + err := podNodeNS.Run(func() error { + + config := &tunneler.Config{ + PodIP: netip.MustParsePrefix("172.16.0.2/24"), + Routes: []*tunneler.Route{ + { + Dst: netip.MustParsePrefix("0.0.0.0/0"), + GW: netip.MustParseAddr("172.16.0.1"), + Dev: "eth0", + }, + { + Dst: netip.MustParsePrefix("172.16.0.0/24"), + Dev: "eth0", + }, }, - }, - InterfaceName: "eth0", - MTU: 1500, - WorkerNodeIP: netip.MustParsePrefix(expected.workerNodeIP), - TunnelType: mockTunnelType, - Dedicated: hostInterface == "ens1", - } - - podNode := NewPodNode(podNS.Path(), hostInterface, config) - require.NotNil(t, podNode, "hostInterface=%q", hostInterface) - - err := podNode.Setup() - require.Nil(t, err, "hostInterface=%q", hostInterface) + InterfaceName: "eth0", + MTU: 1500, + WorkerNodeIP: netip.MustParsePrefix(expected.workerNodeIP), + TunnelType: mockTunnelType, + Dedicated: hostInterface == "ens1", + } - err = podNode.Teardown() - require.Nil(t, err, "hostInterface=%q", hostInterface) + podNode := NewPodNode(podNS.Path(), hostInterface, config) + require.NotNil(t, podNode, "hostInterface=%q", hostInterface) - return nil - }) - require.Nil(t, err, "hostInterface=%q", hostInterface) + err := podNode.Setup() + require.Nil(t, err, "hostInterface=%q", hostInterface) + + err = podNode.Teardown() + require.Nil(t, err, "hostInterface=%q", hostInterface) + + return nil + }) + require.Nil(t, err, "hostInterface=%q", hostInterface) + }() } } diff --git a/src/cloud-api-adaptor/pkg/podnetwork/podnode.go b/src/cloud-api-adaptor/pkg/podnetwork/podnode.go index 0d6ee1e71..218b9afdd 100644 --- a/src/cloud-api-adaptor/pkg/podnetwork/podnode.go +++ b/src/cloud-api-adaptor/pkg/podnetwork/podnode.go @@ -95,6 +95,49 @@ func (n *podNode) Setup() error { return fmt.Errorf("failed to set up tunnel %q: %w", n.config.TunnelType, err) } + if 
!n.config.PodIP.IsSingleIP() { + // Delete the route that was automatically added by the kernel for eth0 + // CNI plugins like PTP and GKE need this trick, otherwise adding a route will fail in a later step. + // The deleted route will be restored again in the case of typical CNI plugins such as Flannel and Calico. + // https://github.com/containernetworking/plugins/blob/acf8ddc8e1128e6f68a34f7fe91122afeb1fa93d/plugins/main/ptp/ptp.go#L58-L61 + + nRoute := netops.Route{ + Destination: n.config.PodIP.Masked(), + Device: n.config.InterfaceName, + } + if err := podNS.RouteDel(&nRoute); err != nil { + return fmt.Errorf("failed to remove route %s dev %s: %v", nRoute.Destination, nRoute.Device, err) + } + logger.Printf("removed route %s dev %s", nRoute.Destination, nRoute.Device) + } + + // We need to process routes without gateway address first. Processing routes with a gateway causes an error if the gateway is not reachable. + // Calico sets up routes with this pattern. + // https://github.com/projectcalico/cni-plugin/blob/7495c0279c34faac315b82c1838bca638e23dbbe/pkg/dataplane/linux/dataplane_linux.go#L158-L167 + + var first, second []*tunneler.Route + for _, route := range n.config.Routes { + if !route.GW.IsValid() { + first = append(first, route) + } else { + second = append(second, route) + } + } + routes := append(first, second...) 
+ + for _, route := range routes { + nRoute := netops.Route{ + Destination: route.Dst, + Gateway: route.GW, + Device: route.Dev, + Protocol: route.Protocol, + Scope: route.Scope, + } + if err := podNS.RouteAdd(&nRoute); err != nil { + return fmt.Errorf("failed to add a route to %s via %s on pod network namespace %s: %w", route.Dst, route.GW, podNS.Path(), err) + } + } + return nil } diff --git a/src/cloud-api-adaptor/pkg/podnetwork/tunneler/routing/podnode.go b/src/cloud-api-adaptor/pkg/podnetwork/tunneler/routing/podnode.go index 2bbdf9258..18ace2490 100644 --- a/src/cloud-api-adaptor/pkg/podnetwork/tunneler/routing/podnode.go +++ b/src/cloud-api-adaptor/pkg/podnetwork/tunneler/routing/podnode.go @@ -21,7 +21,6 @@ func NewPodNodeTunneler() tunneler.Tunneler { } const ( - podVEthName = "eth0" hostVEthName = "veth0" podTableID = 45001 @@ -39,6 +38,11 @@ func (t *podNodeTunneler) Setup(nsPath string, podNodeIPs []netip.Addr, config * return errors.New("secondary pod node IP is not available") } + podVEthName := config.InterfaceName + if podVEthName == "" { + return errors.New("InterfaceName is not specified") + } + podNodeIP := podNodeIPs[1] podIP := config.PodIP @@ -97,21 +101,7 @@ func (t *podNodeTunneler) Setup(nsPath string, podNodeIPs []netip.Addr, config * var defaultRouteGateway netip.Addr - // We need to process routes without gateway address first. Processing routes with a gateway causes an error if the gateway is not reachable. - // Calico sets up routes with this pattern. - // https://github.com/projectcalico/cni-plugin/blob/7495c0279c34faac315b82c1838bca638e23dbbe/pkg/dataplane/linux/dataplane_linux.go#L158-L167 - - var first, second []*tunneler.Route for _, route := range config.Routes { - if !route.GW.IsValid() { - first = append(first, route) - } else { - second = append(second, route) - } - } - routes := append(first, second...) 
- - for _, route := range routes { if err := podNS.RouteAdd(&netops.Route{Destination: route.Dst, Gateway: route.GW, Device: podVEthName}); err != nil { return fmt.Errorf("failed to add a route to %s via %s on pod network namespace %s: %w", route.Dst, route.GW, nsPath, err) } diff --git a/src/cloud-api-adaptor/pkg/podnetwork/tunneler/tunneler.go b/src/cloud-api-adaptor/pkg/podnetwork/tunneler/tunneler.go index f66ce761f..fa9f8f1c8 100644 --- a/src/cloud-api-adaptor/pkg/podnetwork/tunneler/tunneler.go +++ b/src/cloud-api-adaptor/pkg/podnetwork/tunneler/tunneler.go @@ -6,6 +6,8 @@ package tunneler import ( "fmt" "net/netip" + + "github.com/confidential-containers/cloud-api-adaptor/src/cloud-api-adaptor/pkg/util/netops" ) type Tunneler interface { @@ -28,9 +30,11 @@ type Config struct { } type Route struct { - Dst netip.Prefix - GW netip.Addr - Dev string + Dst netip.Prefix + GW netip.Addr + Dev string + Protocol netops.RouteProtocol + Scope netops.RouteScope } type driver struct { diff --git a/src/cloud-api-adaptor/pkg/podnetwork/tunneler/vxlan/podnode.go b/src/cloud-api-adaptor/pkg/podnetwork/tunneler/vxlan/podnode.go index b7200af5b..30dd3e824 100644 --- a/src/cloud-api-adaptor/pkg/podnetwork/tunneler/vxlan/podnode.go +++ b/src/cloud-api-adaptor/pkg/podnetwork/tunneler/vxlan/podnode.go @@ -4,6 +4,7 @@ package vxlan import ( + "errors" "fmt" "net/netip" @@ -12,8 +13,8 @@ import ( ) const ( - podVxlanInterface = "vxlan0" - maxMTU = 1450 + hostVxlanInterface = "vxlan0" + maxMTU = 1450 ) type podNodeTunneler struct { @@ -25,6 +26,11 @@ func NewPodNodeTunneler() tunneler.Tunneler { func (t *podNodeTunneler) Setup(nsPath string, podNodeIPs []netip.Addr, config *tunneler.Config) error { + podVxlanInterface := config.InterfaceName + if podVxlanInterface == "" { + return errors.New("InterfaceName is not specified") + } + nodeAddr := config.WorkerNodeIP if !nodeAddr.IsValid() { @@ -53,13 +59,17 @@ func (t *podNodeTunneler) Setup(nsPath string, podNodeIPs []netip.Addr, config 
* ID: config.VXLANID, Port: config.VXLANPort, } - vxlan, err := hostNS.LinkAdd(podVxlanInterface, vxlanDevice) + vxlan, err := hostNS.LinkAdd(hostVxlanInterface, vxlanDevice) if err != nil { - return fmt.Errorf("failed to add vxlan interface %s: %w", podVxlanInterface, err) + return fmt.Errorf("failed to add vxlan interface %s: %w", hostVxlanInterface, err) } if err := vxlan.SetNamespace(podNS); err != nil { - return fmt.Errorf("failed to move vxlan interface %s to netns %s: %w", podVxlanInterface, podNS.Path(), err) + return fmt.Errorf("failed to move vxlan interface %s to netns %s: %w", hostVxlanInterface, podNS.Path(), err) + } + + if err := vxlan.SetName(podVxlanInterface); err != nil { + return fmt.Errorf("failed to rename vxlan interface %s on netns %s: %w", hostVxlanInterface, podNS.Path(), err) } if err := vxlan.SetHardwareAddr(config.PodHwAddr); err != nil { @@ -82,26 +92,6 @@ func (t *podNodeTunneler) Setup(nsPath string, podNodeIPs []netip.Addr, config * return err } - // We need to process routes without gateway address first. Processing routes with a gateway causes an error if the gateway is not reachable. - // Calico sets up routes with this pattern. - // https://github.com/projectcalico/cni-plugin/blob/7495c0279c34faac315b82c1838bca638e23dbbe/pkg/dataplane/linux/dataplane_linux.go#L158-L167 - - var first, second []*tunneler.Route - for _, route := range config.Routes { - if !route.GW.IsValid() { - first = append(first, route) - } else { - second = append(second, route) - } - } - routes := append(first, second...) 
- - for _, route := range routes { - if err := podNS.RouteAdd(&netops.Route{Destination: route.Dst, Gateway: route.GW, Device: podVxlanInterface}); err != nil { - return fmt.Errorf("failed to add a route to %s via %s on pod network namespace %s: %w", route.Dst, route.GW, nsPath, err) - } - } - return nil } diff --git a/src/cloud-api-adaptor/pkg/podnetwork/workernode.go b/src/cloud-api-adaptor/pkg/podnetwork/workernode.go index dbaed59e4..0ff47224f 100644 --- a/src/cloud-api-adaptor/pkg/podnetwork/workernode.go +++ b/src/cloud-api-adaptor/pkg/podnetwork/workernode.go @@ -164,9 +164,11 @@ func (n *workerNode) Inspect(nsPath string) (*tunneler.Config, error) { for _, route := range routes { r := &tunneler.Route{ - Dst: route.Destination, - Dev: route.Device, - GW: route.Gateway, + Dst: route.Destination, + Dev: route.Device, + GW: route.Gateway, + Protocol: route.Protocol, + Scope: route.Scope, } config.Routes = append(config.Routes, r) } diff --git a/src/cloud-api-adaptor/pkg/util/netops/netops.go b/src/cloud-api-adaptor/pkg/util/netops/netops.go index c621c0258..865a8b3eb 100644 --- a/src/cloud-api-adaptor/pkg/util/netops/netops.go +++ b/src/cloud-api-adaptor/pkg/util/netops/netops.go @@ -5,6 +5,7 @@ package netops import ( "bytes" + "encoding/json" "fmt" "net" "net/netip" @@ -428,7 +429,8 @@ type Route struct { Priority int Table int Type int - Protocol int + Protocol RouteProtocol + Scope RouteScope Onlink bool } @@ -524,6 +526,7 @@ func (ns *namespace) RouteList(filters ...*Route) ([]*Route, error) { {Table: unix.RT_TABLE_MAIN, Type: unix.RTN_UNICAST, Protocol: unix.RTPROT_STATIC}, {Table: unix.RT_TABLE_MAIN, Type: unix.RTN_UNICAST, Protocol: unix.RTPROT_BOOT}, {Table: unix.RT_TABLE_MAIN, Type: unix.RTN_UNICAST, Protocol: unix.RTPROT_DHCP}, + {Table: unix.RT_TABLE_MAIN, Type: unix.RTN_UNICAST, Protocol: unix.RTPROT_KERNEL}, } filters = append(filters, defaultFilters...) 
} @@ -556,7 +559,8 @@ func (ns *namespace) RouteList(filters ...*Route) ([]*Route, error) { Priority: r.Priority, Table: r.Table, Type: r.Type, - Protocol: int(r.Protocol), + Protocol: RouteProtocol(r.Protocol), + Scope: RouteScope(r.Scope), Onlink: onlink, } @@ -581,6 +585,8 @@ func (ns *namespace) RouteAdd(route *Route) error { Priority: route.Priority, Table: route.Table, Type: route.Type, + Scope: netlink.Scope(route.Scope), + Protocol: netlink.RouteProtocol(route.Protocol), } if route.Device != "" { @@ -611,7 +617,7 @@ func (ns *namespace) RouteDel(route *Route) error { } if len(routes) == 0 { - return fmt.Errorf("failed to identify routes to be deleted: dest: %s, gw: %s, dev %s: %w", route.Destination, route.Gateway, route.Device, err) + return fmt.Errorf("failed to identify routes to be deleted: dest: %s, gw: %s, dev %s", route.Destination, route.Gateway, route.Device) } for _, r := range routes { @@ -622,6 +628,84 @@ func (ns *namespace) RouteDel(route *Route) error { return nil } +func initLookupTable[T fmt.Stringer](s []T) map[string]T { + + m := make(map[string]T) + for _, x := range s { + m[x.String()] = x + } + return m +} + +func unmarshalJSON[T any](name string, data []byte, table map[string]T) (v T, _ error) { + + var str string + if err := json.Unmarshal(data, &str); err != nil { + return v, fmt.Errorf("invalid %s: %v: %w", name, data, err) + } + + v, ok := table[str] + if !ok { + return v, fmt.Errorf("unknown %s: %s", name, str) + } + return v, nil +} + +type RouteProtocol netlink.RouteProtocol + +var routeProtocols = initLookupTable([]RouteProtocol{ + unix.RTPROT_STATIC, + unix.RTPROT_BOOT, + unix.RTPROT_DHCP, + unix.RTPROT_KERNEL, +}) + +func (p RouteProtocol) String() string { + return netlink.RouteProtocol(p).String() +} + +func (p RouteProtocol) MarshalJSON() ([]byte, error) { + return json.Marshal(p.String()) +} + +func (p *RouteProtocol) UnmarshalJSON(data []byte) error { + + v, err := unmarshalJSON("route protocol", data, routeProtocols) + 
if err != nil { + return err + } + *p = v + return nil +} + +type RouteScope netlink.Scope + +var routeScopes = initLookupTable([]netlink.Scope{ + netlink.SCOPE_UNIVERSE, + netlink.SCOPE_SITE, + netlink.SCOPE_LINK, + netlink.SCOPE_HOST, + netlink.SCOPE_NOWHERE, +}) + +func (s RouteScope) String() string { + return netlink.Scope(s).String() +} + +func (s RouteScope) MarshalJSON() ([]byte, error) { + return json.Marshal(s.String()) +} + +func (s *RouteScope) UnmarshalJSON(data []byte) error { + + rs, err := unmarshalJSON("route scope", data, routeScopes) + if err != nil { + return err + } + *s = RouteScope(rs) + return nil +} + type Rule struct { Src netip.Prefix IifName string