Skip to content

Commit

Permalink
[BPF] When lo has an IP, override it when CTLB is disabled
Browse files Browse the repository at this point in the history
When CTLB is disabled, we route traffic for services via the bpfnatin/out
device. Since the final destination isn't resolved yet, Linux picks up
an address set on the loopback device (if there is one) as the source. This
may not be (and likely is not) an address that the destination can use to
return traffic. Therefore we need to replace it with the host's IP, which is
routable within the cluster.

We use the same mechanism as for replacing the main host device's IP with a
tunnel IP when we need to reach a remote workload via an overlay.
  • Loading branch information
tomastigera committed Mar 15, 2024
1 parent b549e68 commit 6777ac7
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 13 deletions.
42 changes: 30 additions & 12 deletions felix/bpf-gpl/tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,12 @@ static CALI_BPF_INLINE void calico_tc_process_ct_lookup(struct cali_tc_ctx *ctx)
}
}

if (CALI_F_TO_WEP && !skb_seen(ctx->skb) &&
if (CALI_F_TO_WEP &&
/* We have not seen the packet yet, must originate from the host. */
(!skb_seen(ctx->skb) ||
/* We have seen the packet, but was looped via NAT iface and so it must be from the host. */
skb_mark_equals(ctx->skb, CALI_SKB_MARK_FROM_NAT_IFACE_OUT, CALI_SKB_MARK_FROM_NAT_IFACE_OUT)) &&
/* Double check that it has host source IP - do it last, it is the most expensive test. */
cali_rt_flags_local_host(cali_rt_lookup_flags(&ctx->state->ip_src))) {
/* Host to workload traffic always allowed. We discount traffic that was
* seen by another program since it must have come in via another interface.
Expand Down Expand Up @@ -1329,17 +1334,30 @@ int calico_tc_skb_new_flow_entrypoint(struct __sk_buff *skb)

if ((CALI_F_TO_HOST && CALI_F_NAT_IF) || (CALI_F_TO_HEP && (CALI_F_LO || CALI_F_MAIN))) {
struct cali_rt *r = cali_rt_lookup(&state->post_nat_ip_dst);
if (r && cali_rt_flags_remote_workload(r->flags) && cali_rt_is_tunneled(r)) {
CALI_DEBUG("remote wl %x tunneled via %x\n",
debug_ip(state->post_nat_ip_dst), debug_ip(HOST_TUNNEL_IP));
ct_ctx_nat->src = HOST_TUNNEL_IP;
/* This would be the place to set a new source port if we
* had a way how to allocate it. Instead we rely on source
* port collision resolution.
* ct_ctx_nat->sport = 10101;
*/
state->ct_result.nat_sip = ct_ctx_nat->src;
state->ct_result.nat_sport = ct_ctx_nat->sport;
if (r) {
if (cali_rt_flags_remote_workload(r->flags) && cali_rt_is_tunneled(r)) {
CALI_DEBUG("remote wl %x tunneled via %x\n",
debug_ip(state->post_nat_ip_dst), debug_ip(HOST_TUNNEL_IP));
ct_ctx_nat->src = HOST_TUNNEL_IP;
/* This would be the place to set a new source port if we
* had a way how to allocate it. Instead we rely on source
* port collision resolution.
* ct_ctx_nat->sport = 10101;
*/
state->ct_result.nat_sip = ct_ctx_nat->src;
state->ct_result.nat_sport = ct_ctx_nat->sport;
} else if (!cali_rt_is_local(r) && !ip_equal(state->ip_src, HOST_IP)) {
CALI_DEBUG("remote wl %x fixing unexpected IP from lo %x\n",
debug_ip(state->post_nat_ip_dst), debug_ip(HOST_IP));
ct_ctx_nat->src = HOST_IP;
/* This would be the place to set a new source port if we
* had a way how to allocate it. Instead we rely on source
* port collision resolution.
* ct_ctx_nat->sport = 10101;
*/
state->ct_result.nat_sip = ct_ctx_nat->src;
state->ct_result.nat_sport = ct_ctx_nat->sport;
}
}
}

Expand Down
45 changes: 44 additions & 1 deletion felix/fv/bpf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1209,11 +1209,13 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
clusterIP := "10.101.0.10"
extIP := "10.1.2.3"
excludeSvcIP := "10.101.0.222"
loIP := "5.6.5.6"

if testOpts.ipv6 {
clusterIP = "dead:beef::abcd:0:0:10"
extIP = "dead:beef::abcd:1:2:3"
excludeSvcIP = "dead:beef::abcd:0:0:222"
loIP = "dead:beef::abcd:0:5656:5656"
}

if testOpts.protocol == "udp" && testOpts.udpUnConnected {
Expand Down Expand Up @@ -3215,7 +3217,6 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
cc.Expect(Some, hostW[0], TargetIP(node1IP), ports, hostW0SrcIP)
cc.Expect(Some, hostW[1], TargetIP(node0IP), ports, hostW1SrcIP)
cc.Expect(Some, hostW[1], TargetIP(node1IP), ports, hostW1SrcIP)

cc.CheckConnectivity()
})

Expand Down Expand Up @@ -3476,6 +3477,48 @@ func describeBPFTests(opts ...bpfTestOpt) bool {

cc.CheckConnectivity()
})

// Checks that host-networked workloads hostW[0] and hostW[1] can reach the
// test service's clusterIP after an extra address (loIP) has been added to the
// loopback device of both Felix nodes, and that each connection is seen with
// the expected source IP.
It("should have connectivity from all host-networked workloads to workload 0 "+
"via clusterIP with non-routable address set on lo", func() {
// This only makes sense with CTLB turned off; with CTLB enabled, routing
// already picks the right source IP.
if testOpts.connTimeEnabled {
return
}
By("Configuring ip on lo")
// Add the same loIP to lo on both Felix nodes.
tc.Felixes[0].Exec("ip", "addr", "add", loIP+"/"+ipMask(), "dev", "lo")
tc.Felixes[1].Exec("ip", "addr", "add", loIP+"/"+ipMask(), "dev", "lo")

By("testing connectivity")

node1IP := felixIP(1)
// Default expectations: hostW[0] is seen with loIP as its source and
// hostW[1] with node 1's IP.
// NOTE(review): presumably hostW[0] runs on the same node as workload 0, so
// its source is left as loIP while hostW[1]'s is rewritten — confirm.
hostW0SrcIP := ExpectWithSrcIPs(loIP)
hostW1SrcIP := ExpectWithSrcIPs(node1IP)

// When an overlay is configured, expect hostW[1]'s source to be Felix 1's
// tunnel address for that overlay (IPv6 variant when testing IPv6) instead
// of node1IP. Any other tunnel value keeps the node1IP expectation.
switch testOpts.tunnel {
case "ipip":
hostW1SrcIP = ExpectWithSrcIPs(tc.Felixes[1].ExpectedIPIPTunnelAddr)
case "wireguard":
if testOpts.ipv6 {
hostW1SrcIP = ExpectWithSrcIPs(tc.Felixes[1].ExpectedWireguardV6TunnelAddr)
} else {
hostW1SrcIP = ExpectWithSrcIPs(tc.Felixes[1].ExpectedWireguardTunnelAddr)
}
case "vxlan":
if testOpts.ipv6 {
hostW1SrcIP = ExpectWithSrcIPs(tc.Felixes[1].ExpectedVXLANV6TunnelAddr)
} else {
hostW1SrcIP = ExpectWithSrcIPs(tc.Felixes[1].ExpectedVXLANTunnelAddr)
}
}
// Target the service through its clusterIP on the first service port.
clusterIP := testSvc.Spec.ClusterIP
ports := ExpectWithPorts(uint16(testSvc.Spec.Ports[0].Port))

// Register one expectation per host-networked workload, then run the checks.
cc.Expect(Some, hostW[0], TargetIP(clusterIP), ports, hostW0SrcIP)
cc.Expect(Some, hostW[1], TargetIP(clusterIP), ports, hostW1SrcIP)

cc.CheckConnectivity()
})
})
}

Expand Down

0 comments on commit 6777ac7

Please sign in to comment.