Skip to content

Commit

Permalink
northd: Respect --ecmp-symmetric-reply for single routes.
Browse files Browse the repository at this point in the history
When preparing to build ECMP and static route flows, routes are sorted
into unique routes (meaning they are not part of a group) or they are
added to EMCP groups. Then, ECMP route flows are built out of the
groups, and static route flows are built out of the unique routes.
However, 'unique routes' include ones that use the
--ecmp-symmetric-reply flag, meaning that they may not be added to an
ECMP group, and thus ECMP symmetric reply would not be used for those
flows.

For example, if one route is added and traffic is started, and then
later another route is added, the already flowing traffic might be
rerouted since it wasn't conntracked initially. This could break
symmetric reply with traffic using a different next-hop than before.

This change makes it so that when the --ecmp-symmetric-reply flag is
used, even for unique routes, an ECMP group is created which they are
added to. Thus they are added to the ECMP route flow, rather than
static. This allows ECMP groups to persist even when there is only one
route.

Edited documentation to support this change.
Also updated incorrect actions in documentation.

Fixes: 4fdca65 ("Add ECMP symmetric replies.")
Reported-at: https://issues.redhat.com/browse/FDP-786
Signed-off-by: Rosemarie O'Riorden <rosemarie@redhat.com>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
(cherry picked from commit b93e9a5)
  • Loading branch information
roseoriorden authored and dceara committed Sep 26, 2024
1 parent 5c78002 commit e2fa4a5
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 13 deletions.
33 changes: 22 additions & 11 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -11570,21 +11570,27 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,

struct ds actions = DS_EMPTY_INITIALIZER;
ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16
"; %s = select(", REG_ECMP_GROUP_ID, eg->id,
REG_ECMP_MEMBER_ID);
"; %s = ", REG_ECMP_GROUP_ID, eg->id, REG_ECMP_MEMBER_ID);

bool is_first = true;
LIST_FOR_EACH (er, list_node, &eg->route_list) {
if (is_first) {
is_first = false;
} else {
ds_put_cstr(&actions, ", ");
if (!ovs_list_is_singleton(&eg->route_list)) {
bool is_first = true;

ds_put_cstr(&actions, "select(");
LIST_FOR_EACH (er, list_node, &eg->route_list) {
if (is_first) {
is_first = false;
} else {
ds_put_cstr(&actions, ", ");
}
ds_put_format(&actions, "%"PRIu16, er->id);
}
ds_put_format(&actions, "%"PRIu16, er->id);
ds_put_cstr(&actions, ");");
} else {
er = CONTAINER_OF(ovs_list_front(&eg->route_list),
struct ecmp_route_list_node, list_node);
ds_put_format(&actions, "%"PRIu16"; next;", er->id);
}

ds_put_cstr(&actions, ");");

ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, priority,
ds_cstr(&route_match), ds_cstr(&actions));

Expand Down Expand Up @@ -13485,6 +13491,11 @@ build_static_route_flows_for_lrouter(
if (group) {
ecmp_groups_add_route(group, route);
}
} else if (route->ecmp_symmetric_reply) {
/* Traffic for symmetric reply routes has to be conntracked
* even if there is only one next-hop, in case another next-hop
* is added later. */
ecmp_groups_add(&ecmp_groups, route);
} else {
unique_routes_add(&unique_routes, route);
}
Expand Down
13 changes: 12 additions & 1 deletion northd/ovn-northd.8.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4143,7 +4143,18 @@ next;
ip.ttl--;
flags.loopback = 1;
reg8[0..15] = <var>GID</var>;
select(reg8[16..31], <var>MID1</var>, <var>MID2</var>, ...);
reg8[16..31] = select(<var>MID1</var>, <var>MID2</var>, ...);
</pre>
<p>
However, when there is only one route in an ECMP group, group actions
will be:
</p>

<pre>
ip.ttl--;
flags.loopback = 1;
reg8[0..15] = <var>GID</var>;
reg8[16..31] = <var>MID1</var>);
</pre>
</li>

Expand Down
10 changes: 9 additions & 1 deletion tests/ovn-northd.at
Original file line number Diff line number Diff line change
Expand Up @@ -6437,14 +6437,22 @@ check ovn-nbctl lsp-set-type public-lr0 router
check ovn-nbctl lsp-set-addresses public-lr0 router
check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public

# ECMP flows will be added even if there is only one next-hop.
check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10

ovn-sbctl dump-flows lr0 > lr0flows

AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |sort], [0], [dnl
AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;)
table=??(lr_in_ip_routing ), priority=10300, match=(ct.rpl && ct_label.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;)
table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;)
table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = 1; next;)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/table=../table=??/' |sort], [0], [dnl
table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;)
table=??(lr_in_ip_routing_ecmp), priority=100 , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
table=??(lr_in_ip_routing_ecmp), priority=150 , match=(reg8[[0..15]] == 0), action=(next;)
])

Expand Down
48 changes: 48 additions & 0 deletions tests/system-ovn.at
Original file line number Diff line number Diff line change
Expand Up @@ -6146,6 +6146,30 @@ test $(ovs-ofctl dump-flows br-int | grep -c 'table=77, n_packets') -eq 0

ovs-ofctl dump-flows br-int

# Now remove one ECMP route and check that traffic is still being conntracked.
check ovn-nbctl --policy="src-ip" lr-route-del R1 10.0.0.0/24 20.0.0.3
check ovn-nbctl --wait=hv sync
AT_CHECK([ovs-appctl dpctl/flush-conntrack])
NS_CHECK_EXEC([bob1], [nc -z 10.0.0.2 80], [0])
NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
2
])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
2
])

AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(172.16.0.1) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl
icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>)
])

OVS_APP_EXIT_AND_WAIT([ovn-controller])

as ovn-sb
Expand Down Expand Up @@ -6349,6 +6373,30 @@ test $(ovs-ofctl dump-flows br-int | grep -c 'table=77, n_packets') -eq 0

ovs-ofctl dump-flows br-int

# Now remove one ECMP route and check that traffic is still being conntracked.
check ovn-nbctl --policy="src-ip" lr-route-del R1 fd01::/126 fd02::3
check ovn-nbctl --wait=hv sync
AT_CHECK([ovs-appctl dpctl/flush-conntrack])
NS_CHECK_EXEC([bob1], [nc -6 -z fd01::2 80], [0])
NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \
[0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
2
])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
2
])

AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(fd01::2) | \
sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>)
])

OVS_APP_EXIT_AND_WAIT([ovn-controller])

as ovn-sb
Expand Down

0 comments on commit e2fa4a5

Please sign in to comment.