Skip to content

Commit

Permalink
net/mlx5e: VF tunnel RX traffic offloading
Browse files Browse the repository at this point in the history
When tunnel endpoint is on VF the encapsulated RX traffic is exposed on the
representor of the VF without any further processing of rules installed on
the VF. Detect such case by checking if the device returned by route lookup
in decap rule handling code is a mlx5 VF and handle it with new redirection
tables API.

Example TC rules for VF tunnel traffic:

1. Rule that encapsulates the tunneled flow and redirects packets from
source VF rep to tunnel device:

$ tc -s filter show dev enp8s0f0_1 ingress
filter protocol ip pref 4 flower chain 0
filter protocol ip pref 4 flower chain 0 handle 0x1
  dst_mac 0a:40:bd:30:89:99
  src_mac ca:2e:a7:3f:f5:0f
  eth_type ipv4
  ip_tos 0/0x3
  ip_flags nofrag
  in_hw in_hw_count 1
        action order 1: tunnel_key  set
        src_ip 7.7.7.5
        dst_ip 7.7.7.1
        key_id 98
        dst_port 4789
        nocsum
        ttl 64 pipe
         index 1 ref 1 bind 1 installed 411 sec used 411 sec
        Action statistics:
        Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
        backlog 0b 0p requeues 0
        no_percpu
        used_hw_stats delayed

        action order 2: mirred (Egress Redirect to device vxlan_sys_4789) stolen
        index 1 ref 1 bind 1 installed 411 sec used 0 sec
        Action statistics:
        Sent 5615833 bytes 4028 pkt (dropped 0, overlimits 0 requeues 0)
        Sent software 0 bytes 0 pkt
        Sent hardware 5615833 bytes 4028 pkt
        backlog 0b 0p requeues 0
        cookie bb406d45d343bf7ade9690ae80c7cba4
        no_percpu
        used_hw_stats delayed

2. Rule that redirects from tunnel device to UL rep:

$ tc -s filter show dev vxlan_sys_4789 ingress
filter protocol ip pref 4 flower chain 0
filter protocol ip pref 4 flower chain 0 handle 0x1
  dst_mac ca:2e:a7:3f:f5:0f
  src_mac 0a:40:bd:30:89:99
  eth_type ipv4
  enc_dst_ip 7.7.7.5
  enc_src_ip 7.7.7.1
  enc_key_id 98
  enc_dst_port 4789
  enc_tos 0
  ip_flags nofrag
  in_hw in_hw_count 1
        action order 1: tunnel_key  unset pipe
         index 2 ref 1 bind 1 installed 434 sec used 434 sec
        Action statistics:
        Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
        backlog 0b 0p requeues 0
        used_hw_stats delayed

        action order 2: mirred (Egress Redirect to device enp8s0f0_1) stolen
        index 4 ref 1 bind 1 installed 434 sec used 0 sec
        Action statistics:
        Sent 129936 bytes 1082 pkt (dropped 0, overlimits 0 requeues 0)
        Sent software 0 bytes 0 pkt
        Sent hardware 129936 bytes 1082 pkt
        backlog 0b 0p requeues 0
        cookie ac17cf398c4c69e4a5b2f7aabd1b88ff
        no_percpu
        used_hw_stats delayed

Co-developed-by: Dmytro Linkin <dlinkin@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
  • Loading branch information
w1ldptr authored and Saeed Mahameed committed Feb 6, 2021
1 parent 4ad9116 commit a508728
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 8 deletions.
51 changes: 51 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,57 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
}
#endif

int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *flow_attr)
{
struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
TC_TUN_ROUTE_ATTR_INIT(attr);
u16 vport_num;
int err = 0;

if (flow_attr->ip_version == 4) {
/* Addresses are swapped for decap */
attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr);
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (flow_attr->ip_version == 6) {
/* Addresses are swapped for decap */
attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr);
}
#endif
else
return 0;

if (err)
return err;

if (attr.route_dev->netdev_ops != &mlx5e_netdev_ops ||
!mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev))
goto out;

err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
if (err)
goto out;

esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
misc_parameters.vxlan_vni);
esw_attr->rx_tun_attr->decap_vport = vport_num;

out:
if (flow_attr->ip_version == 4)
mlx5e_route_lookup_ipv4_put(&attr);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (flow_attr->ip_version == 6)
mlx5e_route_lookup_ipv6_put(&attr);
#endif
return err;
}

bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev)
{
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
#endif
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr);

bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
Expand Down
102 changes: 99 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1322,7 +1322,7 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)

static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);

static bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
struct mlx5_core_dev *out_mdev, *route_mdev;
struct mlx5e_priv *out_priv, *route_priv;
Expand All @@ -1339,8 +1339,7 @@ static bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device
return same_hw_devs(out_priv, route_priv);
}

static int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev,
u16 *vport)
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
struct mlx5e_priv *out_priv, *route_priv;
struct mlx5_core_dev *route_mdev;
Expand Down Expand Up @@ -1504,6 +1503,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
kfree(attr->parse_attr->tun_info[out_index]);
}
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);

mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);

Expand Down Expand Up @@ -2134,6 +2134,67 @@ void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
}
}

static u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
{
void *headers_v;
u16 ethertype;
u8 ip_version;

if (outer)
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
else
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);

ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
/* Return ip_version converted from ethertype anyway */
if (!ip_version) {
ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
ip_version = 4;
else if (ethertype == ETH_P_IPV6)
ip_version = 6;
}
return ip_version;
}

static int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec)
{
struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
struct mlx5_rx_tun_attr *tun_attr;
void *daddr, *saddr;
u8 ip_version;

tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
if (!tun_attr)
return -ENOMEM;

esw_attr->rx_tun_attr = tun_attr;
ip_version = mlx5e_tc_get_ip_version(spec, true);

if (ip_version == 4) {
daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
tun_attr->dst_ip.v4 = *(__be32 *)daddr;
tun_attr->src_ip.v4 = *(__be32 *)saddr;
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (ip_version == 6) {
int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);

daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
}
#endif
return 0;
}

static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
Expand All @@ -2142,6 +2203,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
u8 *match_level,
bool *match_inner)
{
struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
bool needs_mapping, sets_mapping;
Expand Down Expand Up @@ -2179,6 +2241,31 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
*/
if (!netif_is_bareudp(filter_dev))
flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
err = mlx5e_tc_set_attr_rx_tun(flow, spec);
if (err)
return err;
} else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
struct mlx5_flow_spec *tmp_spec;

tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
if (!tmp_spec) {
NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
return -ENOMEM;
}
memcpy(tmp_spec, spec, sizeof(*tmp_spec));

err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
if (err) {
kvfree(tmp_spec);
NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
return err;
}
err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
kvfree(tmp_spec);
if (err)
return err;
}

if (!needs_mapping && !sets_mapping)
Expand Down Expand Up @@ -4473,6 +4560,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
}
}

if (decap && esw_attr->rx_tun_attr) {
err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
if (err)
return err;
}

/* always set IP version for indirect table handling */
attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);

if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
/* For prio tag mode, replace vlan pop with rewrite vlan prio
Expand Down
4 changes: 4 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv,
struct mlx5_flow_handle *rule,
struct mlx5_flow_attr *attr);

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev);
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev,
u16 *vport);

#else /* CONFIG_MLX5_CLS_ACT */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
Expand Down
Loading

0 comments on commit a508728

Please sign in to comment.