From ae0662f84b105776734cb089703a7bf834bac195 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sat, 20 Jan 2018 04:27:58 +0800 Subject: [PATCH 01/47] netfilter: nf_tables: nf_tables_obj_lookup_byhandle() can be static Fixes: 3ecbfd65f50e ("netfilter: nf_tables: allocate handle and delete objects via handle") Signed-off-by: Fengguang Wu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8b9fe30de0cdda..8cc7fc970f0cec 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4328,9 +4328,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, } EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); -struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, - const struct nlattr *nla, - u32 objtype, u8 genmask) +static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) { struct nft_object *obj; @@ -4850,7 +4850,7 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, } EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); -struct nft_flowtable * +static struct nft_flowtable * nf_tables_flowtable_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, u8 genmask) { From cceae76ef3a1181242e4f7b559a7bfc904a9855c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 11 Feb 2018 19:17:20 +0900 Subject: [PATCH 02/47] netfilter: nfnetlink_acct: remove useless parameter parameter skb in nfnl_acct_overquota is not used anywhere. 
Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_acct.h | 3 +-- net/netfilter/nfnetlink_acct.c | 3 +-- net/netfilter/xt_nfacct.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index b4d741195c28d9..beee8bffe49e0d 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -16,6 +16,5 @@ struct nf_acct; struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); -int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, - struct nf_acct *nfacct); +int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct); #endif /* _NFNL_ACCT_H */ diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 88d427f9f9e6a0..b9505bcd3827d9 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -467,8 +467,7 @@ static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct) GFP_ATOMIC); } -int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb, - struct nf_acct *nfacct) +int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct) { u64 now; u64 *quota; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index c8674deed4eb43..6b56f4170860c4 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -28,7 +28,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct); + overquota = nfnl_acct_overquota(xt_net(par), info->nfacct); return overquota == NFACCT_UNDERQUOTA ? 
false : true; } From 580c7d9e4cc69802189b872ad2df0d704c649441 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 11 Feb 2018 22:57:29 +0900 Subject: [PATCH 03/47] netfilter: xt_cluster: get rid of xt_cluster_ipv6_is_multicast If we use ipv6_addr_is_multicast instead of xt_cluster_ipv6_is_multicast, we can reduce code size. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cluster.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 0068688995c82d..dfbdbb2fc0ed8b 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -59,13 +59,6 @@ xt_cluster_hash(const struct nf_conn *ct, return reciprocal_scale(hash, info->total_nodes); } -static inline bool -xt_cluster_ipv6_is_multicast(const struct in6_addr *addr) -{ - __be32 st = addr->s6_addr32[0]; - return ((st & htonl(0xFF000000)) == htonl(0xFF000000)); -} - static inline bool xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) { @@ -76,8 +69,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); break; case NFPROTO_IPV6: - is_multicast = - xt_cluster_ipv6_is_multicast(&ipv6_hdr(skb)->daddr); + is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr); break; default: WARN_ON(1); From 433029ecc62788296cacca50ceb24db90c17a4a2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 11 Feb 2018 23:28:18 +0900 Subject: [PATCH 04/47] netfilter: nf_conntrack_broadcast: remove useless parameter parameter protoff in nf_conntrack_broadcast_help is not used anywhere.
Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 3 +-- net/netfilter/nf_conntrack_broadcast.c | 1 - net/netfilter/nf_conntrack_netbios_ns.c | 5 +++-- net/netfilter/nf_conntrack_snmp.c | 5 +++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index fc39bbaf107c79..32c2a94a219d60 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -132,8 +132,7 @@ void nf_conntrack_helper_pernet_fini(struct net *net); int nf_conntrack_helper_init(void); void nf_conntrack_helper_fini(void); -int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, +int nf_conntrack_broadcast_help(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int timeout); diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index ecc3ab78463392..a1086bdec2429c 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -20,7 +20,6 @@ #include int nf_conntrack_broadcast_help(struct sk_buff *skb, - unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int timeout) diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 496ce173f0c193..a4a59dc7cf1760 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -41,9 +41,10 @@ static struct nf_conntrack_expect_policy exp_policy = { }; static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, enum ip_conntrack_info ctinfo) + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { - return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); + return nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout); } static struct nf_conntrack_helper helper 
__read_mostly = { diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c index 87b95a2c270cd3..2d0f8e010821ba 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -36,11 +36,12 @@ int (*nf_nat_snmp_hook)(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, enum ip_conntrack_info ctinfo) + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { typeof(nf_nat_snmp_hook) nf_nat_snmp; - nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); + nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout); nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook); if (nf_nat_snmp && ct->status & IPS_NAT_MASK) From 2db3fec507bd822ed81c031221b97701238949dc Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 13 Feb 2018 08:25:57 -0600 Subject: [PATCH 05/47] netfilter: ipt_ah: return boolean instead of integer Return statements in functions returning bool should use true/false instead of 1/0. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index a787d07f6cb757..7c6c20eaf4dbf0 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -47,7 +47,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) */ pr_debug("Dropping evil AH tinygram.\n"); par->hotdrop = true; - return 0; + return false; } return spi_match(ahinfo->spis[0], ahinfo->spis[1], From f31e5f1a891f989f107e8caa6b49dd4df0e12265 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 16 Feb 2018 18:04:56 +0800 Subject: [PATCH 06/47] netfilter: unlock xt_table earlier in __do_replace Now it's doing cleanup_entry for oldinfo under the xt_table lock, but it's not really necessary. 
After the replacement job is done in xt_replace_table, oldinfo is not used elsewhere any more, and it can be freed without xt_table lock safely. The important thing is that rtnl_lock is called in some xt_target destroy, which means rtnl_lock, a big lock is used in xt_table lock, a smaller one. It usually could be the reason why a dead lock may happen. Besides, all xt_target/match checkentry is called out of xt_table lock. It's better also to move all cleanup_entry calling out of xt_table lock, just as do_replace_finish does for ebtables. Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv6/netfilter/ip6_tables.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c36ffce3c81249..a0c7ce76879c49 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -925,6 +925,8 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); + xt_table_unlock(t); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ @@ -939,7 +941,6 @@ static int __do_replace(struct net *net, const char *name, net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); } vfree(counters); - xt_table_unlock(t); return ret; put_module: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d4f7584d2dbec7..4f7153e25e0bf7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1087,6 +1087,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); + xt_table_unlock(t); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ @@ -1100,7 +1102,6 @@ __do_replace(struct net *net, const char 
*name, unsigned int valid_hooks, net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); } vfree(counters); - xt_table_unlock(t); return ret; put_module: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4de8ac1e5af4a6..6c44033decab36 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1105,6 +1105,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); + xt_table_unlock(t); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ @@ -1118,7 +1120,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); } vfree(counters); - xt_table_unlock(t); return ret; put_module: From 07a9da51b4b6aece8bc71e0b1b601fc4c3eb8b64 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:27 +0100 Subject: [PATCH 07/47] netfilter: x_tables: check standard verdicts in core Userspace must provide a valid verdict to the standard target. The verdict can be either a jump (signed int > 0), or a return code. Allowed return codes are either RETURN (pop from stack), NF_ACCEPT, DROP and QUEUE (latter is allowed for legacy reasons). Jump offsets (verdict > 0) are checked in more detail later on when loop-detection is performed. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 5 ---- net/ipv4/netfilter/ip_tables.c | 5 ---- net/ipv6/netfilter/ip6_tables.c | 5 ---- net/netfilter/x_tables.c | 49 +++++++++++++++++++++++++++++---- 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a0c7ce76879c49..c9ffa884a4eef7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -334,11 +334,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, t->verdict < 0) || visited) { unsigned int oldpos, size; - if ((strcmp(t->target.u.user.name, - XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) - return 0; - /* Return: backtrack through the last * big jump. */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4f7153e25e0bf7..c9b57a6bf96a74 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -402,11 +402,6 @@ mark_source_chains(const struct xt_table_info *newinfo, t->verdict < 0) || visited) { unsigned int oldpos, size; - if ((strcmp(t->target.u.user.name, - XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) - return 0; - /* Return: backtrack through the last big jump. */ do { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6c44033decab36..f4695422193342 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -420,11 +420,6 @@ mark_source_chains(const struct xt_table_info *newinfo, t->verdict < 0) || visited) { unsigned int oldpos, size; - if ((strcmp(t->target.u.user.name, - XT_STANDARD_TARGET) == 0) && - t->verdict < -NF_MAX_VERDICT - 1) - return 0; - /* Return: backtrack through the last big jump. 
*/ do { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d9deebe599ecac..2e4d423e58e62c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -654,6 +654,31 @@ struct compat_xt_standard_target { compat_uint_t verdict; }; +static bool verdict_ok(int verdict) +{ + if (verdict > 0) + return true; + + if (verdict < 0) { + int v = -verdict - 1; + + if (verdict == XT_RETURN) + return true; + + switch (v) { + case NF_ACCEPT: return true; + case NF_DROP: return true; + case NF_QUEUE: return true; + default: + break; + } + + return false; + } + + return false; +} + int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) @@ -675,9 +700,15 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, if (target_offset + t->u.target_size > next_offset) return -EINVAL; - if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) - return -EINVAL; + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) { + const struct compat_xt_standard_target *st = (const void *)t; + + if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset) + return -EINVAL; + + if (!verdict_ok(st->verdict)) + return -EINVAL; + } /* compat_xt_entry match has less strict alignment requirements, * otherwise they are identical. 
In case of padding differences @@ -757,9 +788,15 @@ int xt_check_entry_offsets(const void *base, if (target_offset + t->u.target_size > next_offset) return -EINVAL; - if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) - return -EINVAL; + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) { + const struct xt_standard_target *st = (const void *)t; + + if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset) + return -EINVAL; + + if (!verdict_ok(st->verdict)) + return -EINVAL; + } return xt_check_entry_match(elems, base + target_offset, __alignof__(struct xt_entry_match)); From 472ebdcd15ebdb8ebe20474ef1ce09abcb241e7d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:28 +0100 Subject: [PATCH 08/47] netfilter: x_tables: check error target size too Check that the userspace ERROR target (custom user-defined chains) matches the expected format, and that the chain name is null terminated. This is irrelevant for the kernel, but iptables itself relies on sane input when it dumps rules from the kernel.
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2e4d423e58e62c..f045bb4f706391 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -654,6 +654,11 @@ struct compat_xt_standard_target { compat_uint_t verdict; }; +struct compat_xt_error_target { + struct compat_xt_entry_target t; + char errorname[XT_FUNCTION_MAXNAMELEN]; +}; + static bool verdict_ok(int verdict) { if (verdict > 0) @@ -679,6 +684,12 @@ static bool verdict_ok(int verdict) return false; } +static bool error_tg_ok(unsigned int usersize, unsigned int kernsize, + const char *msg, unsigned int msglen) +{ + return usersize == kernsize && strnlen(msg, msglen) < msglen; +} + int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset) @@ -708,6 +719,12 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, if (!verdict_ok(st->verdict)) return -EINVAL; + } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) { + const struct compat_xt_error_target *et = (const void *)t; + + if (!error_tg_ok(t->u.target_size, sizeof(*et), + et->errorname, sizeof(et->errorname))) + return -EINVAL; } /* compat_xt_entry match has less strict alignment requirements, @@ -796,6 +813,12 @@ int xt_check_entry_offsets(const void *base, if (!verdict_ok(st->verdict)) return -EINVAL; + } else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) { + const struct xt_error_target *et = (const void *)t; + + if (!error_tg_ok(t->u.target_size, sizeof(*et), + et->errorname, sizeof(et->errorname))) + return -EINVAL; } return xt_check_entry_match(elems, base + target_offset, From 1b293e30f759b03f246baae862bdf35e57b2c39e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:29 +0100 Subject: [PATCH 09/47] netfilter: x_tables: move hook entry checks into 
core Allow followup patch to change one location instead of three. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 ++ net/ipv4/netfilter/arp_tables.c | 13 +++---------- net/ipv4/netfilter/ip_tables.c | 13 +++---------- net/ipv6/netfilter/ip6_tables.c | 13 +++---------- net/netfilter/x_tables.c | 29 +++++++++++++++++++++++++++++ 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1313b35c3ab791..fa0c19c328f1de 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -281,6 +281,8 @@ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); +int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks); + unsigned int *xt_alloc_entry_offsets(unsigned int size); bool xt_find_jump_offset(const unsigned int *offsets, unsigned int target, unsigned int size); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c9ffa884a4eef7..be5821215ea0e9 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -555,16 +555,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, if (i != repl->num_entries) goto out_free; - /* Check hooks all assigned */ - for (i = 0; i < NF_ARP_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(repl->valid_hooks & (1 << i))) - continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) - goto out_free; - if (newinfo->underflow[i] == 0xFFFFFFFF) - goto out_free; - } + ret = xt_check_table_hooks(newinfo, repl->valid_hooks); + if (ret) + goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c9b57a6bf96a74..29bda9484a3328 100644 --- a/net/ipv4/netfilter/ip_tables.c +++
b/net/ipv4/netfilter/ip_tables.c @@ -702,16 +702,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (i != repl->num_entries) goto out_free; - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(repl->valid_hooks & (1 << i))) - continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) - goto out_free; - if (newinfo->underflow[i] == 0xFFFFFFFF) - goto out_free; - } + ret = xt_check_table_hooks(newinfo, repl->valid_hooks); + if (ret) + goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f4695422193342..ba3776a4d30553 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -720,16 +720,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (i != repl->num_entries) goto out_free; - /* Check hooks all assigned */ - for (i = 0; i < NF_INET_NUMHOOKS; i++) { - /* Only hooks which are valid */ - if (!(repl->valid_hooks & (1 << i))) - continue; - if (newinfo->hook_entry[i] == 0xFFFFFFFF) - goto out_free; - if (newinfo->underflow[i] == 0xFFFFFFFF) - goto out_free; - } + ret = xt_check_table_hooks(newinfo, repl->valid_hooks); + if (ret) + goto out_free; if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { ret = -ELOOP; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f045bb4f706391..5d8ba89a8da840 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -518,6 +518,35 @@ static int xt_check_entry_match(const char *match, const char *target, return 0; } +/** xt_check_table_hooks - check hook entry points are sane + * + * @info xt_table_info to check + * @valid_hooks - hook entry points that we can enter from + * + * Validates that the hook entry and underflows points are set up. + * + * Return: 0 on success, negative errno on failure. 
+ */ +int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks) +{ + unsigned int i; + + BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow)); + + for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) { + if (!(valid_hooks & (1 << i))) + continue; + + if (info->hook_entry[i] == 0xFFFFFFFF) + return -EINVAL; + if (info->underflow[i] == 0xFFFFFFFF) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(xt_check_table_hooks); + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { From e816a2ce49e49e3906f614009c919334a0c6ba6a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:30 +0100 Subject: [PATCH 10/47] netfilter: x_tables: enforce unique and ascending entry points Harmless from kernel point of view, but iptables assumes that this is true when decoding a ruleset. iptables walks the dumped blob from kernel, and, for each entry that creates a new chain it prints out rule/chain information. Base chains (hook entry points) are thus only shown when they appear in the rule blob. One base chain that is referenced multiple times in hook blob is then only printed once. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 5d8ba89a8da840..4e6cbb38e61624 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -529,10 +529,15 @@ static int xt_check_entry_match(const char *match, const char *target, */ int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks) { - unsigned int i; + const char *err = "unsorted underflow"; + unsigned int i, max_uflow, max_entry; + bool check_hooks = false; BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow)); + max_entry = 0; + max_uflow = 0; + for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) { if (!(valid_hooks & (1 << i))) continue; @@ -541,9 +546,33 @@ int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_ho return -EINVAL; if (info->underflow[i] == 0xFFFFFFFF) return -EINVAL; + + if (check_hooks) { + if (max_uflow > info->underflow[i]) + goto error; + + if (max_uflow == info->underflow[i]) { + err = "duplicate underflow"; + goto error; + } + if (max_entry > info->hook_entry[i]) { + err = "unsorted entry"; + goto error; + } + if (max_entry == info->hook_entry[i]) { + err = "duplicate entry"; + goto error; + } + } + max_entry = info->hook_entry[i]; + max_uflow = info->underflow[i]; + check_hooks = true; } return 0; +error: + pr_err_ratelimited("%s at hook %d\n", err, i); + return -EINVAL; } EXPORT_SYMBOL(xt_check_table_hooks); From 19926968ea86a286aa6fbea16ee3f2e7442f10f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:31 +0100 Subject: [PATCH 11/47] netfilter: x_tables: cap allocations at 512 mbyte Arbitrary limit, however, this still allows huge rulesets (> 1 million rules). This helps with automated fuzzer as it prevents oom-killer invocation. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 4e6cbb38e61624..dc68ac49614a94 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define XT_PCPU_BLOCK_SIZE 4096 +#define XT_MAX_TABLE_SIZE (512 * 1024 * 1024) struct compat_delta { unsigned int offset; /* offset in kernel */ @@ -1117,7 +1118,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) struct xt_table_info *info = NULL; size_t sz = sizeof(*info) + size; - if (sz < sizeof(*info)) + if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE) return NULL; /* __GFP_NORETRY is not fully supported by kvmalloc but it should From 9d5c12a7c08f67999772065afd50fb222072114e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:32 +0100 Subject: [PATCH 12/47] netfilter: x_tables: limit allocation requests for blob rule heads This is a very conservative limit (134217728 rules), but good enough to not trigger frequent oom from syzkaller. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index dc68ac49614a94..01f8e122e74ee3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -894,6 +894,9 @@ EXPORT_SYMBOL(xt_check_entry_offsets); */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { + if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int)) + return NULL; + return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); } From c84ca954ac9fa67a6ce27f91f01e4451c74fd8f6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:33 +0100 Subject: [PATCH 13/47] netfilter: x_tables: add counters allocation wrapper allows to have size checks in a single spot. This is supposed to reduce oom situations when fuzz-testing xtables. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 + net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 15 +++++++++++++++ 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index fa0c19c328f1de..0bd93c589a8c1a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -301,6 +301,7 @@ int xt_data_to_user(void __user *dst, const void *src, void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); +struct xt_counters *xt_counters_alloc(unsigned int counters); struct xt_table *xt_register_table(struct net *net, const struct xt_table *table, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index be5821215ea0e9..82ba09b50fdb7d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -883,7 +883,7 @@ static 
int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vzalloc(num_counters * sizeof(struct xt_counters)); + counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 29bda9484a3328..4901ca6c3e09a4 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1045,7 +1045,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ipt_entry *iter; ret = 0; - counters = vzalloc(num_counters * sizeof(struct xt_counters)); + counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ba3776a4d30553..e84cec49b60f9a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1063,7 +1063,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ip6t_entry *iter; ret = 0; - counters = vzalloc(num_counters * sizeof(struct xt_counters)); + counters = xt_counters_alloc(num_counters); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 01f8e122e74ee3..82b1f8f52ac6b0 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1290,6 +1290,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) return 0; } +struct xt_counters *xt_counters_alloc(unsigned int counters) +{ + struct xt_counters *mem; + + if (counters == 0 || counters > INT_MAX / sizeof(*mem)) + return NULL; + + counters *= sizeof(*mem); + if (counters > XT_MAX_TABLE_SIZE) + return NULL; + + return vzalloc(counters); +} +EXPORT_SYMBOL(xt_counters_alloc); + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, From 9782a11efc072faaf91d4aa60e9d23553f918029 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:34 +0100 
Subject: [PATCH 14/47] netfilter: compat: prepare xt_compat_init_offsets to return errors should have no impact, function still always returns 0. This patch is only to ease review. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- net/bridge/netfilter/ebtables.c | 10 ++++++++-- net/ipv4/netfilter/arp_tables.c | 10 +++++++--- net/ipv4/netfilter/ip_tables.c | 8 ++++++-- net/ipv6/netfilter/ip6_tables.c | 10 +++++++--- net/netfilter/x_tables.c | 4 +++- 6 files changed, 32 insertions(+), 12 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 0bd93c589a8c1a..7bd896dc78dfb3 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -510,7 +510,7 @@ void xt_compat_unlock(u_int8_t af); int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); void xt_compat_flush_offsets(u_int8_t af); -void xt_compat_init_offsets(u_int8_t af, unsigned int number); +int xt_compat_init_offsets(u8 af, unsigned int number); int xt_compat_calc_jump(u_int8_t af, unsigned int offset); int xt_compat_match_offset(const struct xt_match *match); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 02c4b409d31733..217aa79f7b2ae6 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1819,10 +1819,14 @@ static int compat_table_info(const struct ebt_table_info *info, { unsigned int size = info->entries_size; const void *entries = info->entries; + int ret; newinfo->entries_size = size; - xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); + if (ret) + return ret; + return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } @@ -2245,7 +2249,9 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - xt_compat_init_offsets(NFPROTO_BRIDGE, 
tmp.nentries); + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + if (ret < 0) + goto out_unlock; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 82ba09b50fdb7d..aaafdbd15ad351 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -769,7 +769,9 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; - xt_compat_init_offsets(NFPROTO_ARP, info->number); + ret = xt_compat_init_offsets(NFPROTO_ARP, info->number); + if (ret) + return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1156,7 +1158,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, struct compat_arpt_entry *iter0; struct arpt_replace repl; unsigned int size; - int ret = 0; + int ret; info = *pinfo; entry0 = *pentry0; @@ -1165,7 +1167,9 @@ static int translate_compat_table(struct xt_table_info **pinfo, j = 0; xt_compat_lock(NFPROTO_ARP); - xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); + ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries); + if (ret) + goto out_unlock; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4901ca6c3e09a4..f9063513f9d198 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -933,7 +933,9 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; - xt_compat_init_offsets(AF_INET, info->number); + ret = xt_compat_init_offsets(AF_INET, info->number); + if (ret) + return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1407,7 +1409,9 @@ translate_compat_table(struct net *net, j = 0; xt_compat_lock(AF_INET); - xt_compat_init_offsets(AF_INET, compatr->num_entries); + ret = xt_compat_init_offsets(AF_INET, compatr->num_entries); + if (ret) + goto out_unlock; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e84cec49b60f9a..3c36a4c77f2920 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -950,7 +950,9 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries; - xt_compat_init_offsets(AF_INET6, info->number); + ret = xt_compat_init_offsets(AF_INET6, info->number); + if (ret) + return ret; xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1414,7 +1416,7 @@ translate_compat_table(struct net *net, struct compat_ip6t_entry *iter0; struct ip6t_replace repl; unsigned int size; - int ret = 0; + int ret; info = *pinfo; entry0 = *pentry0; @@ -1423,7 +1425,9 @@ translate_compat_table(struct net *net, j = 0; xt_compat_lock(AF_INET6); - xt_compat_init_offsets(AF_INET6, compatr->num_entries); + ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries); + if (ret) + goto out_unlock; /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 82b1f8f52ac6b0..e878c85a926805 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -632,10 +632,12 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset) } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); -void xt_compat_init_offsets(u_int8_t af, unsigned int number) +int xt_compat_init_offsets(u8 af, unsigned int number) { xt[af].number = number; xt[af].cur = 0; + + return 0; } EXPORT_SYMBOL(xt_compat_init_offsets); From 7d7d7e02111e9a4dc9d0658597f528f815d820fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:35 +0100 Subject: [PATCH 15/47] netfilter: compat: reject huge allocation requests no need to bother even trying to allocate huge compat offset arrays, such a ruleset is rejected later on anyway because we refuse to allocate overly large rule blobs. However, compat translation happens before blob allocation, so we should add a check there too. This is supposed to help with fuzzing by avoiding oom-killer. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e878c85a926805..33724b08b8f036 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -582,14 +582,8 @@ int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { struct xt_af *xp = &xt[af]; - if (!xp->compat_tab) { - if (!xp->number) - return -EINVAL; - xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); - if (!xp->compat_tab) - return -ENOMEM; - xp->cur = 0; - } + if (WARN_ON(!xp->compat_tab)) + return -ENOMEM; if (xp->cur >= xp->number) return -EINVAL; @@ -634,6 +628,22 @@ EXPORT_SYMBOL_GPL(xt_compat_calc_jump); int xt_compat_init_offsets(u8 af, unsigned int number) { + size_t mem; + + if (!number || number > (INT_MAX / sizeof(struct compat_delta))) + return -EINVAL; + + if (WARN_ON(xt[af].compat_tab)) + return -EINVAL; + + mem = sizeof(struct compat_delta) * number; + if (mem > XT_MAX_TABLE_SIZE) + return -ENOMEM; + + xt[af].compat_tab = vmalloc(mem); + if (!xt[af].compat_tab) + return -ENOMEM; + xt[af].number = number; xt[af].cur = 0; From 89370860686a54fc0642c7ae68213cc1fc6d8e04 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:36 +0100 Subject: [PATCH 16/47] netfilter: x_tables: make sure compat af mutex is held Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 33724b08b8f036..7521e8a72c064e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -582,6 +582,8 @@ int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { struct xt_af *xp = &xt[af]; + WARN_ON(!mutex_is_locked(&xt[af].compat_mutex)); + if (WARN_ON(!xp->compat_tab)) return -ENOMEM; @@ -599,6 +601,8 
@@ EXPORT_SYMBOL_GPL(xt_compat_add_offset); void xt_compat_flush_offsets(u_int8_t af) { + WARN_ON(!mutex_is_locked(&xt[af].compat_mutex)); + if (xt[af].compat_tab) { vfree(xt[af].compat_tab); xt[af].compat_tab = NULL; @@ -630,6 +634,8 @@ int xt_compat_init_offsets(u8 af, unsigned int number) { size_t mem; + WARN_ON(!mutex_is_locked(&xt[af].compat_mutex)); + if (!number || number > (INT_MAX / sizeof(struct compat_delta))) return -EINVAL; From 0d7df906a0e78079a02108b06d32c3ef2238ad25 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Feb 2018 19:42:37 +0100 Subject: [PATCH 17/47] netfilter: x_tables: ensure last rule in base chain matches underflow/policy Harmless from kernel point of view, but again iptables assumes that this is true when decoding ruleset coming from kernel. If a (syzkaller generated) ruleset doesn't have the underflow/policy stored as the last rule in the base chain, then iptables will abort() because it doesn't find the chain policy. libiptc assumes that the policy is the last rule in the basechain, which is only true for iptables-generated rulesets. Unfortunately this needs code duplication -- the functions need the struct layout of the rule head, but that is different for ip/ip6/arptables. NB: pr_warn could be pr_debug but in case this breaks rulesets somehow it's useful to know why blob was rejected. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 17 ++++++++++++++++- net/ipv4/netfilter/ip_tables.c | 17 ++++++++++++++++- net/ipv6/netfilter/ip6_tables.c | 17 ++++++++++++++++- 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index aaafdbd15ad351..f366ff1cfc19ed 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -309,10 +309,13 @@ static int mark_source_chains(const struct xt_table_info *newinfo, for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct arpt_entry *e = entry0 + pos; + unsigned int last_pos, depth; if (!(valid_hooks & (1 << hook))) continue; + depth = 0; + last_pos = pos; /* Set initial back pointer. */ e->counters.pcnt = pos; @@ -343,6 +346,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, pos = e->counters.pcnt; e->counters.pcnt = 0; + if (depth) + --depth; /* We're at the start. */ if (pos == oldpos) goto next; @@ -367,6 +372,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; + + if (entry0 + newpos != arpt_next_entry(e)) + ++depth; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -377,8 +385,15 @@ static int mark_source_chains(const struct xt_table_info *newinfo, e->counters.pcnt = pos; pos = newpos; } + if (depth == 0) + last_pos = pos; + } +next: + if (last_pos != newinfo->underflow[hook]) { + pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n", + last_pos, newinfo->underflow[hook], hook); + return 0; } -next: ; } return 1; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f9063513f9d198..2362ca2c9e0c10 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -378,10 +378,13 @@ mark_source_chains(const struct xt_table_info *newinfo, for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ipt_entry *e = entry0 + pos; + unsigned int last_pos, depth; if (!(valid_hooks & (1 << hook))) continue; + depth = 0; + last_pos = pos; /* Set initial back pointer. */ e->counters.pcnt = pos; @@ -410,6 +413,8 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = e->counters.pcnt; e->counters.pcnt = 0; + if (depth) + --depth; /* We're at the start. */ if (pos == oldpos) goto next; @@ -434,6 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; + + if (entry0 + newpos != ipt_next_entry(e)) + ++depth; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -444,8 +452,15 @@ mark_source_chains(const struct xt_table_info *newinfo, e->counters.pcnt = pos; pos = newpos; } + if (depth == 0) + last_pos = pos; + } +next: + if (last_pos != newinfo->underflow[hook]) { + pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n", + last_pos, newinfo->underflow[hook], hook); + return 0; } -next: ; } return 1; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 3c36a4c77f2920..004508753abc62 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -396,10 +396,13 @@ mark_source_chains(const struct xt_table_info *newinfo, for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = entry0 + pos; + unsigned int last_pos, depth; if (!(valid_hooks & (1 << hook))) continue; + depth = 0; + last_pos = pos; /* Set initial back pointer. */ e->counters.pcnt = pos; @@ -428,6 +431,8 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = e->counters.pcnt; e->counters.pcnt = 0; + if (depth) + --depth; /* We're at the start. */ if (pos == oldpos) goto next; @@ -452,6 +457,9 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; + + if (entry0 + newpos != ip6t_next_entry(e)) + ++depth; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -462,8 +470,15 @@ mark_source_chains(const struct xt_table_info *newinfo, e->counters.pcnt = pos; pos = newpos; } + if (depth == 0) + last_pos = pos; + } +next: + if (last_pos != newinfo->underflow[hook]) { + pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n", + last_pos, newinfo->underflow[hook], hook); + return 0; } -next: ; } return 1; } From 3427b2ab63faccafe774ea997fc2da7faf690c5a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Mar 2018 18:58:38 -0800 Subject: [PATCH 18/47] netfilter: make xt_rateest hash table per net As suggested by Eric, we need to make the xt_rateest hash table and its lock per netns to reduce lock contentions. Cc: Florian Westphal Cc: Eric Dumazet Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/xt_rateest.h | 4 +- net/netfilter/xt_RATEEST.c | 91 +++++++++++++++++++++--------- net/netfilter/xt_rateest.c | 10 ++-- 3 files changed, 72 insertions(+), 33 deletions(-) diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index b1db13772554c6..832ab69efda57c 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -21,7 +21,7 @@ struct xt_rateest { struct net_rate_estimator __rcu *rate_est; }; -struct xt_rateest *xt_rateest_lookup(const char *name); -void xt_rateest_put(struct xt_rateest *est); +struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name); +void xt_rateest_put(struct net *net, struct xt_rateest *est); #endif /* _XT_RATEEST_H */ diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 141c295191f653..dec843cadf4626 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -14,15 +14,21 @@ #include #include #include +#include #include #include #include -static DEFINE_MUTEX(xt_rateest_mutex); - #define RATEEST_HSIZE 16 -static struct 
hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; + +struct xt_rateest_net { + struct mutex hash_lock; + struct hlist_head hash[RATEEST_HSIZE]; +}; + +static unsigned int xt_rateest_id; + static unsigned int jhash_rnd __read_mostly; static unsigned int xt_rateest_hash(const char *name) @@ -31,21 +37,23 @@ static unsigned int xt_rateest_hash(const char *name) (RATEEST_HSIZE - 1); } -static void xt_rateest_hash_insert(struct xt_rateest *est) +static void xt_rateest_hash_insert(struct xt_rateest_net *xn, + struct xt_rateest *est) { unsigned int h; h = xt_rateest_hash(est->name); - hlist_add_head(&est->list, &rateest_hash[h]); + hlist_add_head(&est->list, &xn->hash[h]); } -static struct xt_rateest *__xt_rateest_lookup(const char *name) +static struct xt_rateest *__xt_rateest_lookup(struct xt_rateest_net *xn, + const char *name) { struct xt_rateest *est; unsigned int h; h = xt_rateest_hash(name); - hlist_for_each_entry(est, &rateest_hash[h], list) { + hlist_for_each_entry(est, &xn->hash[h], list) { if (strcmp(est->name, name) == 0) { est->refcnt++; return est; @@ -55,20 +63,23 @@ static struct xt_rateest *__xt_rateest_lookup(const char *name) return NULL; } -struct xt_rateest *xt_rateest_lookup(const char *name) +struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name) { + struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); struct xt_rateest *est; - mutex_lock(&xt_rateest_mutex); - est = __xt_rateest_lookup(name); - mutex_unlock(&xt_rateest_mutex); + mutex_lock(&xn->hash_lock); + est = __xt_rateest_lookup(xn, name); + mutex_unlock(&xn->hash_lock); return est; } EXPORT_SYMBOL_GPL(xt_rateest_lookup); -void xt_rateest_put(struct xt_rateest *est) +void xt_rateest_put(struct net *net, struct xt_rateest *est) { - mutex_lock(&xt_rateest_mutex); + struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); + + mutex_lock(&xn->hash_lock); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->rate_est); @@ -78,7 +89,7 @@ void 
xt_rateest_put(struct xt_rateest *est) */ kfree_rcu(est, rcu); } - mutex_unlock(&xt_rateest_mutex); + mutex_unlock(&xn->hash_lock); } EXPORT_SYMBOL_GPL(xt_rateest_put); @@ -98,6 +109,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { + struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id); struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; struct { @@ -108,10 +120,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); - mutex_lock(&xt_rateest_mutex); - est = __xt_rateest_lookup(info->name); + mutex_lock(&xn->hash_lock); + est = __xt_rateest_lookup(xn, info->name); if (est) { - mutex_unlock(&xt_rateest_mutex); + mutex_unlock(&xn->hash_lock); /* * If estimator parameters are specified, they must match the * existing estimator. @@ -119,7 +131,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) if ((!info->interval && !info->ewma_log) || (info->interval != est->params.interval || info->ewma_log != est->params.ewma_log)) { - xt_rateest_put(est); + xt_rateest_put(par->net, est); return -EINVAL; } info->est = est; @@ -148,14 +160,14 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) goto err2; info->est = est; - xt_rateest_hash_insert(est); - mutex_unlock(&xt_rateest_mutex); + xt_rateest_hash_insert(xn, est); + mutex_unlock(&xn->hash_lock); return 0; err2: kfree(est); err1: - mutex_unlock(&xt_rateest_mutex); + mutex_unlock(&xn->hash_lock); return ret; } @@ -163,7 +175,7 @@ static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { struct xt_rateest_target_info *info = par->targinfo; - xt_rateest_put(info->est); + xt_rateest_put(par->net, info->est); } static struct xt_target xt_rateest_tg_reg __read_mostly = { @@ -178,19 +190,46 @@ static struct xt_target xt_rateest_tg_reg __read_mostly = { .me = THIS_MODULE, }; 
-static int __init xt_rateest_tg_init(void) +static __net_init int xt_rateest_net_init(struct net *net) +{ + struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); + int i; + + mutex_init(&xn->hash_lock); + for (i = 0; i < ARRAY_SIZE(xn->hash); i++) + INIT_HLIST_HEAD(&xn->hash[i]); + return 0; +} + +static void __net_exit xt_rateest_net_exit(struct net *net) { - unsigned int i; + struct xt_rateest_net *xn = net_generic(net, xt_rateest_id); + int i; + + for (i = 0; i < ARRAY_SIZE(xn->hash); i++) + WARN_ON_ONCE(!hlist_empty(&xn->hash[i])); +} - for (i = 0; i < ARRAY_SIZE(rateest_hash); i++) - INIT_HLIST_HEAD(&rateest_hash[i]); +static struct pernet_operations xt_rateest_net_ops = { + .init = xt_rateest_net_init, + .exit = xt_rateest_net_exit, + .id = &xt_rateest_id, + .size = sizeof(struct xt_rateest_net), +}; + +static int __init xt_rateest_tg_init(void) +{ + int err = register_pernet_subsys(&xt_rateest_net_ops); + if (err) + return err; return xt_register_target(&xt_rateest_tg_reg); } static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); + unregister_pernet_subsys(&xt_rateest_net_ops); } diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 755d2f6693a2ff..bf77326861afdb 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -95,13 +95,13 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) } ret = -ENOENT; - est1 = xt_rateest_lookup(info->name1); + est1 = xt_rateest_lookup(par->net, info->name1); if (!est1) goto err1; est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { - est2 = xt_rateest_lookup(info->name2); + est2 = xt_rateest_lookup(par->net, info->name2); if (!est2) goto err2; } @@ -111,7 +111,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) return 0; err2: - xt_rateest_put(est1); + xt_rateest_put(par->net, est1); err1: return ret; } @@ -120,9 +120,9 @@ static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) { struct 
xt_rateest_match_info *info = par->matchinfo; - xt_rateest_put(info->est1); + xt_rateest_put(par->net, info->est1); if (info->est2) - xt_rateest_put(info->est2); + xt_rateest_put(par->net, info->est2); } static struct xt_match xt_rateest_mt_reg __read_mostly = { From 010eacd968a73ddcb8592b14c1607e1004120ede Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 Mar 2018 14:59:54 +0100 Subject: [PATCH 19/47] netfilter: xt_limit: Spelling s/maxmum/maximum/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_limit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 55d18cd676356c..9f098ecb24497e 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -46,7 +46,7 @@ MODULE_ALIAS("ip6t_limit"); See Alexey's formal explanation in net/sched/sch_tbf.c. - To get the maxmum range, we multiply by this factor (ie. you get N + To get the maximum range, we multiply by this factor (ie. you get N credits per jiffy). We want to allow a rate as low as 1 per day (slowest userspace tool allows), which means CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */ From 72597135cdd2fe524f9a185d7f954c2c3980f3ee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 6 Mar 2018 08:26:00 +0100 Subject: [PATCH 20/47] netfilter: x_tables: fix build with CONFIG_COMPAT=n I placed the helpers within CONFIG_COMPAT section, move them outside. 
Fixes: 472ebdcd15ebdb ("netfilter: x_tables: check error target size too") Fixes: 07a9da51b4b6ae ("netfilter: x_tables: check standard verdicts in core") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 62 ++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 7521e8a72c064e..bac932f1c58208 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -577,6 +577,37 @@ int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_ho } EXPORT_SYMBOL(xt_check_table_hooks); +static bool verdict_ok(int verdict) +{ + if (verdict > 0) + return true; + + if (verdict < 0) { + int v = -verdict - 1; + + if (verdict == XT_RETURN) + return true; + + switch (v) { + case NF_ACCEPT: return true; + case NF_DROP: return true; + case NF_QUEUE: return true; + default: + break; + } + + return false; + } + + return false; +} + +static bool error_tg_ok(unsigned int usersize, unsigned int kernsize, + const char *msg, unsigned int msglen) +{ + return usersize == kernsize && strnlen(msg, msglen) < msglen; +} + #ifdef CONFIG_COMPAT int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { @@ -736,37 +767,6 @@ struct compat_xt_error_target { char errorname[XT_FUNCTION_MAXNAMELEN]; }; -static bool verdict_ok(int verdict) -{ - if (verdict > 0) - return true; - - if (verdict < 0) { - int v = -verdict - 1; - - if (verdict == XT_RETURN) - return true; - - switch (v) { - case NF_ACCEPT: return true; - case NF_DROP: return true; - case NF_QUEUE: return true; - default: - break; - } - - return false; - } - - return false; -} - -static bool error_tg_ok(unsigned int usersize, unsigned int kernsize, - const char *msg, unsigned int msglen) -{ - return usersize == kernsize && strnlen(msg, msglen) < msglen; -} - int xt_compat_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, 
unsigned int next_offset) From a55efe1d416c345a73bef38848e1ac7109560e12 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 5 Mar 2018 15:35:57 -0600 Subject: [PATCH 21/47] ipvs: use true and false for boolean values Assign true or false to boolean variables instead of an integer value. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_lblc.c | 4 ++-- net/netfilter/ipvs/ip_vs_lblcr.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 6a340c94c4b88e..942e835caf7f9c 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) int i; spin_lock_bh(&svc->sched_lock); - tbl->dead = 1; + tbl->dead = true; for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblc_del(en); @@ -369,7 +369,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; - tbl->dead = 0; + tbl->dead = false; tbl->svc = svc; /* diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0627881128da13..a5acab25c36b3c 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -404,7 +404,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) struct hlist_node *next; spin_lock_bh(&svc->sched_lock); - tbl->dead = 1; + tbl->dead = true; for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) { hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); @@ -532,7 +532,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; - tbl->dead = 0; + tbl->dead = false; tbl->svc = svc; /* From 
6aec208786c2a54cbf6135a0242b224e845bef98 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Sun, 4 Mar 2018 15:29:51 -0800 Subject: [PATCH 22/47] netfilter: Refactor nf_conncount Remove parameter 'family' in nf_conncount_count() and count_tree(). It is because the parameter is not useful after commit 625c556118f3 ("netfilter: connlimit: split xt_connlimit into front and backend"). Signed-off-by: Yi-Hung Wei Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 1 - net/netfilter/nf_conncount.c | 4 +--- net/netfilter/xt_connlimit.c | 4 ++-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index adf8db44cf860d..e61184fbfb710c 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -11,7 +11,6 @@ void nf_conncount_destroy(struct net *net, unsigned int family, unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, const u32 *key, - unsigned int family, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); #endif diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 6d65389e308f4b..9305a08b442272 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -158,7 +158,6 @@ static void tree_nodes_free(struct rb_root *root, static unsigned int count_tree(struct net *net, struct rb_root *root, const u32 *key, u8 keylen, - u8 family, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { @@ -246,7 +245,6 @@ count_tree(struct net *net, struct rb_root *root, unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, const u32 *key, - unsigned int family, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone) { @@ -259,7 +257,7 @@ unsigned int nf_conncount_count(struct net *net, 
spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); - count = count_tree(net, root, key, data->keylen, family, tuple, zone); + count = count_tree(net, root, key, data->keylen, tuple, zone); spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b1b17b9353e1d1..6275106ccf508f 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -67,8 +67,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) key[1] = zone->id; } - connections = nf_conncount_count(net, info->data, key, - xt_family(par), tuple_ptr, zone); + connections = nf_conncount_count(net, info->data, key, tuple_ptr, + zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; From 35d8deb80c30fdb2dee3e2dac71eab00d8a6fed5 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Sun, 4 Mar 2018 15:29:52 -0800 Subject: [PATCH 23/47] netfilter: conncount: Support count only use case Currently, nf_conncount_count() counts the number of connections that matches key and inserts a conntrack 'tuple' with the same key into the accounting data structure. This patch supports another use case that only counts the number of connections where 'tuple' is not provided. Therefore, proper changes are made on nf_conncount_count() to support the case where 'tuple' is NULL. This could be useful for querying statistics or debugging purpose. Signed-off-by: Yi-Hung Wei Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 9305a08b442272..153e690e289399 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -104,7 +104,7 @@ static unsigned int check_hlist(struct net *net, struct nf_conn *found_ct; unsigned int length = 0; - *addit = true; + *addit = tuple ? 
true : false; /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { @@ -117,7 +117,7 @@ static unsigned int check_hlist(struct net *net, found_ct = nf_ct_tuplehash_to_ctrack(found); - if (nf_ct_tuple_equal(&conn->tuple, tuple)) { + if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) { /* * Just to be sure we have it only once in the list. * We should not see tuples twice unless someone hooks @@ -220,6 +220,9 @@ count_tree(struct net *net, struct rb_root *root, goto restart; } + if (!tuple) + return 0; + /* no match, need to insert new node */ rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); if (rbconn == NULL) @@ -242,6 +245,9 @@ count_tree(struct net *net, struct rb_root *root, return 1; } +/* Count and return number of conntrack entries in 'net' with particular 'key'. + * If 'tuple' is not null, insert it into the accounting data structure. + */ unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, const u32 *key, From d719e3f21cf91d3f82bd827d46199ba41af2f73a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Mar 2018 11:57:20 +0100 Subject: [PATCH 24/47] netfilter: nft_ct: add NFT_CT_{SRC,DST}_{IP,IP6} All existing keys, except the NFT_CT_SRC and NFT_CT_DST are assumed to have strict datatypes. This is causing problems with sets and concatenations given the specific length of these keys is not known. 
Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 12 ++++++-- net/netfilter/nft_ct.c | 38 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 66dceee0ae307a..6a3d653d5b2748 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -909,8 +909,8 @@ enum nft_rt_attributes { * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms * @NFT_CT_HELPER: connection tracking helper assigned to conntrack * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol - * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address) - * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address) + * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated) + * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated) * @NFT_CT_PROTOCOL: conntrack layer 4 protocol * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination @@ -920,6 +920,10 @@ enum nft_rt_attributes { * @NFT_CT_AVGPKT: conntrack average bytes per packet * @NFT_CT_ZONE: conntrack zone * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack + * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address) + * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address) + * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address) + * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address) */ enum nft_ct_keys { NFT_CT_STATE, @@ -941,6 +945,10 @@ enum nft_ct_keys { NFT_CT_AVGPKT, NFT_CT_ZONE, NFT_CT_EVENTMASK, + NFT_CT_SRC_IP, + NFT_CT_DST_IP, + NFT_CT_SRC_IP6, + NFT_CT_DST_IP6, }; /** diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6ab274b1448405..ea737fd789e871 100644 --- a/net/netfilter/nft_ct.c +++ 
b/net/netfilter/nft_ct.c @@ -196,6 +196,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr, case NFT_CT_PROTO_DST: nft_reg_store16(dest, (__force u16)tuple->dst.u.all); return; + case NFT_CT_SRC_IP: + if (nf_ct_l3num(ct) != NFPROTO_IPV4) + goto err; + *dest = tuple->src.u3.ip; + return; + case NFT_CT_DST_IP: + if (nf_ct_l3num(ct) != NFPROTO_IPV4) + goto err; + *dest = tuple->dst.u3.ip; + return; + case NFT_CT_SRC_IP6: + if (nf_ct_l3num(ct) != NFPROTO_IPV6) + goto err; + memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr)); + return; + case NFT_CT_DST_IP6: + if (nf_ct_l3num(ct) != NFPROTO_IPV6) + goto err; + memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr)); + return; default: break; } @@ -419,6 +439,20 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, return -EAFNOSUPPORT; } break; + case NFT_CT_SRC_IP: + case NFT_CT_DST_IP: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + + len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip); + break; + case NFT_CT_SRC_IP6: + case NFT_CT_DST_IP6: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + + len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6); + break; case NFT_CT_PROTO_SRC: case NFT_CT_PROTO_DST: if (tb[NFTA_CT_DIRECTION] == NULL) @@ -588,6 +622,10 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) switch (priv->key) { case NFT_CT_SRC: case NFT_CT_DST: + case NFT_CT_SRC_IP: + case NFT_CT_DST_IP: + case NFT_CT_SRC_IP6: + case NFT_CT_DST_IP6: case NFT_CT_PROTO_SRC: case NFT_CT_PROTO_DST: if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) From 8039ab43eeac029a9c47c0411918ea82c9ce87cd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Mar 2018 18:14:42 -0500 Subject: [PATCH 25/47] netfilter: cttimeout: remove VLA usage In preparation to enabling -Wvla, remove VLA and replace it with dynamic memory allocation. >From a security viewpoint, the use of Variable Length Arrays can be a vector for stack overflow attacks. 
Also, in general, as the code evolves it is easy to lose track of how big a VLA can get. Thus, we can end up having segfaults that are hard to debug. Also, fixed as part of the directive to remove all VLAs from the kernel: https://lkml.org/lkml/2018/3/7/621 While at it, remove likely() notation which is not necessary from the control plane code. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 95b04702a655af..9ee5fa551fa68c 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -51,19 +51,27 @@ ctnl_timeout_parse_policy(void *timeouts, const struct nf_conntrack_l4proto *l4proto, struct net *net, const struct nlattr *attr) { + struct nlattr **tb; int ret = 0; - if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { - struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; + if (!l4proto->ctnl_timeout.nlattr_to_obj) + return 0; - ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy, - NULL); - if (ret < 0) - return ret; + tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), + GFP_KERNEL); - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); - } + if (!tb) + return -ENOMEM; + + ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, + l4proto->ctnl_timeout.nla_policy, NULL); + if (ret < 0) + goto err; + + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); + +err: + kfree(tb); return ret; } From 1446385904add0e89f990ee0518434365e50ce86 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Mar 2018 19:21:38 -0500 Subject: [PATCH 26/47] netfilter: nfnetlink_cthelper: Remove VLA usage In preparation to enabling -Wvla, remove VLA and replace it with dynamic memory allocation. 
>From a security viewpoint, the use of Variable Length Arrays can be a vector for stack overflow attacks. Also, in general, as the code evolves it is easy to lose track of how big a VLA can get. Thus, we can end up having segfaults that are hard to debug. Also, fixed as part of the directive to remove all VLAs from the kernel: https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d33ce6d5ebce92..4a4b293fb2e54c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -314,23 +314,30 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], struct nf_conntrack_helper *helper) { - struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *new_policy; struct nf_conntrack_expect_policy *policy; - int i, err; + int i, ret = 0; + + new_policy = kmalloc_array(helper->expect_class_max + 1, + sizeof(*new_policy), GFP_KERNEL); + if (!new_policy) + return -ENOMEM; /* Check first that all policy attributes are well-formed, so we don't * leave things in inconsistent state on errors. */ for (i = 0; i < helper->expect_class_max + 1; i++) { - if (!tb[NFCTH_POLICY_SET + i]) - return -EINVAL; + if (!tb[NFCTH_POLICY_SET + i]) { + ret = -EINVAL; + goto err; + } - err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], &new_policy[i], tb[NFCTH_POLICY_SET + i]); - if (err < 0) - return err; + if (ret < 0) + goto err; } /* Now we can safely update them. 
*/ for (i = 0; i < helper->expect_class_max + 1; i++) { @@ -340,7 +347,9 @@ static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], policy->timeout = new_policy->timeout; } - return 0; +err: + kfree(new_policy); + return ret; } static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, From 5b4c6e3860daaf089c28e0161dffef7b5ad8000f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 12 Mar 2018 22:16:17 -0500 Subject: [PATCH 27/47] netfilter: nf_tables: remove VLA usage In preparation to enabling -Wvla, remove VLA and replace it with dynamic memory allocation. >From a security viewpoint, the use of Variable Length Arrays can be a vector for stack overflow attacks. Also, in general, as the code evolves it is easy to lose track of how big a VLA can get. Thus, we can end up having segfaults that are hard to debug. Also, fixed as part of the directive to remove all VLAs from the kernel: https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8cc7fc970f0cec..92f5606b0deaf2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4357,16 +4357,20 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, const struct nft_object_type *type, const struct nlattr *attr) { - struct nlattr *tb[type->maxattr + 1]; + struct nlattr **tb; const struct nft_object_ops *ops; struct nft_object *obj; - int err; + int err = -ENOMEM; + + tb = kmalloc_array(type->maxattr + 1, sizeof(*tb), GFP_KERNEL); + if (!tb) + goto err1; if (attr) { err = nla_parse_nested(tb, type->maxattr, attr, type->policy, NULL); if (err < 0) - goto err1; + goto err2; } else { memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1)); } @@ -4375,7 +4379,7 @@ static struct nft_object *nft_obj_init(const struct 
nft_ctx *ctx, ops = type->select_ops(ctx, (const struct nlattr * const *)tb); if (IS_ERR(ops)) { err = PTR_ERR(ops); - goto err1; + goto err2; } } else { ops = type->ops; @@ -4383,18 +4387,21 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, err = -ENOMEM; obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL); - if (obj == NULL) - goto err1; + if (!obj) + goto err2; err = ops->init(ctx, (const struct nlattr * const *)tb, obj); if (err < 0) - goto err2; + goto err3; obj->ops = ops; + kfree(tb); return obj; -err2: +err3: kfree(obj); +err2: + kfree(tb); err1: return ERR_PTR(err); } From d72133e6288030121e425b89584ab3dfb68871cc Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 14 Mar 2018 23:36:53 +0900 Subject: [PATCH 28/47] netfilter: ebtables: use ADD_COUNTER macro xtables uses ADD_COUNTER macro to increase packet and byte count. ebtables also can use this. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 217aa79f7b2ae6..9a26d2b7420ff1 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -223,9 +223,7 @@ unsigned int ebt_do_table(struct sk_buff *skb, return NF_DROP; } - /* increase counter */ - (*(counter_base + i)).pcnt++; - (*(counter_base + i)).bcnt += skb->len; + ADD_COUNTER(*(counter_base + i), 1, skb->len); /* these should only watch: not modify, nor tell us * what to do with the packet @@ -968,10 +966,9 @@ static void get_counters(const struct ebt_counter *oldcounters, if (cpu == 0) continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); - for (i = 0; i < nentries; i++) { - counters[i].pcnt += counter_base[i].pcnt; - counters[i].bcnt += counter_base[i].bcnt; - } + for (i = 0; i < nentries; i++) + ADD_COUNTER(counters[i], counter_base[i].pcnt, + counter_base[i].bcnt); } } @@ -1324,10 +1321,8 @@ 
static int do_update_counters(struct net *net, const char *name, write_lock_bh(&t->lock); /* we add to the counters of the first cpu */ - for (i = 0; i < num_counters; i++) { - t->private->counters[i].pcnt += tmp[i].pcnt; - t->private->counters[i].bcnt += tmp[i].bcnt; - } + for (i = 0; i < num_counters; i++) + ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt); write_unlock_bh(&t->lock); ret = 0; From 472a73e00757b971d613d796374d2727b2e4954d Mon Sep 17 00:00:00 2001 From: Jack Ma Date: Mon, 19 Mar 2018 09:41:59 +1300 Subject: [PATCH 29/47] netfilter: xt_conntrack: Support bit-shifting for CONNMARK & MARK targets. This patch introduces a new feature that allows bitshifting (left and right) operations to co-operate with existing iptables options. Reviewed-by: Florian Westphal Signed-off-by: Jack Ma Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_connmark.h | 10 +++ net/netfilter/xt_connmark.c | 77 +++++++++++++++++----- 2 files changed, 70 insertions(+), 17 deletions(-) diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index 408a9654f05ca1..1aa5c955ee1eea 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -19,11 +19,21 @@ enum { XT_CONNMARK_RESTORE }; +enum { + D_SHIFT_LEFT = 0, + D_SHIFT_RIGHT, +}; + struct xt_connmark_tginfo1 { __u32 ctmark, ctmask, nfmask; __u8 mode; }; +struct xt_connmark_tginfo2 { + __u32 ctmark, ctmask, nfmask; + __u8 shift_dir, shift_bits, mode; +}; + struct xt_connmark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 809639ce6f5a48..773da82190dc32 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -36,9 +36,10 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static unsigned int -connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) +connmark_tg_shift(struct sk_buff *skb, + const 
struct xt_connmark_tginfo1 *info, + u8 shift_bits, u8 shift_dir) { - const struct xt_connmark_tginfo1 *info = par->targinfo; enum ip_conntrack_info ctinfo; struct nf_conn *ct; u_int32_t newmark; @@ -50,6 +51,10 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (info->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + if (shift_dir == D_SHIFT_RIGHT) + newmark >>= shift_bits; + else + newmark <<= shift_bits; if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); @@ -57,7 +62,11 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) break; case XT_CONNMARK_SAVE: newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); + (skb->mark & info->nfmask); + if (shift_dir == D_SHIFT_RIGHT) + newmark >>= shift_bits; + else + newmark <<= shift_bits; if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); @@ -65,14 +74,41 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) break; case XT_CONNMARK_RESTORE: newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); + (ct->mark & info->ctmask); + if (shift_dir == D_SHIFT_RIGHT) + newmark >>= shift_bits; + else + newmark <<= shift_bits; skb->mark = newmark; break; } - return XT_CONTINUE; } +static unsigned int +connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_connmark_tginfo1 *info = par->targinfo; + + return connmark_tg_shift(skb, info, 0, 0); +} + +static unsigned int +connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_connmark_tginfo2 *info = par->targinfo; + /* tginfo2 and tginfo1 place 'mode' differently: copy, don't cast */ + const struct xt_connmark_tginfo1 info1 = { + .ctmark = info->ctmark, + .ctmask = info->ctmask, + .nfmask = info->nfmask, + .mode = info->mode, + }; + + return connmark_tg_shift(skb, &info1, info->shift_bits, + info->shift_dir); +} + static int connmark_tg_check(const struct xt_tgchk_param *par) { int ret; @@ -119,15 +155,27 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_netns_put(par->net,
par->family); } -static struct xt_target connmark_tg_reg __read_mostly = { - .name = "CONNMARK", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, +static struct xt_target connmark_tg_reg[] __read_mostly = { + { + .name = "CONNMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg, + .targetsize = sizeof(struct xt_connmark_tginfo1), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, + }, + { + .name = "CONNMARK", + .revision = 2, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg_v2, + .targetsize = sizeof(struct xt_connmark_tginfo2), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, + } }; static struct xt_match connmark_mt_reg __read_mostly = { @@ -145,12 +186,14 @@ static int __init connmark_mt_init(void) { int ret; - ret = xt_register_target(&connmark_tg_reg); + ret = xt_register_targets(connmark_tg_reg, + ARRAY_SIZE(connmark_tg_reg)); if (ret < 0) return ret; ret = xt_register_match(&connmark_mt_reg); if (ret < 0) { - xt_unregister_target(&connmark_tg_reg); + xt_unregister_targets(connmark_tg_reg, + ARRAY_SIZE(connmark_tg_reg)); return ret; } return 0; @@ -159,7 +202,7 @@ static int __init connmark_mt_init(void) static void __exit connmark_mt_exit(void) { xt_unregister_match(&connmark_mt_reg); - xt_unregister_target(&connmark_tg_reg); + xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg)); } module_init(connmark_mt_init); From 5191d70f83fd1878c40029cffe69f6a2bf65fa0e Mon Sep 17 00:00:00 2001 From: Arushi Singhal Date: Mon, 12 Mar 2018 18:36:29 +0530 Subject: [PATCH 30/47] netfilter: Replace printk() with pr_*() and define pr_fmt() Using pr_() is more concise than printk(KERN_). This patch: * Replace printks having a log level with the appropriate pr_*() macros. * Define pr_fmt() to include relevant name.
* Remove redundant prefixes from pr_*() calls. * Indent the code where possible. * Remove the useless output messages. * Remove periods from messages. Signed-off-by: Arushi Singhal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_acct.c | 6 ++++-- net/netfilter/nf_conntrack_ecache.c | 6 ++++-- net/netfilter/nf_conntrack_timestamp.c | 6 ++++-- net/netfilter/nf_nat_core.c | 4 +++- net/netfilter/nf_nat_ftp.c | 7 ++++--- net/netfilter/nf_nat_irc.c | 7 ++++--- net/netfilter/nfnetlink_queue.c | 14 +++++++------- net/netfilter/xt_time.c | 13 +++++++------ 8 files changed, 37 insertions(+), 26 deletions(-) diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 86691671290519..1d66de5151b2d9 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -80,7 +82,7 @@ static int nf_conntrack_acct_init_sysctl(struct net *net) net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.acct_sysctl_header) { - printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + pr_err("can't register to sysctl\n"); goto out_register; } return 0; @@ -125,7 +127,7 @@ int nf_conntrack_acct_init(void) { int ret = nf_ct_extend_register(&acct_extend); if (ret < 0) - pr_err("nf_conntrack_acct: Unable to register extension\n"); + pr_err("Unable to register extension\n"); return ret; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index caac41ad9483ed..c11822a7d2bfa2 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -372,7 +374,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net) net->ct.event_sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.event_sysctl_header) { - printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n"); + pr_err("can't register to sysctl\n"); goto out_register; } return 0; @@ -419,7 +421,7 @@ int nf_conntrack_ecache_init(void) { int ret = nf_ct_extend_register(&event_extend); if (ret < 0) - pr_err("nf_ct_event: Unable to register event extension.\n"); + pr_err("Unable to register event extension\n"); BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 4c4734b7831896..56766cb26e40e8 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -6,6 +6,8 @@ * published by the Free Software Foundation (or any later at your option). */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -58,7 +60,7 @@ static int nf_conntrack_tstamp_init_sysctl(struct net *net) net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.tstamp_sysctl_header) { - printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); + pr_err("can't register to sysctl\n"); goto out_register; } return 0; @@ -104,7 +106,7 @@ int nf_conntrack_tstamp_init(void) int ret; ret = nf_ct_extend_register(&tstamp_extend); if (ret < 0) - pr_err("nf_ct_tstamp: Unable to register extension\n"); + pr_err("Unable to register extension\n"); return ret; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6c38421e31f9cd..617693ff9f4cbd 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -8,6 +8,8 @@ * published by the Free Software Foundation. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -814,7 +816,7 @@ static int __init nf_nat_init(void) ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); - printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); + pr_err("Unable to register extension\n"); return ret; } diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index d76afafdc6993b..5063cbf1689c87 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -71,7 +73,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN]; unsigned int buflen; - pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); + pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen); /* Connection will come from wherever this packet goes, hence !dir */ newaddr = ct->tuplehash[!dir].tuple.dst.u3; @@ -136,8 +138,7 @@ static int __init nf_nat_ftp_init(void) /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ static int warn_set(const char *val, const struct kernel_param *kp) { - printk(KERN_INFO KBUILD_MODNAME - ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); + pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); return 0; } module_param_call(ports, warn_set, NULL, NULL, 0); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index dcb5f6375d9df9..3aa35a43100d57 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -10,6 +10,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -79,7 +81,7 @@ static unsigned int help(struct sk_buff *skb, */ /* AAA = "us", ie. where server normally talks to. 
*/ snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port); - pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", + pr_debug("inserting '%s' == %pI4, port %u\n", buffer, &newaddr.ip, port); if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, @@ -108,8 +110,7 @@ static int __init nf_nat_irc_init(void) /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ static int warn_set(const char *val, const struct kernel_param *kp) { - printk(KERN_INFO KBUILD_MODNAME - ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); + pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); return 0; } module_param_call(ports, warn_set, NULL, NULL, 0); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8bba23160a68fd..74a04638ef03ae 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -14,6 +14,9 @@ * published by the Free Software Foundation. * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -833,11 +836,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) if (diff > skb_tailroom(e->skb)) { nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "nf_queue: OOM " - "in mangle, dropping packet\n"); + if (!nskb) return -ENOMEM; - } kfree_skb(e->skb); e->skb = nskb; } @@ -1536,20 +1536,20 @@ static int __init nfnetlink_queue_init(void) status = register_pernet_subsys(&nfnl_queue_net_ops); if (status < 0) { - pr_err("nf_queue: failed to register pernet ops\n"); + pr_err("failed to register pernet ops\n"); goto out; } netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); if (status < 0) { - pr_err("nf_queue: failed to create netlink socket\n"); + pr_err("failed to create netlink socket\n"); goto cleanup_netlink_notifier; } status = register_netdevice_notifier(&nfqnl_dev_notifier); if 
(status < 0) { - pr_err("nf_queue: failed to register netdevice notifier\n"); + pr_err("failed to register netdevice notifier\n"); goto cleanup_netlink_subsys; } diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 0160f505e337c7..c13bcd0ab49130 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -9,6 +9,9 @@ * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -266,13 +269,11 @@ static int __init time_mt_init(void) int minutes = sys_tz.tz_minuteswest; if (minutes < 0) /* east of Greenwich */ - printk(KERN_INFO KBUILD_MODNAME - ": kernel timezone is +%02d%02d\n", - -minutes / 60, -minutes % 60); + pr_info("kernel timezone is +%02d%02d\n", + -minutes / 60, -minutes % 60); else /* west of Greenwich */ - printk(KERN_INFO KBUILD_MODNAME - ": kernel timezone is -%02d%02d\n", - minutes / 60, minutes % 60); + pr_info("kernel timezone is -%02d%02d\n", + minutes / 60, minutes % 60); return xt_register_match(&xt_time_mt_reg); } From 20710b3b81895c89e92bcc32ce85c0bede1171f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Mar 2018 12:33:51 +0100 Subject: [PATCH 31/47] netfilter: ctnetlink: synproxy support This patch exposes synproxy information per-conntrack. Moreover, send sequence adjustment events once server sends us the SYN,ACK packet, so we can synchronize the sequence adjustment too for packets going as reply from the server, as part of the synproxy logic. 
Signed-off-by: Pablo Neira Ayuso --- .../linux/netfilter/nf_conntrack_common.h | 1 + .../linux/netfilter/nfnetlink_conntrack.h | 10 +++ net/ipv4/netfilter/ipt_SYNPROXY.c | 8 +- net/ipv6/netfilter/ip6t_SYNPROXY.c | 8 +- net/netfilter/nf_conntrack_netlink.c | 87 ++++++++++++++++++- 5 files changed, 109 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 9574bd40870ba5..c712eb6879f116 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -129,6 +129,7 @@ enum ip_conntrack_events { IPCT_NATSEQADJ = IPCT_SEQADJ, IPCT_SECMARK, /* new security mark has been set */ IPCT_LABEL, /* new connlabel has been set */ + IPCT_SYNPROXY, /* synproxy has been set */ #ifdef __KERNEL__ __IPCT_MAX #endif diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 7397e022ce6e8f..77987111cab0c2 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -54,6 +54,7 @@ enum ctattr_type { CTA_MARK_MASK, CTA_LABELS, CTA_LABELS_MASK, + CTA_SYNPROXY, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -190,6 +191,15 @@ enum ctattr_natseq { }; #define CTA_NAT_SEQ_MAX (__CTA_NAT_SEQ_MAX - 1) +enum ctattr_synproxy { + CTA_SYNPROXY_UNSPEC, + CTA_SYNPROXY_ISN, + CTA_SYNPROXY_ITS, + CTA_SYNPROXY_TSOFF, + __CTA_SYNPROXY_MAX, +}; +#define CTA_SYNPROXY_MAX (__CTA_SYNPROXY_MAX - 1) + enum ctattr_expect { CTA_EXPECT_UNSPEC, CTA_EXPECT_MASTER, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index f75fc6b531152a..690b17ef6a44a1 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -16,6 +16,7 @@ #include #include #include +#include static struct iphdr * synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, @@ -384,6 +385,8 @@ static 
unsigned int ipv4_synproxy_hook(void *priv, synproxy->isn = ntohl(th->ack_seq); if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->its = opts.tsecr; + + nf_conntrack_event_cache(IPCT_SYNPROXY, ct); break; case TCP_CONNTRACK_SYN_RECV: if (!th->syn || !th->ack) @@ -392,8 +395,10 @@ static unsigned int ipv4_synproxy_hook(void *priv, if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; - if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) { synproxy->tsoff = opts.tsval - synproxy->its; + nf_conntrack_event_cache(IPCT_SYNPROXY, ct); + } opts.options &= ~(XT_SYNPROXY_OPT_MSS | XT_SYNPROXY_OPT_WSCALE | @@ -403,6 +408,7 @@ static unsigned int ipv4_synproxy_hook(void *priv, synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); + nf_conntrack_event_cache(IPCT_SEQADJ, ct); swap(opts.tsval, opts.tsecr); synproxy_send_client_ack(net, skb, th, &opts); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 437af8c95277f7..cb6d42b03cb5dd 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -18,6 +18,7 @@ #include #include #include +#include static struct ipv6hdr * synproxy_build_ip(struct net *net, struct sk_buff *skb, @@ -405,6 +406,8 @@ static unsigned int ipv6_synproxy_hook(void *priv, synproxy->isn = ntohl(th->ack_seq); if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy->its = opts.tsecr; + + nf_conntrack_event_cache(IPCT_SYNPROXY, ct); break; case TCP_CONNTRACK_SYN_RECV: if (!th->syn || !th->ack) @@ -413,8 +416,10 @@ static unsigned int ipv6_synproxy_hook(void *priv, if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; - if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) { synproxy->tsoff = opts.tsval - synproxy->its; + nf_conntrack_event_cache(IPCT_SYNPROXY, ct); + } opts.options &= ~(XT_SYNPROXY_OPT_MSS | 
XT_SYNPROXY_OPT_WSCALE | @@ -424,6 +429,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); + nf_conntrack_event_cache(IPCT_SEQADJ, ct); swap(opts.tsval, opts.tsecr); synproxy_send_client_ack(net, skb, th, &opts); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8884d302d33a64..11ef85a57244a8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -440,6 +440,31 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct) return -1; } +static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct) +{ + struct nf_conn_synproxy *synproxy = nfct_synproxy(ct); + struct nlattr *nest_parms; + + if (!synproxy) + return 0; + + nest_parms = nla_nest_start(skb, CTA_SYNPROXY | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + if (nla_put_be32(skb, CTA_SYNPROXY_ISN, htonl(synproxy->isn)) || + nla_put_be32(skb, CTA_SYNPROXY_ITS, htonl(synproxy->its)) || + nla_put_be32(skb, CTA_SYNPROXY_TSOFF, htonl(synproxy->tsoff))) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + + return 0; + +nla_put_failure: + return -1; +} + static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) @@ -518,7 +543,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || ctnetlink_dump_master(skb, ct) < 0 || - ctnetlink_dump_ct_seq_adj(skb, ct) < 0) + ctnetlink_dump_ct_seq_adj(skb, ct) < 0 || + ctnetlink_dump_ct_synproxy(skb, ct) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -730,6 +756,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) if (events & (1 << IPCT_SEQADJ) && ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; + + if (events & 
(1 << IPCT_SYNPROXY) && + ctnetlink_dump_ct_synproxy(skb, ct) < 0) + goto nla_put_failure; } #ifdef CONFIG_NF_CONNTRACK_MARK @@ -1689,6 +1719,39 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, return ret; } +static const struct nla_policy synproxy_policy[CTA_SYNPROXY_MAX + 1] = { + [CTA_SYNPROXY_ISN] = { .type = NLA_U32 }, + [CTA_SYNPROXY_ITS] = { .type = NLA_U32 }, + [CTA_SYNPROXY_TSOFF] = { .type = NLA_U32 }, +}; + +static int ctnetlink_change_synproxy(struct nf_conn *ct, + const struct nlattr * const cda[]) +{ + struct nf_conn_synproxy *synproxy = nfct_synproxy(ct); + struct nlattr *tb[CTA_SYNPROXY_MAX + 1]; + int err; + + if (!synproxy) + return 0; + + err = nla_parse_nested(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY], + synproxy_policy, NULL); + if (err < 0) + return err; + + if (!tb[CTA_SYNPROXY_ISN] || + !tb[CTA_SYNPROXY_ITS] || + !tb[CTA_SYNPROXY_TSOFF]) + return -EINVAL; + + synproxy->isn = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ISN])); + synproxy->its = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ITS])); + synproxy->tsoff = ntohl(nla_get_be32(tb[CTA_SYNPROXY_TSOFF])); + + return 0; +} + static int ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1759,6 +1822,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct, return err; } + if (cda[CTA_SYNPROXY]) { + err = ctnetlink_change_synproxy(ct, cda); + if (err < 0) + return err; + } + if (cda[CTA_LABELS]) { err = ctnetlink_attach_labels(ct, cda); if (err < 0) @@ -1880,6 +1949,12 @@ ctnetlink_create_conntrack(struct net *net, goto err2; } + if (cda[CTA_SYNPROXY]) { + err = ctnetlink_change_synproxy(ct, cda); + if (err < 0) + goto err2; + } + #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); @@ -1991,7 +2066,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_SEQADJ) | - (1 << IPCT_MARK) | events, + (1 << IPCT_MARK) | + (1 << IPCT_SYNPROXY) | + events, ct, 
NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); @@ -2012,7 +2089,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, (1 << IPCT_LABEL) | (1 << IPCT_PROTOINFO) | (1 << IPCT_SEQADJ) | - (1 << IPCT_MARK), + (1 << IPCT_MARK) | + (1 << IPCT_SYNPROXY), ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } @@ -2282,6 +2360,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) ctnetlink_dump_ct_seq_adj(skb, ct) < 0) goto nla_put_failure; + if (ctnetlink_dump_ct_synproxy(skb, ct) < 0) + goto nla_put_failure; + #ifdef CONFIG_NF_CONNTRACK_MARK if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; From 5adc1668ddc42bb44fd6d006cacad74ed0cbf49d Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 4 Mar 2018 09:28:53 +0100 Subject: [PATCH 32/47] netfilter: ebtables: add support for matching ICMP type and code We already have ICMPv6 type/code matches. This adds support for IPv4 ICMP matches in the same way. Signed-off-by: Matthias Schiffer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge/ebt_ip.h | 13 ++++-- net/bridge/netfilter/ebt_ip.c | 43 +++++++++++++++----- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h index 8e462fb1983f23..4ed7fbb0a48201 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_ip.h +++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h @@ -24,8 +24,9 @@ #define EBT_IP_PROTO 0x08 #define EBT_IP_SPORT 0x10 #define EBT_IP_DPORT 0x20 +#define EBT_IP_ICMP 0x40 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\ - EBT_IP_SPORT | EBT_IP_DPORT ) + EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP) #define EBT_IP_MATCH "ip" /* the same values are used for the invflags */ @@ -38,8 +39,14 @@ struct ebt_ip_info { __u8 protocol; __u8 bitmask; __u8 invflags; - __u16 sport[2]; - __u16 dport[2]; + union { + __u16 sport[2]; 
+ __u8 icmp_type[2]; + }; + union { + __u16 dport[2]; + __u8 icmp_code[2]; + }; }; #endif diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 2b46c50abce039..8cb8f8395768be 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -19,9 +19,15 @@ #include #include -struct tcpudphdr { - __be16 src; - __be16 dst; +union pkthdr { + struct { + __be16 src; + __be16 dst; + } tcpudphdr; + struct { + u8 type; + u8 code; + } icmphdr; }; static bool @@ -30,8 +36,8 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; struct iphdr _iph; - const struct tcpudphdr *pptr; - struct tcpudphdr _ports; + const union pkthdr *pptr; + union pkthdr _pkthdr; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) @@ -50,29 +56,38 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) if (info->bitmask & EBT_IP_PROTO) { if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol)) return false; - if (!(info->bitmask & EBT_IP_DPORT) && - !(info->bitmask & EBT_IP_SPORT)) + if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT | + EBT_IP_ICMP))) return true; if (ntohs(ih->frag_off) & IP_OFFSET) return false; + + /* min icmp headersize is 4, so sizeof(_pkthdr) is ok. 
*/ pptr = skb_header_pointer(skb, ih->ihl*4, - sizeof(_ports), &_ports); + sizeof(_pkthdr), &_pkthdr); if (pptr == NULL) return false; if (info->bitmask & EBT_IP_DPORT) { - u32 dst = ntohs(pptr->dst); + u32 dst = ntohs(pptr->tcpudphdr.dst); if (NF_INVF(info, EBT_IP_DPORT, dst < info->dport[0] || dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP_SPORT) { - u32 src = ntohs(pptr->src); + u32 src = ntohs(pptr->tcpudphdr.src); if (NF_INVF(info, EBT_IP_SPORT, src < info->sport[0] || src > info->sport[1])) return false; } + if ((info->bitmask & EBT_IP_ICMP) && + NF_INVF(info, EBT_IP_ICMP, + pptr->icmphdr.type < info->icmp_type[0] || + pptr->icmphdr.type > info->icmp_type[1] || + pptr->icmphdr.code < info->icmp_code[0] || + pptr->icmphdr.code > info->icmp_code[1])) + return false; } return true; } @@ -101,6 +116,14 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) return -EINVAL; + if (info->bitmask & EBT_IP_ICMP) { + if ((info->invflags & EBT_IP_PROTO) || + info->protocol != IPPROTO_ICMP) + return -EINVAL; + if (info->icmp_type[0] > info->icmp_type[1] || + info->icmp_code[0] > info->icmp_code[1]) + return -EINVAL; + } return 0; } From 78d9f4d49bbecd101b4e5faf19f8f70719fee2ca Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 4 Mar 2018 09:28:54 +0100 Subject: [PATCH 33/47] netfilter: ebtables: add support for matching IGMP type We already have ICMPv6 type/code matches (which can be used to distinguish different types of MLD packets). Add support for IPv4 IGMP matches in the same way. 
Signed-off-by: Matthias Schiffer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_bridge/ebt_ip.h | 4 +++- net/bridge/netfilter/ebt_ip.c | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h index 4ed7fbb0a48201..46d6261370b0ca 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_ip.h +++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h @@ -25,8 +25,9 @@ #define EBT_IP_SPORT 0x10 #define EBT_IP_DPORT 0x20 #define EBT_IP_ICMP 0x40 +#define EBT_IP_IGMP 0x80 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\ - EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP) + EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP | EBT_IP_IGMP) #define EBT_IP_MATCH "ip" /* the same values are used for the invflags */ @@ -42,6 +43,7 @@ struct ebt_ip_info { union { __u16 sport[2]; __u8 icmp_type[2]; + __u8 igmp_type[2]; }; union { __u16 dport[2]; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 8cb8f8395768be..ffaa8ce2e724f7 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -28,6 +28,9 @@ union pkthdr { u8 type; u8 code; } icmphdr; + struct { + u8 type; + } igmphdr; }; static bool @@ -57,12 +60,12 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol)) return false; if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT | - EBT_IP_ICMP))) + EBT_IP_ICMP | EBT_IP_IGMP))) return true; if (ntohs(ih->frag_off) & IP_OFFSET) return false; - /* min icmp headersize is 4, so sizeof(_pkthdr) is ok. */ + /* min icmp/igmp headersize is 4, so sizeof(_pkthdr) is ok. 
*/ pptr = skb_header_pointer(skb, ih->ihl*4, sizeof(_pkthdr), &_pkthdr); if (pptr == NULL) @@ -88,6 +91,11 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) pptr->icmphdr.code < info->icmp_code[0] || pptr->icmphdr.code > info->icmp_code[1])) return false; + if ((info->bitmask & EBT_IP_IGMP) && + NF_INVF(info, EBT_IP_IGMP, + pptr->igmphdr.type < info->igmp_type[0] || + pptr->igmphdr.type > info->igmp_type[1])) + return false; } return true; } @@ -124,6 +132,13 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par) info->icmp_code[0] > info->icmp_code[1]) return -EINVAL; } + if (info->bitmask & EBT_IP_IGMP) { + if ((info->invflags & EBT_IP_PROTO) || + info->protocol != IPPROTO_IGMP) + return -EINVAL; + if (info->igmp_type[0] > info->igmp_type[1]) + return -EINVAL; + } return 0; } From 39c202d228c3da5a5531be847e9b06cc9b787f31 Mon Sep 17 00:00:00 2001 From: Bernie Harris Date: Wed, 21 Mar 2018 15:42:15 +1300 Subject: [PATCH 34/47] netfilter: ebtables: Add support for specifying match revision Currently ebtables assumes that the revision number of all match modules is 0, which is an issue when trying to use existing xtables matches with ebtables. The solution is to modify ebtables to allow extensions to specify a revision number, similar to iptables. This gets passed down to the kernel, which is then able to find the match module correctly. To maintain binary backwards compatibility, the size of the ebt_entry structures is not changed, only the size of the name field is decreased by 1 byte to make room for the revision field.
Signed-off-by: Bernie Harris Signed-off-by: Pablo Neira Ayuso --- .../uapi/linux/netfilter_bridge/ebtables.h | 16 +++++-- net/bridge/netfilter/ebtables.c | 47 ++++++++++++------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h index 9ff57c0a019901..0c7dc83150139a 100644 --- a/include/uapi/linux/netfilter_bridge/ebtables.h +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -20,6 +20,7 @@ #define EBT_TABLE_MAXNAMELEN 32 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN #define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN +#define EBT_EXTENSION_MAXNAMELEN 31 /* verdicts >0 are "branches" */ #define EBT_ACCEPT -1 @@ -120,7 +121,10 @@ struct ebt_entries { struct ebt_entry_match { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + uint8_t revision; + }; struct xt_match *match; } u; /* size of data */ @@ -130,7 +134,10 @@ struct ebt_entry_match { struct ebt_entry_watcher { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + uint8_t revision; + }; struct xt_target *watcher; } u; /* size of data */ @@ -140,7 +147,10 @@ struct ebt_entry_watcher { struct ebt_entry_target { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + uint8_t revision; + }; struct xt_target *target; } u; /* size of data */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9a26d2b7420ff1..a8cb543e32962a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -356,12 +356,12 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision); if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) 
{ if (!IS_ERR(match)) module_put(match->me); request_module("ebt_%s", m->u.name); - match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0); + match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision); } if (IS_ERR(match)) return PTR_ERR(match); @@ -1350,16 +1350,17 @@ static int update_counters(struct net *net, const void __user *user, static inline int ebt_obj_to_user(char __user *um, const char *_name, const char *data, int entrysize, - int usersize, int datasize) + int usersize, int datasize, u8 revision) { - char name[EBT_FUNCTION_MAXNAMELEN] = {0}; + char name[EBT_EXTENSION_MAXNAMELEN] = {0}; - /* ebtables expects 32 bytes long names but xt_match names are 29 bytes + /* ebtables expects 31 bytes long names but xt_match names are 29 bytes * long. Copy 29 bytes and fill remaining bytes with zeroes. */ strlcpy(name, _name, sizeof(name)); - if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || - put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || + if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) || + put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) || + put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) || xt_data_to_user(um + entrysize, data, usersize, datasize, XT_ALIGN(datasize))) return -EFAULT; @@ -1372,7 +1373,8 @@ static inline int ebt_match_to_user(const struct ebt_entry_match *m, { return ebt_obj_to_user(ubase + ((char *)m - base), m->u.match->name, m->data, sizeof(*m), - m->u.match->usersize, m->match_size); + m->u.match->usersize, m->match_size, + m->u.match->revision); } static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, @@ -1380,7 +1382,8 @@ static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w, { return ebt_obj_to_user(ubase + ((char *)w - base), w->u.watcher->name, w->data, sizeof(*w), - w->u.watcher->usersize, w->watcher_size); + w->u.watcher->usersize, w->watcher_size, + w->u.watcher->revision); } static inline int ebt_entry_to_user(struct ebt_entry 
*e, const char *base, @@ -1411,7 +1414,8 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base, if (ret != 0) return ret; ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t), - t->u.target->usersize, t->target_size); + t->u.target->usersize, t->target_size, + t->u.target->revision); if (ret != 0) return ret; @@ -1599,7 +1603,10 @@ struct compat_ebt_replace { /* struct ebt_entry_match, _target and _watcher have same layout */ struct compat_ebt_entry_mwt { union { - char name[EBT_FUNCTION_MAXNAMELEN]; + struct { + char name[EBT_EXTENSION_MAXNAMELEN]; + u8 revision; + }; compat_uptr_t ptr; } u; compat_uint_t match_size; @@ -1638,8 +1645,9 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, BUG_ON(off >= m->match_size); - if (copy_to_user(cm->u.name, match->name, - strlen(match->name) + 1) || put_user(msize, &cm->match_size)) + if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) || + put_user(match->revision, &cm->u.revision) || + put_user(msize, &cm->match_size)) return -EFAULT; if (match->compat_to_user) { @@ -1668,8 +1676,9 @@ static int compat_target_to_user(struct ebt_entry_target *t, BUG_ON(off >= t->target_size); - if (copy_to_user(cm->u.name, target->name, - strlen(target->name) + 1) || put_user(tsize, &cm->match_size)) + if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) || + put_user(target->revision, &cm->u.revision) || + put_user(tsize, &cm->match_size)) return -EFAULT; if (target->compat_to_user) { @@ -1933,7 +1942,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, struct ebt_entries_buf_state *state, const unsigned char *base) { - char name[EBT_FUNCTION_MAXNAMELEN]; + char name[EBT_EXTENSION_MAXNAMELEN]; struct xt_match *match; struct xt_target *wt; void *dst = NULL; @@ -1947,7 +1956,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, switch (compat_mwt) { case EBT_COMPAT_MATCH: - match = 
xt_request_find_match(NFPROTO_BRIDGE, name, 0); + match = xt_request_find_match(NFPROTO_BRIDGE, name, + mwt->u.revision); if (IS_ERR(match)) return PTR_ERR(match); @@ -1966,7 +1976,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, break; case EBT_COMPAT_WATCHER: /* fallthrough */ case EBT_COMPAT_TARGET: - wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0); + wt = xt_request_find_target(NFPROTO_BRIDGE, name, + mwt->u.revision); if (IS_ERR(wt)) return PTR_ERR(wt); off = xt_compat_target_offset(wt); From 1be3ac98444066a292de02c2c12e203bdf575e7d Mon Sep 17 00:00:00 2001 From: Bernie Harris Date: Wed, 21 Mar 2018 15:42:16 +1300 Subject: [PATCH 35/47] netfilter: ebtables: Add string filter This patch is part of a proposal to add a string filter to ebtables, which would be similar to the string filter in iptables. Like iptables, the ebtables filter uses the xt_string module. Signed-off-by: Bernie Harris Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_string.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 423293ee57c22f..be1feddadcf076 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -21,6 +21,7 @@ MODULE_DESCRIPTION("Xtables: string-based matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); +MODULE_ALIAS("ebt_string"); static bool string_mt(const struct sk_buff *skb, struct xt_action_param *par) From 19b351f16fd940092f2daa75773b0320f0785de9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 28 Mar 2018 15:00:43 +0200 Subject: [PATCH 36/47] netfilter: add flowtable documentation This patch adds initial documentation for the Netfilter flowtable infrastructure. 
Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_flowtable.txt | 112 ++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 Documentation/networking/nf_flowtable.txt diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt new file mode 100644 index 00000000000000..54128c50d508ef --- /dev/null +++ b/Documentation/networking/nf_flowtable.txt @@ -0,0 +1,112 @@ +Netfilter's flowtable infrastructure +==================================== + +This documentation describes the software flowtable infrastructure available in +Netfilter since Linux kernel 4.16. + +Overview +-------- + +Initial packets follow the classic forwarding path, once the flow enters the +established state according to the conntrack semantics (ie. we have seen traffic +in both directions), then you can decide to offload the flow to the flowtable +from the forward chain via the 'flow offload' action available in nftables. + +Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the +output netdevice via neigh_xmit(), hence, they bypass the classic forwarding +path (the visible effect is that you do not see these packets from any of the +netfilter hooks coming after the ingress). In case of flowtable miss, the packet +follows the classic forward path. + +The flowtable uses a resizable hashtable, lookups are based on the following +7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source +and destination ports and the input interface (useful in case there are several +conntrack zones in place). + +Flowtables are populated via the 'flow offload' nftables action, so the user can +selectively specify what flows are placed into the flow table. Hence, packets +follow the classic forwarding path unless the user explicitly instructs packets +to use this new alternative forwarding path via nftables policy.
+ +This is represented in Fig.1, which describes the classic forwarding path +including the Netfilter hooks and the flowtable fastpath bypass. + + userspace process + ^ | + | | + _____|____ ____\/___ + / \ / \ + | input | | output | + \__________/ \_________/ + ^ | + | | + _________ __________ --------- _____\/_____ + / \ / \ |Routing | / \ + --> ingress ---> prerouting ---> |decision| | postrouting |--> neigh_xmit + \_________/ \__________/ ---------- \____________/ ^ + | ^ | | ^ | + flowtable | | ____\/___ | | + | | | / \ | | + __\/___ | --------->| forward |------------ | + |-----| | \_________/ | + |-----| | 'flow offload' rule | + |-----| | adds entry to | + |_____| | flowtable | + | | | + / \ | | + /hit\_no_| | + \ ? / | + \ / | + |__yes_________________fastpath bypass ____________________________| + + Fig.1 Netfilter hooks and flowtable interactions + +The flowtable entry also stores the NAT configuration, so all packets are +mangled according to the NAT policy that matches the initial packets that went +through the classic forwarding path. The TTL is decremented before calling +neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding +path given that the transport selectors are missing, therefore flowtable lookup +is not possible. + +Example configuration +--------------------- + +Enabling the flowtable bypass is relatively easy, you only need to create a +flowtable and add one rule to your forward chain. + + table inet x { + flowtable f { + hook ingress priority 0 devices = { eth0, eth1 }; + } + chain y { + type filter hook forward priority 0; policy accept; + ip protocol tcp flow offload @f + counter packets 0 bytes 0 + } + } + +This example adds the flowtable 'f' to the ingress hook of the eth0 and eth1 +netdevices. You can create as many flowtables as you want in case you need to +perform resource partitioning. 
The flowtable priority defines the order in which +hooks are run in the pipeline, this is convenient in case you already have a +nftables ingress chain (make sure the flowtable priority is smaller than the +nftables ingress chain hence the flowtable runs before in the pipeline). + +The 'flow offload' action from the forward chain 'y' adds an entry to the +flowtable for the TCP syn-ack packet coming in the reply direction. Once the +flow is offloaded, you will observe that the counter rule in the example above +does not get updated for the packets that are being forwarded through the +forwarding bypass. + +More reading +------------ + +This documentation is based on the LWN.net articles [1][2]. Rafal Milecki also +made a very complete and comprehensive summary called "A state of network +acceleration" that describes how things were before this infrastructure was +mainlined [3] and it also makes a rough summary of this work [4]. + +[1] https://lwn.net/Articles/738214/ +[2] https://lwn.net/Articles/742164/ +[3] http://lists.infradead.org/pipermail/lede-dev/2018-January/010830.html +[4] http://lists.infradead.org/pipermail/lede-dev/2018-January/010829.html From 9124a20d8794663a396b5d6f91f66903848a042b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Mar 2018 04:03:22 -0700 Subject: [PATCH 37/47] netfilter: ebt_stp: Use generic functions for comparisons Instead of unnecessary const declarations, use the generic functions to save a little object space.
$ size net/bridge/netfilter/ebt_stp.o* text data bss dec hex filename 1250 144 0 1394 572 net/bridge/netfilter/ebt_stp.o.new 1344 144 0 1488 5d0 net/bridge/netfilter/ebt_stp.o.old Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_stp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 3140eb912d7eb4..47ba98db145dd4 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -153,8 +153,6 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) static int ebt_stp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_stp_info *info = par->matchinfo; - const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; - const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || @@ -162,8 +160,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) return -EINVAL; /* Make sure the match only receives stp frames */ if (!par->nft_compat && - (!ether_addr_equal(e->destmac, bridge_ula) || - !ether_addr_equal(e->destmsk, msk) || + (!ether_addr_equal(e->destmac, eth_stp_addr) || + !is_broadcast_ether_addr(e->destmsk) || !(e->bitmask & EBT_DESTMAC))) return -EINVAL; From 32537e91847a5686d57d3811c075a46b2d9b6434 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:05 +0200 Subject: [PATCH 38/47] netfilter: nf_tables: rename struct nf_chain_type Use nft_ prefix. By when I added chain types, I forgot to use the nftables prefix. Rename enum nft_chain_type to enum nft_chain_types too, otherwise there is an overlap. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++-------- net/bridge/netfilter/nf_tables_bridge.c | 2 +- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/ipv4/netfilter/nf_tables_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 2 +- net/ipv6/netfilter/nf_tables_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 2 +- net/netfilter/nf_tables_api.c | 18 +++++++++--------- net/netfilter/nf_tables_inet.c | 2 +- net/netfilter/nf_tables_netdev.c | 2 +- 12 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 663b015dace553..4a304997c304b8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -868,7 +868,7 @@ struct nft_chain { char *name; }; -enum nft_chain_type { +enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, NFT_CHAIN_T_NAT, @@ -876,7 +876,7 @@ enum nft_chain_type { }; /** - * struct nf_chain_type - nf_tables chain type info + * struct nft_chain_type - nf_tables chain type info * * @name: name of the type * @type: numeric identifier @@ -885,9 +885,9 @@ enum nft_chain_type { * @hook_mask: mask of valid hooks * @hooks: array of hook functions */ -struct nf_chain_type { +struct nft_chain_type { const char *name; - enum nft_chain_type type; + enum nft_chain_types type; int family; struct module *owner; unsigned int hook_mask; @@ -895,7 +895,7 @@ struct nf_chain_type { }; int nft_chain_validate_dependency(const struct nft_chain *chain, - enum nft_chain_type type); + enum nft_chain_types type); int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags); @@ -917,7 +917,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops; - const struct nf_chain_type *type; + const struct nft_chain_type *type; u8 policy; u8 flags; struct nft_stats __percpu *stats; @@ 
-970,8 +970,8 @@ struct nft_table { char *name; }; -int nft_register_chain_type(const struct nf_chain_type *); -void nft_unregister_chain_type(const struct nf_chain_type *); +int nft_register_chain_type(const struct nft_chain_type *); +void nft_unregister_chain_type(const struct nft_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 5160cf61417631..73a1ec556a0ae1 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -42,7 +42,7 @@ nft_do_chain_bridge(void *priv, return nft_do_chain(&pkt, priv); } -static const struct nf_chain_type filter_bridge = { +static const struct nft_chain_type filter_bridge = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 036c074736b025..5b0be2a10b695e 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -27,7 +27,7 @@ nft_do_chain_arp(void *priv, return nft_do_chain(&pkt, priv); } -static const struct nf_chain_type filter_arp = { +static const struct nft_chain_type filter_arp = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_ARP, diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 96f955496d5f17..13bae5cfa257e0 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -30,7 +30,7 @@ static unsigned int nft_do_chain_ipv4(void *priv, return nft_do_chain(&pkt, priv); } -static const struct nf_chain_type filter_ipv4 = { +static const struct nft_chain_type filter_ipv4 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index f2a4909815946f..167f377eb1cb5e 100644 --- 
a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -67,7 +67,7 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv, return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain); } -static const struct nf_chain_type nft_chain_nat_ipv4 = { +static const struct nft_chain_type nft_chain_nat_ipv4 = { .name = "nat", .type = NFT_CHAIN_T_NAT, .family = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index d965c225b9f621..48cf1f892314a1 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -58,7 +58,7 @@ static unsigned int nf_route_table_hook(void *priv, return ret; } -static const struct nf_chain_type nft_chain_route_ipv4 = { +static const struct nft_chain_type nft_chain_route_ipv4 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, .family = NFPROTO_IPV4, diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 17e03589331cb5..d99f9ac6f1b6e4 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -28,7 +28,7 @@ static unsigned int nft_do_chain_ipv6(void *priv, return nft_do_chain(&pkt, priv); } -static const struct nf_chain_type filter_ipv6 = { +static const struct nft_chain_type filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_IPV6, diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 73fe2bd13fcfc5..c498aaa8056b9e 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -65,7 +65,7 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv, return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain); } -static const struct nf_chain_type nft_chain_nat_ipv6 = { +static const struct nft_chain_type nft_chain_nat_ipv6 = { .name = "nat", .type = NFT_CHAIN_T_NAT, .family = NFPROTO_IPV6, diff --git 
a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 11d3c3b9aa18fb..d5c7fdc342560c 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -60,7 +60,7 @@ static unsigned int nf_route_table_hook(void *priv, return ret; } -static const struct nf_chain_type nft_chain_route_ipv6 = { +static const struct nft_chain_type nft_chain_route_ipv6 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, .family = NFPROTO_IPV6, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 92f5606b0deaf2..bf564f49108535 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -384,9 +384,9 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) return ++table->hgenerator; } -static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; +static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; -static const struct nf_chain_type * +static const struct nft_chain_type * __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) { int i; @@ -399,10 +399,10 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) return NULL; } -static const struct nf_chain_type * +static const struct nft_chain_type * nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload) { - const struct nf_chain_type *type; + const struct nft_chain_type *type; type = __nf_tables_chain_type_lookup(nla, family); if (type != NULL) @@ -859,7 +859,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) kfree(ctx->table); } -int nft_register_chain_type(const struct nf_chain_type *ctype) +int nft_register_chain_type(const struct nft_chain_type *ctype) { int err = 0; @@ -878,7 +878,7 @@ int nft_register_chain_type(const struct nf_chain_type *ctype) } EXPORT_SYMBOL_GPL(nft_register_chain_type); -void nft_unregister_chain_type(const struct nf_chain_type *ctype) +void 
nft_unregister_chain_type(const struct nft_chain_type *ctype) { nfnl_lock(NFNL_SUBSYS_NFTABLES); chain_type[ctype->family][ctype->type] = NULL; @@ -1239,7 +1239,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) struct nft_chain_hook { u32 num; s32 priority; - const struct nf_chain_type *type; + const struct nft_chain_type *type; struct net_device *dev; }; @@ -1249,7 +1249,7 @@ static int nft_chain_parse_hook(struct net *net, bool create) { struct nlattr *ha[NFTA_HOOK_MAX + 1]; - const struct nf_chain_type *type; + const struct nft_chain_type *type; struct net_device *dev; int err; @@ -6000,7 +6000,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { }; int nft_chain_validate_dependency(const struct nft_chain *chain, - enum nft_chain_type type) + enum nft_chain_types type) { const struct nft_base_chain *basechain; diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index e30c7da09d0db9..0aefe66ce5585a 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -38,7 +38,7 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, return nft_do_chain(&pkt, priv); } -static const struct nf_chain_type filter_inet = { +static const struct nft_chain_type filter_inet = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_INET, diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 4041fafca93404..88ea959211acb5 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -38,7 +38,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, return nft_do_chain(&pkt, priv); } -static const struct nf_chain_type nft_filter_chain_netdev = { +static const struct nft_chain_type nft_filter_chain_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, From cc07eeb0e5ee18895241460bdccf91a4952731f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:06 +0200 Subject: 
[PATCH 39/47] netfilter: nf_tables: nft_register_chain_type() returns void Use WARN_ON() instead, since it should never happen that the family goes over NFPROTO_NUMPROTO or that a chain of this type is already registered. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/bridge/netfilter/nf_tables_bridge.c | 4 +++- net/ipv4/netfilter/nf_tables_arp.c | 4 +++- net/ipv4/netfilter/nf_tables_ipv4.c | 4 +++- net/ipv4/netfilter/nft_chain_nat_ipv4.c | 6 +----- net/ipv4/netfilter/nft_chain_route_ipv4.c | 4 +++- net/ipv6/netfilter/nf_tables_ipv6.c | 4 +++- net/ipv6/netfilter/nft_chain_nat_ipv6.c | 6 +----- net/ipv6/netfilter/nft_chain_route_ipv6.c | 4 +++- net/netfilter/nf_tables_api.c | 14 +++++--------- net/netfilter/nf_tables_inet.c | 4 +++- net/netfilter/nf_tables_netdev.c | 4 +--- 12 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4a304997c304b8..1f7148fe05044d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -970,7 +970,7 @@ struct nft_table { char *name; }; -int nft_register_chain_type(const struct nft_chain_type *); +void nft_register_chain_type(const struct nft_chain_type *); void nft_unregister_chain_type(const struct nft_chain_type *); int nft_register_expr(struct nft_expr_type *); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 73a1ec556a0ae1..ffb8580dfdacf6 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -63,7 +63,9 @@ static const struct nft_chain_type filter_bridge = { static int __init nf_tables_bridge_init(void) { - return nft_register_chain_type(&filter_bridge); + nft_register_chain_type(&filter_bridge); + + return 0; } static void __exit nf_tables_bridge_exit(void) diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index
5b0be2a10b695e..c2ee6420874345 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -42,7 +42,9 @@ static const struct nft_chain_type filter_arp = { static int __init nf_tables_arp_init(void) { - return nft_register_chain_type(&filter_arp); + nft_register_chain_type(&filter_arp); + + return 0; } static void __exit nf_tables_arp_exit(void) diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 13bae5cfa257e0..c09667de0d6894 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -51,7 +51,9 @@ static const struct nft_chain_type filter_ipv4 = { static int __init nf_tables_ipv4_init(void) { - return nft_register_chain_type(&filter_ipv4); + nft_register_chain_type(&filter_ipv4); + + return 0; } static void __exit nf_tables_ipv4_exit(void) diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index 167f377eb1cb5e..9864f5b3279c04 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -86,11 +86,7 @@ static const struct nft_chain_type nft_chain_nat_ipv4 = { static int __init nft_chain_nat_init(void) { - int err; - - err = nft_register_chain_type(&nft_chain_nat_ipv4); - if (err < 0) - return err; + nft_register_chain_type(&nft_chain_nat_ipv4); return 0; } diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 48cf1f892314a1..7d82934c46f424 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -71,7 +71,9 @@ static const struct nft_chain_type nft_chain_route_ipv4 = { static int __init nft_chain_route_init(void) { - return nft_register_chain_type(&nft_chain_route_ipv4); + nft_register_chain_type(&nft_chain_route_ipv4); + + return 0; } static void __exit nft_chain_route_exit(void) diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 
d99f9ac6f1b6e4..496f694534571c 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -49,7 +49,9 @@ static const struct nft_chain_type filter_ipv6 = { static int __init nf_tables_ipv6_init(void) { - return nft_register_chain_type(&filter_ipv6); + nft_register_chain_type(&filter_ipv6); + + return 0; } static void __exit nf_tables_ipv6_exit(void) diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index c498aaa8056b9e..c95d9a97d425e2 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -84,11 +84,7 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = { static int __init nft_chain_nat_ipv6_init(void) { - int err; - - err = nft_register_chain_type(&nft_chain_nat_ipv6); - if (err < 0) - return err; + nft_register_chain_type(&nft_chain_nat_ipv6); return 0; } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index d5c7fdc342560c..da3f1f8cb325cb 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -73,7 +73,9 @@ static const struct nft_chain_type nft_chain_route_ipv6 = { static int __init nft_chain_route_init(void) { - return nft_register_chain_type(&nft_chain_route_ipv6); + nft_register_chain_type(&nft_chain_route_ipv6); + + return 0; } static void __exit nft_chain_route_exit(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bf564f49108535..9e4b1614ee3933 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -859,22 +859,18 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) kfree(ctx->table); } -int nft_register_chain_type(const struct nft_chain_type *ctype) +void nft_register_chain_type(const struct nft_chain_type *ctype) { - int err = 0; - if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO)) - return -EINVAL; + return; nfnl_lock(NFNL_SUBSYS_NFTABLES); - if 
(chain_type[ctype->family][ctype->type] != NULL) { - err = -EBUSY; - goto out; + if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return; } chain_type[ctype->family][ctype->type] = ctype; -out: nfnl_unlock(NFNL_SUBSYS_NFTABLES); - return err; } EXPORT_SYMBOL_GPL(nft_register_chain_type); diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 0aefe66ce5585a..202c4219969bac 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -59,7 +59,9 @@ static const struct nft_chain_type filter_inet = { static int __init nf_tables_inet_init(void) { - return nft_register_chain_type(&filter_inet); + nft_register_chain_type(&filter_inet); + + return 0; } static void __exit nf_tables_inet_exit(void) diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 88ea959211acb5..4c3835bca63e7a 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -112,9 +112,7 @@ static int __init nf_tables_netdev_init(void) { int ret; - ret = nft_register_chain_type(&nft_filter_chain_netdev); - if (ret) - return ret; + nft_register_chain_type(&nft_filter_chain_netdev); ret = register_netdevice_notifier(&nf_tables_netdev_notifier); if (ret) From 02c7b25e5f54321b9063e18d4f52cce07f8e081d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:07 +0200 Subject: [PATCH 40/47] netfilter: nf_tables: build-in filter chain type One module per supported filter chain family type takes too much memory for very little code - too much modularization - place all chain filter definitions in one single file. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 + net/bridge/netfilter/Kconfig | 2 +- net/bridge/netfilter/Makefile | 1 - net/bridge/netfilter/nf_tables_bridge.c | 81 ----- net/ipv4/netfilter/Kconfig | 4 +- net/ipv4/netfilter/Makefile | 2 - net/ipv4/netfilter/nf_tables_arp.c | 60 ---- net/ipv4/netfilter/nf_tables_ipv4.c | 69 ---- net/ipv6/netfilter/Kconfig | 2 +- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/nf_tables_ipv6.c | 67 ---- net/netfilter/Kconfig | 4 +- net/netfilter/Makefile | 9 +- net/netfilter/nf_tables_api.c | 3 + net/netfilter/nf_tables_inet.c | 77 ----- net/netfilter/nf_tables_netdev.c | 140 --------- net/netfilter/nft_chain_filter.c | 398 ++++++++++++++++++++++++ 17 files changed, 414 insertions(+), 509 deletions(-) delete mode 100644 net/bridge/netfilter/nf_tables_bridge.c delete mode 100644 net/ipv4/netfilter/nf_tables_arp.c delete mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c delete mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c delete mode 100644 net/netfilter/nf_tables_inet.c delete mode 100644 net/netfilter/nf_tables_netdev.c create mode 100644 net/netfilter/nft_chain_filter.c diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1f7148fe05044d..77c3c04c27ac97 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1345,4 +1345,7 @@ struct nft_trans_flowtable { #define nft_trans_flowtable(trans) \ (((struct nft_trans_flowtable *)trans->data)->flowtable) +int __init nft_chain_filter_init(void); +void __exit nft_chain_filter_fini(void); + #endif /* _NET_NF_TABLES_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 225d1668dfddb1..f212447794bd5c 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -5,7 +5,7 @@ menuconfig NF_TABLES_BRIDGE depends on BRIDGE && NETFILTER && NF_TABLES select NETFILTER_FAMILY_BRIDGE - tristate "Ethernet Bridge nf_tables support" + bool "Ethernet Bridge 
nf_tables support" if NF_TABLES_BRIDGE diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 2f28e16de6c7f1..4bc758dd4a8c13 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -3,7 +3,6 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. # -obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c deleted file mode 100644 index ffb8580dfdacf6..00000000000000 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2013 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int -nft_do_chain_bridge(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - nft_set_pktinfo_ipv4_validate(&pkt, skb); - break; - case htons(ETH_P_IPV6): - nft_set_pktinfo_ipv6_validate(&pkt, skb); - break; - default: - nft_set_pktinfo_unspec(&pkt, skb); - break; - } - - return nft_do_chain(&pkt, priv); -} - -static const struct nft_chain_type filter_bridge = { - .name = "filter", - .type = NFT_CHAIN_T_DEFAULT, - .family = NFPROTO_BRIDGE, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_BR_PRE_ROUTING) | - (1 << NF_BR_LOCAL_IN) | - (1 << NF_BR_FORWARD) | - (1 << NF_BR_LOCAL_OUT) | - (1 << NF_BR_POST_ROUTING), - .hooks = { - [NF_BR_PRE_ROUTING] = 
nft_do_chain_bridge, - [NF_BR_LOCAL_IN] = nft_do_chain_bridge, - [NF_BR_FORWARD] = nft_do_chain_bridge, - [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, - [NF_BR_POST_ROUTING] = nft_do_chain_bridge, - }, -}; - -static int __init nf_tables_bridge_init(void) -{ - nft_register_chain_type(&filter_bridge); - - return 0; -} - -static void __exit nf_tables_bridge_exit(void) -{ - nft_unregister_chain_type(&filter_bridge); -} - -module_init(nf_tables_bridge_init); -module_exit(nf_tables_bridge_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter"); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index dfe6fa4ea5540b..280048e1e3958b 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -34,7 +34,7 @@ config NF_SOCKET_IPV4 if NF_TABLES config NF_TABLES_IPV4 - tristate "IPv4 nf_tables support" + bool "IPv4 nf_tables support" help This option enables the IPv4 support for nf_tables. @@ -71,7 +71,7 @@ config NFT_FIB_IPV4 endif # NF_TABLES_IPV4 config NF_TABLES_ARP - tristate "ARP nf_tables support" + bool "ARP nf_tables support" select NETFILTER_FAMILY_ARP help This option enables the ARP support for nf_tables. 
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 2dad20eefd26aa..62ede5e3a3def9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o -obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o @@ -47,7 +46,6 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o -obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # flow table support obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c deleted file mode 100644 index c2ee6420874345..00000000000000 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2008-2010 Patrick McHardy - * Copyright (c) 2013 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include - -static unsigned int -nft_do_chain_arp(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_unspec(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static const struct nft_chain_type filter_arp = { - .name = "filter", - .type = NFT_CHAIN_T_DEFAULT, - .family = NFPROTO_ARP, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_ARP_IN) | - (1 << NF_ARP_OUT), - .hooks = { - [NF_ARP_IN] = nft_do_chain_arp, - [NF_ARP_OUT] = nft_do_chain_arp, - }, -}; - -static int __init nf_tables_arp_init(void) -{ - nft_register_chain_type(&filter_arp); - - return 0; -} - -static void __exit nf_tables_arp_exit(void) -{ - nft_unregister_chain_type(&filter_arp); -} - -module_init(nf_tables_arp_init); -module_exit(nf_tables_arp_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c deleted file mode 100644 index c09667de0d6894..00000000000000 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2012-2013 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nft_do_chain_ipv4(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv4(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static const struct nft_chain_type filter_ipv4 = { - .name = "filter", - .type = NFT_CHAIN_T_DEFAULT, - .family = NFPROTO_IPV4, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING), - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, - [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4, - [NF_INET_FORWARD] = nft_do_chain_ipv4, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, - }, -}; - -static int __init nf_tables_ipv4_init(void) -{ - nft_register_chain_type(&filter_ipv4); - - return 0; -} - -static void __exit nf_tables_ipv4_exit(void) -{ - nft_unregister_chain_type(&filter_ipv4); -} - -module_init(nf_tables_ipv4_init); -module_exit(nf_tables_ipv4_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d395d1590699d8..ccbfa83e4bb076 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -34,7 +34,7 @@ config NF_SOCKET_IPV6 if NF_TABLES config NF_TABLES_IPV6 - tristate "IPv6 nf_tables support" + bool "IPv6 nf_tables support" help This option enables the IPv6 support for nf_tables. 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d984057b839540..44273d6f03a57d 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,7 +36,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o # nf_tables -obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c deleted file mode 100644 index 496f694534571c..00000000000000 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * Copyright (c) 2012-2013 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include -#include -#include -#include -#include -#include - -static unsigned int nft_do_chain_ipv6(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv6(&pkt, skb); - - return nft_do_chain(&pkt, priv); -} - -static const struct nft_chain_type filter_ipv6 = { - .name = "filter", - .type = NFT_CHAIN_T_DEFAULT, - .family = NFPROTO_IPV6, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING), - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, -}; - -static int __init nf_tables_ipv6_init(void) -{ - nft_register_chain_type(&filter_ipv6); - - return 0; -} - -static void __exit nf_tables_ipv6_exit(void) -{ - nft_unregister_chain_type(&filter_ipv6); -} - -module_init(nf_tables_ipv6_init); -module_exit(nf_tables_ipv6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d3220b43c832f6..704b3832dbad33 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -465,12 +465,12 @@ config NF_TABLES_INET depends on IPV6 select NF_TABLES_IPV4 select NF_TABLES_IPV6 - tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support" + bool "Netfilter nf_tables mixed IPv4/IPv6 tables support" help This option enables support for a mixed IPv4/IPv6 "inet" table. config NF_TABLES_NETDEV - tristate "Netfilter nf_tables netdev tables support" + bool "Netfilter nf_tables netdev tables support" help This option enables support for the "netdev" table. 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5d9b8b959e5892..fd32bd2c952158 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -73,13 +73,12 @@ obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables -nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \ - nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \ - nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o +nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ + nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ + nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ + nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o -obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o -obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9e4b1614ee3933..97ec1c388bfeee 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6584,6 +6584,8 @@ static int __init nf_tables_module_init(void) { int err; + nft_chain_filter_init(); + info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS, GFP_KERNEL); if (info == NULL) { @@ -6618,6 +6620,7 @@ static void __exit nf_tables_module_exit(void) rcu_barrier(); nf_tables_core_module_exit(); kfree(info); + nft_chain_filter_fini(); } module_init(nf_tables_module_init); diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c deleted file mode 100644 index 202c4219969bac..00000000000000 --- a/net/netfilter/nf_tables_inet.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2012-2014 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - - switch (state->pf) { - case NFPROTO_IPV4: - nft_set_pktinfo_ipv4(&pkt, skb); - break; - case NFPROTO_IPV6: - nft_set_pktinfo_ipv6(&pkt, skb); - break; - default: - break; - } - - return nft_do_chain(&pkt, priv); -} - -static const struct nft_chain_type filter_inet = { - .name = "filter", - .type = NFT_CHAIN_T_DEFAULT, - .family = NFPROTO_INET, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING), - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_inet, - [NF_INET_LOCAL_OUT] = nft_do_chain_inet, - [NF_INET_FORWARD] = nft_do_chain_inet, - [NF_INET_PRE_ROUTING] = nft_do_chain_inet, - [NF_INET_POST_ROUTING] = nft_do_chain_inet, - }, -}; - -static int __init nf_tables_inet_init(void) -{ - nft_register_chain_type(&filter_inet); - - return 0; -} - -static void __exit nf_tables_inet_exit(void) -{ - nft_unregister_chain_type(&filter_inet); -} - -module_init(nf_tables_inet_init); -module_exit(nf_tables_inet_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_CHAIN(1, "filter"); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c deleted file mode 100644 index 4c3835bca63e7a..00000000000000 --- a/net/netfilter/nf_tables_netdev.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2015 Pablo Neira Ayuso - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned int -nft_do_chain_netdev(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nft_pktinfo pkt; - - nft_set_pktinfo(&pkt, skb, state); - - switch (skb->protocol) { - case htons(ETH_P_IP): - nft_set_pktinfo_ipv4_validate(&pkt, skb); - break; - case htons(ETH_P_IPV6): - nft_set_pktinfo_ipv6_validate(&pkt, skb); - break; - default: - nft_set_pktinfo_unspec(&pkt, skb); - break; - } - - return nft_do_chain(&pkt, priv); -} - -static const struct nft_chain_type nft_filter_chain_netdev = { - .name = "filter", - .type = NFT_CHAIN_T_DEFAULT, - .family = NFPROTO_NETDEV, - .owner = THIS_MODULE, - .hook_mask = (1 << NF_NETDEV_INGRESS), - .hooks = { - [NF_NETDEV_INGRESS] = nft_do_chain_netdev, - }, -}; - -static void nft_netdev_event(unsigned long event, struct net_device *dev, - struct nft_ctx *ctx) -{ - struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - - switch (event) { - case NETDEV_UNREGISTER: - if (strcmp(basechain->dev_name, dev->name) != 0) - return; - - __nft_release_basechain(ctx); - break; - case NETDEV_CHANGENAME: - if (dev->ifindex != basechain->ops.dev->ifindex) - return; - - strncpy(basechain->dev_name, dev->name, IFNAMSIZ); - break; - } -} - -static int nf_tables_netdev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct nft_table *table; - struct nft_chain *chain, *nr; - struct nft_ctx ctx = { - .net = dev_net(dev), - }; - - if (event != NETDEV_UNREGISTER && - event != NETDEV_CHANGENAME) - return NOTIFY_DONE; - - nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_for_each_entry(table, &ctx.net->nft.tables, list) { - if (table->family != NFPROTO_NETDEV) - continue; - - ctx.family = table->family; - ctx.table = table; - list_for_each_entry_safe(chain, nr, &table->chains, list) { - if (!nft_is_base_chain(chain)) - continue; - - ctx.chain = 
chain; - nft_netdev_event(event, dev, &ctx); - } - } - nfnl_unlock(NFNL_SUBSYS_NFTABLES); - - return NOTIFY_DONE; -} - -static struct notifier_block nf_tables_netdev_notifier = { - .notifier_call = nf_tables_netdev_event, -}; - -static int __init nf_tables_netdev_init(void) -{ - int ret; - - nft_register_chain_type(&nft_filter_chain_netdev); - - ret = register_netdevice_notifier(&nf_tables_netdev_notifier); - if (ret) - goto err_register_netdevice_notifier; - - return 0; - -err_register_netdevice_notifier: - nft_unregister_chain_type(&nft_filter_chain_netdev); - - return ret; -} - -static void __exit nf_tables_netdev_exit(void) -{ - unregister_netdevice_notifier(&nf_tables_netdev_notifier); - nft_unregister_chain_type(&nft_filter_chain_netdev); -} - -module_init(nf_tables_netdev_init); -module_exit(nf_tables_netdev_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Pablo Neira Ayuso "); -MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */ diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c new file mode 100644 index 00000000000000..84c902477a91ef --- /dev/null +++ b/net/netfilter/nft_chain_filter.c @@ -0,0 +1,398 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_NF_TABLES_IPV4 +static unsigned int nft_do_chain_ipv4(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb); + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_filter_ipv4 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV4, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4, + [NF_INET_FORWARD] = 
nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, +}; + +static void nft_chain_filter_ipv4_init(void) +{ + nft_register_chain_type(&nft_chain_filter_ipv4); +} +static void nft_chain_filter_ipv4_fini(void) +{ + nft_unregister_chain_type(&nft_chain_filter_ipv4); +} + +#else +static inline void nft_chain_filter_ipv4_init(void) {} +static inline void nft_chain_filter_ipv4_fini(void) {} +#endif /* CONFIG_NF_TABLES_IPV4 */ + +#ifdef CONFIG_NF_TABLES_ARP +static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb); + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_filter_arp = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_ARP, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_ARP_IN) | + (1 << NF_ARP_OUT), + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + }, +}; + +static void nft_chain_filter_arp_init(void) +{ + nft_register_chain_type(&nft_chain_filter_arp); +} + +static void nft_chain_filter_arp_fini(void) +{ + nft_unregister_chain_type(&nft_chain_filter_arp); +} +#else +static inline void nft_chain_filter_arp_init(void) {} +static inline void nft_chain_filter_arp_fini(void) {} +#endif /* CONFIG_NF_TABLES_ARP */ + +#ifdef CONFIG_NF_TABLES_IPV6 +static unsigned int nft_do_chain_ipv6(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_filter_ipv6 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV6, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 
<< NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, + }, +}; + +static void nft_chain_filter_ipv6_init(void) +{ + nft_register_chain_type(&nft_chain_filter_ipv6); +} + +static void nft_chain_filter_ipv6_fini(void) +{ + nft_unregister_chain_type(&nft_chain_filter_ipv6); +} +#else +static inline void nft_chain_filter_ipv6_init(void) {} +static inline void nft_chain_filter_ipv6_fini(void) {} +#endif /* CONFIG_NF_TABLES_IPV6 */ + +#ifdef CONFIG_NF_TABLES_INET +static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (state->pf) { + case NFPROTO_IPV4: + nft_set_pktinfo_ipv4(&pkt, skb); + break; + case NFPROTO_IPV6: + nft_set_pktinfo_ipv6(&pkt, skb); + break; + default: + break; + } + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_filter_inet = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_INET, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_inet, + [NF_INET_LOCAL_OUT] = nft_do_chain_inet, + [NF_INET_FORWARD] = nft_do_chain_inet, + [NF_INET_PRE_ROUTING] = nft_do_chain_inet, + [NF_INET_POST_ROUTING] = nft_do_chain_inet, + }, +}; + +static void nft_chain_filter_inet_init(void) +{ + nft_register_chain_type(&nft_chain_filter_inet); +} + +static void nft_chain_filter_inet_fini(void) +{ + nft_unregister_chain_type(&nft_chain_filter_inet); +} +#else +static inline void nft_chain_filter_inet_init(void) {} +static inline void nft_chain_filter_inet_fini(void) {} +#endif /* CONFIG_NF_TABLES_INET */ + +#ifdef CONFIG_NF_TABLES_BRIDGE
+static unsigned int +nft_do_chain_bridge(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nft_set_pktinfo_ipv4_validate(&pkt, skb); + break; + case htons(ETH_P_IPV6): + nft_set_pktinfo_ipv6_validate(&pkt, skb); + break; + default: + nft_set_pktinfo_unspec(&pkt, skb); + break; + } + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_filter_bridge = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_BRIDGE, + .hook_mask = (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN) | + (1 << NF_BR_FORWARD) | + (1 << NF_BR_LOCAL_OUT) | + (1 << NF_BR_POST_ROUTING), + .hooks = { + [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + [NF_BR_POST_ROUTING] = nft_do_chain_bridge, + }, +}; + +static void nft_chain_filter_bridge_init(void) +{ + nft_register_chain_type(&nft_chain_filter_bridge); +} + +static void nft_chain_filter_bridge_fini(void) +{ + nft_unregister_chain_type(&nft_chain_filter_bridge); +} +#else +static inline void nft_chain_filter_bridge_init(void) {} +static inline void nft_chain_filter_bridge_fini(void) {} +#endif /* CONFIG_NF_TABLES_BRIDGE */ + +#ifdef CONFIG_NF_TABLES_NETDEV +static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, skb, state); + + switch (skb->protocol) { + case htons(ETH_P_IP): + nft_set_pktinfo_ipv4_validate(&pkt, skb); + break; + case htons(ETH_P_IPV6): + nft_set_pktinfo_ipv6_validate(&pkt, skb); + break; + default: + nft_set_pktinfo_unspec(&pkt, skb); + break; + } + + return nft_do_chain(&pkt, priv); +} + +static const struct nft_chain_type nft_chain_filter_netdev = { + .name = "filter", + .type = 
NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_NETDEV, + .hook_mask = (1 << NF_NETDEV_INGRESS), + .hooks = { + [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + }, +}; + +static void nft_netdev_event(unsigned long event, struct net_device *dev, + struct nft_ctx *ctx) +{ + struct nft_base_chain *basechain = nft_base_chain(ctx->chain); + + switch (event) { + case NETDEV_UNREGISTER: + if (strcmp(basechain->dev_name, dev->name) != 0) + return; + + __nft_release_basechain(ctx); + break; + case NETDEV_CHANGENAME: + if (dev->ifindex != basechain->ops.dev->ifindex) + return; + + strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + break; + } +} + +static int nf_tables_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_table *table; + struct nft_chain *chain, *nr; + struct nft_ctx ctx = { + .net = dev_net(dev), + }; + + if (event != NETDEV_UNREGISTER && + event != NETDEV_CHANGENAME) + return NOTIFY_DONE; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(table, &ctx.net->nft.tables, list) { + if (table->family != NFPROTO_NETDEV) + continue; + + ctx.family = table->family; + ctx.table = table; + list_for_each_entry_safe(chain, nr, &table->chains, list) { + if (!nft_is_base_chain(chain)) + continue; + + ctx.chain = chain; + nft_netdev_event(event, dev, &ctx); + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + + return NOTIFY_DONE; +} + +static struct notifier_block nf_tables_netdev_notifier = { + .notifier_call = nf_tables_netdev_event, +}; + +static int nft_chain_filter_netdev_init(void) +{ + int err; + + nft_register_chain_type(&nft_chain_filter_netdev); + + err = register_netdevice_notifier(&nf_tables_netdev_notifier); + if (err) + goto err_register_netdevice_notifier; + + return 0; + +err_register_netdevice_notifier: + nft_unregister_chain_type(&nft_chain_filter_netdev); + + return err; +} + +static void nft_chain_filter_netdev_fini(void) +{ + 
nft_unregister_chain_type(&nft_chain_filter_netdev); + unregister_netdevice_notifier(&nf_tables_netdev_notifier); +} +#else +static inline int nft_chain_filter_netdev_init(void) { return 0; } +static inline void nft_chain_filter_netdev_fini(void) {} +#endif /* CONFIG_NF_TABLES_NETDEV */ + +int __init nft_chain_filter_init(void) +{ + int err; + + err = nft_chain_filter_netdev_init(); + if (err < 0) + return err; + + nft_chain_filter_ipv4_init(); + nft_chain_filter_ipv6_init(); + nft_chain_filter_arp_init(); + nft_chain_filter_inet_init(); + nft_chain_filter_bridge_init(); + + return 0; +} + +void __exit nft_chain_filter_fini(void) +{ + nft_chain_filter_bridge_fini(); + nft_chain_filter_inet_fini(); + nft_chain_filter_arp_fini(); + nft_chain_filter_ipv6_fini(); + nft_chain_filter_ipv4_fini(); + nft_chain_filter_netdev_fini(); +} From 43a605f2f722b6e08addedae8545b490fca252c4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Mar 2018 11:53:08 +0200 Subject: [PATCH 41/47] netfilter: nf_tables: enable conntrack if NAT chain is registered Register conntrack hooks if the user adds NAT chains. Users get confused with the existing behaviour since they will see no packets hitting this chain until they add the first rule that refers to conntrack. This patch adds new ->init() and ->free() indirections to chain types that can be used by NAT chains to invoke the conntrack dependency. 
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ net/ipv4/netfilter/nft_chain_nat_ipv4.c | 12 ++++++++++++ net/ipv6/netfilter/nft_chain_nat_ipv6.c | 12 ++++++++++++ net/netfilter/nf_tables_api.c | 24 +++++++++++++++++------- 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 77c3c04c27ac97..e26b94a61a99d9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -884,6 +884,8 @@ enum nft_chain_types { * @owner: module owner * @hook_mask: mask of valid hooks * @hooks: array of hook functions + * @init: chain initialization function + * @free: chain release function */ struct nft_chain_type { const char *name; @@ -892,6 +894,8 @@ struct nft_chain_type { struct module *owner; unsigned int hook_mask; nf_hookfn *hooks[NF_MAX_HOOKS]; + int (*init)(struct nft_ctx *ctx); + void (*free)(struct nft_ctx *ctx); }; int nft_chain_validate_dependency(const struct nft_chain *chain, diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index 9864f5b3279c04..b5464a3f253baa 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -67,6 +67,16 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv, return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain); } +static int nft_nat_ipv4_init(struct nft_ctx *ctx) +{ + return nf_ct_netns_get(ctx->net, ctx->family); +} + +static void nft_nat_ipv4_free(struct nft_ctx *ctx) +{ + nf_ct_netns_put(ctx->net, ctx->family); +} + static const struct nft_chain_type nft_chain_nat_ipv4 = { .name = "nat", .type = NFT_CHAIN_T_NAT, @@ -82,6 +92,8 @@ static const struct nft_chain_type nft_chain_nat_ipv4 = { [NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn, [NF_INET_LOCAL_IN] = nft_nat_ipv4_fn, }, + .init = nft_nat_ipv4_init, + .free = nft_nat_ipv4_free, }; static int __init nft_chain_nat_init(void) diff --git 
a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index c95d9a97d425e2..3557b114446c93 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -65,6 +65,16 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv, return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain); } +static int nft_nat_ipv6_init(struct nft_ctx *ctx) +{ + return nf_ct_netns_get(ctx->net, ctx->family); +} + +static void nft_nat_ipv6_free(struct nft_ctx *ctx) +{ + nf_ct_netns_put(ctx->net, ctx->family); +} + static const struct nft_chain_type nft_chain_nat_ipv6 = { .name = "nat", .type = NFT_CHAIN_T_NAT, @@ -80,6 +90,8 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = { [NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn, [NF_INET_LOCAL_IN] = nft_nat_ipv6_fn, }, + .init = nft_nat_ipv6_init, + .free = nft_nat_ipv6_free, }; static int __init nft_chain_nat_ipv6_init(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 97ec1c388bfeee..af8b6a7488bdd4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1211,13 +1211,17 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain, rcu_assign_pointer(chain->stats, newstats); } -static void nf_tables_chain_destroy(struct nft_chain *chain) +static void nf_tables_chain_destroy(struct nft_ctx *ctx) { + struct nft_chain *chain = ctx->chain; + BUG_ON(chain->use > 0); if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); + if (basechain->type->free) + basechain->type->free(ctx); module_put(basechain->type->owner); free_percpu(basechain->stats); if (basechain->stats) @@ -1354,6 +1358,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, } basechain->type = hook.type; + if (basechain->type->init) + basechain->type->init(ctx); + chain = &basechain->chain; ops = &basechain->ops; @@ -1374,6 +1381,8 @@ static int nf_tables_addchain(struct nft_ctx 
*ctx, u8 family, u8 genmask, if (chain == NULL) return -ENOMEM; } + ctx->chain = chain; + INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->table = table; @@ -1387,7 +1396,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; - ctx->chain = chain; err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (err < 0) goto err2; @@ -1399,7 +1407,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, err2: nf_tables_unregister_hook(net, table, chain); err1: - nf_tables_chain_destroy(chain); + nf_tables_chain_destroy(ctx); return err; } @@ -5678,7 +5686,7 @@ static void nf_tables_commit_release(struct nft_trans *trans) nf_tables_table_destroy(&trans->ctx); break; case NFT_MSG_DELCHAIN: - nf_tables_chain_destroy(trans->ctx.chain); + nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_DELRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); @@ -5849,7 +5857,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_table_destroy(&trans->ctx); break; case NFT_MSG_NEWCHAIN: - nf_tables_chain_destroy(trans->ctx.chain); + nf_tables_chain_destroy(&trans->ctx); break; case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); @@ -6499,7 +6507,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) } list_del(&ctx->chain->list); ctx->table->use--; - nf_tables_chain_destroy(ctx->chain); + nf_tables_chain_destroy(ctx); return 0; } @@ -6515,6 +6523,7 @@ static void __nft_release_tables(struct net *net) struct nft_set *set, *ns; struct nft_ctx ctx = { .net = net, + .family = NFPROTO_NETDEV, }; list_for_each_entry_safe(table, nt, &net->nft.tables, list) { @@ -6551,9 +6560,10 @@ static void __nft_release_tables(struct net *net) nft_obj_destroy(obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { + ctx.chain = chain; list_del(&chain->list); table->use--; - nf_tables_chain_destroy(chain); + 
nf_tables_chain_destroy(&ctx); } list_del(&table->list); nf_tables_table_destroy(&ctx); From 10659cbab72b7bfee1a886018d1915a9549b6378 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 28 Mar 2018 12:06:49 +0200 Subject: [PATCH 42/47] netfilter: nf_tables: rename to nft_set_lookup_global() To prepare for the introduction of a shorter function prefix. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 10 +++++----- net/netfilter/nf_tables_api.c | 12 ++++++------ net/netfilter/nft_dynset.c | 5 +++-- net/netfilter/nft_lookup.c | 4 ++-- net/netfilter/nft_objref.c | 5 +++-- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e26b94a61a99d9..bd2a18d66189e6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -434,11 +434,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv) return (void *)priv - offsetof(struct nft_set, data); } -struct nft_set *nft_set_lookup(const struct net *net, - const struct nft_table *table, - const struct nlattr *nla_set_name, - const struct nlattr *nla_set_id, - u8 genmask); +struct nft_set *nft_set_lookup_global(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index af8b6a7488bdd4..769d84015073b2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2633,11 +2633,11 @@ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, return ERR_PTR(-ENOENT); } -struct nft_set *nft_set_lookup(const struct net *net, - const struct nft_table *table, - const struct nlattr *nla_set_name, - const struct nlattr *nla_set_id, - u8 genmask) +struct nft_set *nft_set_lookup_global(const struct net *net, +
const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask) { struct nft_set *set; @@ -2650,7 +2650,7 @@ struct nft_set *nft_set_lookup(const struct net *net, } return set; } -EXPORT_SYMBOL_GPL(nft_set_lookup); +EXPORT_SYMBOL_GPL(nft_set_lookup_global); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index fc83e29d6634ea..04863fad05ddd2 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -132,8 +132,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->invert = true; } - set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME], - tb[NFTA_DYNSET_SET_ID], genmask); + set = nft_set_lookup_global(ctx->net, ctx->table, + tb[NFTA_DYNSET_SET_NAME], + tb[NFTA_DYNSET_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 475570e89ede71..f52da5e2199fe4 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -71,8 +71,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; - set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET], - tb[NFTA_LOOKUP_SET_ID], genmask); + set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET], + tb[NFTA_LOOKUP_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 7bcdc48f3d737a..0b02407773ad27 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -117,8 +117,9 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, struct nft_set *set; int err; - set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME], - tb[NFTA_OBJREF_SET_ID], genmask); + set = nft_set_lookup_global(ctx->net, ctx->table, + tb[NFTA_OBJREF_SET_NAME], + tb[NFTA_OBJREF_SET_ID], genmask); 
if (IS_ERR(set)) return PTR_ERR(set); From a3073c17dd8cd041d5cf68f28a80a54e310f2f45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 28 Mar 2018 12:06:50 +0200 Subject: [PATCH 43/47] netfilter: nf_tables: use nft_set_lookup_global from nf_tables_newsetelem() Replace opencoded implementation of nft_set_lookup_global() by call to this function. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 769d84015073b2..2bd80fa9b070ec 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4032,17 +4032,10 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) { - if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { - set = nf_tables_set_lookup_byid(net, - nla[NFTA_SET_ELEM_LIST_SET_ID], - genmask); - } - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + nla[NFTA_SET_ELEM_LIST_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; From c47d36b3855d804b2e282f9b4eecbbd19b5453f9 Mon Sep 17 00:00:00 2001 From: Arushi Singhal Date: Thu, 29 Mar 2018 00:39:50 +0530 Subject: [PATCH 44/47] netfilter: Merge assignment with return Merge assignment with return statement to directly return the value. 
Signed-off-by: Arushi Singhal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 5 ++--- net/netfilter/xt_hashlimit.c | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 11ef85a57244a8..b00e84bf410738 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1527,9 +1527,8 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) if (ret < 0) return ret; - ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC, - cda[CTA_NAT_SRC]); - return ret; + return ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC, + cda[CTA_NAT_SRC]); #else if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) return 0; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index db2fe091174048..64fc3721d74cf6 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -534,8 +534,7 @@ static u64 user2rate_bytes(u32 user) u64 r; r = user ? U32_MAX / user : U32_MAX; - r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT; - return r; + return (r - 1) << XT_HASHLIMIT_BYTE_SHIFT; } static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, From 9ba5c404bf1d6284f0269411b33394362b7ff405 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 29 Mar 2018 15:12:41 +0100 Subject: [PATCH 45/47] netfilter: x_tables: Add note about how to free percpu counters Due to the way percpu counters are allocated and freed in blocks, it is not safe to free counters individually. Currently all callers do the right thing, but let's note this restriction. 
Fixes: ae0ac0ed6fcf ("netfilter: x_tables: pack percpu counter allocations") Signed-off-by: Ben Hutchings Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index bac932f1c58208..75cd5196b29b2d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1854,7 +1854,9 @@ EXPORT_SYMBOL_GPL(xt_proto_fini); * to fetch the real percpu counter. * * To speed up allocation and improve data locality, a 4kb block is - * allocated. + * allocated. Freeing any counter may free an entire block, so all + * counters allocated using the same state must be freed at the same + * time. * * xt_percpu_counter_alloc_state contains the base address of the * allocated page and the current sub-offset. From e3b5e1ec75234fb6b27708a316cdf69f9fb176a8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 30 Mar 2018 11:39:12 +0200 Subject: [PATCH 46/47] Revert "netfilter: x_tables: ensure last rule in base chain matches underflow/policy" This reverts commit 0d7df906a0e78079a02108b06d32c3ef2238ad25. Valdis Kletnieks reported that xtables is broken in linux-next since 0d7df906a0e78 ("netfilter: x_tables: ensure last rule in base chain matches underflow/policy"), as the kernel rejects the (well-formed) ruleset: [ 64.402790] ip6_tables: last base chain position 1136 doesn't match underflow 1344 (hook 1) mark_source_chains is not the correct place for such a check, as it terminates evaluation of a chain once it sees an unconditional verdict (following rules are known to be unreachable). It seems preferable to fix libiptc instead, so remove this check again.
Fixes: 0d7df906a0e78 ("netfilter: x_tables: ensure last rule in base chain matches underflow/policy") Reported-by: Valdis Kletnieks Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 17 +---------------- net/ipv4/netfilter/ip_tables.c | 17 +---------------- net/ipv6/netfilter/ip6_tables.c | 17 +---------------- 3 files changed, 3 insertions(+), 48 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f366ff1cfc19ed..aaafdbd15ad351 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -309,13 +309,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct arpt_entry *e = entry0 + pos; - unsigned int last_pos, depth; if (!(valid_hooks & (1 << hook))) continue; - depth = 0; - last_pos = pos; /* Set initial back pointer. */ e->counters.pcnt = pos; @@ -346,8 +343,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, pos = e->counters.pcnt; e->counters.pcnt = 0; - if (depth) - --depth; /* We're at the start. */ if (pos == oldpos) goto next; @@ -372,9 +367,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - - if (entry0 + newpos != arpt_next_entry(e)) - ++depth; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -385,15 +377,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, e->counters.pcnt = pos; pos = newpos; } - if (depth == 0) - last_pos = pos; - } -next: - if (last_pos != newinfo->underflow[hook]) { - pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n", - last_pos, newinfo->underflow[hook], hook); - return 0; } +next: ; } return 1; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2362ca2c9e0c10..f9063513f9d198 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -378,13 +378,10 @@ mark_source_chains(const struct xt_table_info *newinfo, for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ipt_entry *e = entry0 + pos; - unsigned int last_pos, depth; if (!(valid_hooks & (1 << hook))) continue; - depth = 0; - last_pos = pos; /* Set initial back pointer. */ e->counters.pcnt = pos; @@ -413,8 +410,6 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = e->counters.pcnt; e->counters.pcnt = 0; - if (depth) - --depth; /* We're at the start. */ if (pos == oldpos) goto next; @@ -439,9 +434,6 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - - if (entry0 + newpos != ipt_next_entry(e)) - ++depth; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -452,15 +444,8 @@ mark_source_chains(const struct xt_table_info *newinfo, e->counters.pcnt = pos; pos = newpos; } - if (depth == 0) - last_pos = pos; - } -next: - if (last_pos != newinfo->underflow[hook]) { - pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n", - last_pos, newinfo->underflow[hook], hook); - return 0; } +next: ; } return 1; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 004508753abc62..3c36a4c77f2920 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -396,13 +396,10 @@ mark_source_chains(const struct xt_table_info *newinfo, for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; struct ip6t_entry *e = entry0 + pos; - unsigned int last_pos, depth; if (!(valid_hooks & (1 << hook))) continue; - depth = 0; - last_pos = pos; /* Set initial back pointer. */ e->counters.pcnt = pos; @@ -431,8 +428,6 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = e->counters.pcnt; e->counters.pcnt = 0; - if (depth) - --depth; /* We're at the start. */ if (pos == oldpos) goto next; @@ -457,9 +452,6 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - - if (entry0 + newpos != ip6t_next_entry(e)) - ++depth; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; @@ -470,15 +462,8 @@ mark_source_chains(const struct xt_table_info *newinfo, e->counters.pcnt = pos; pos = newpos; } - if (depth == 0) - last_pos = pos; - } -next: - if (last_pos != newinfo->underflow[hook]) { - pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n", - last_pos, newinfo->underflow[hook], hook); - return 0; } +next: ; } return 1; } From 26c97c5d8dac6bc56d4360561a286f52543ac07e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 20 Mar 2018 10:35:47 -0700 Subject: [PATCH 47/47] netfilter: ipset: Use is_zero_ether_addr instead of static and memcmp To make the test a bit clearer and to reduce object size a little. Miscellanea: o remove now unnecessary static const array $ size ip_set_hash_mac.o* text data bss dec hex filename 22822 4619 64 27505 6b71 ip_set_hash_mac.o.allyesconfig.new 22932 4683 64 27679 6c1f ip_set_hash_mac.o.allyesconfig.old 10443 1040 0 11483 2cdb ip_set_hash_mac.o.defconfig.new 10507 1040 0 11547 2d1b ip_set_hash_mac.o.defconfig.old Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_mac.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 8f004edad39671..f9d5a2a1e3d0f8 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -72,9 +72,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next, #define IP_SET_PROTO_UNDEF #include "ip_set_hash_gen.h" -/* Zero valued element is not supported */ -static const unsigned char invalid_ether[ETH_ALEN] = { 0 }; - static int hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -93,7 +90,7 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, return -EINVAL; ether_addr_copy(e.ether, eth_hdr(skb)->h_source); - if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) + if 
(is_zero_ether_addr(e.ether)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } @@ -118,7 +115,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret) return ret; ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER])); - if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) + if (is_zero_ether_addr(e.ether)) return -IPSET_ERR_HASH_ELEM; return adtfn(set, &e, &ext, &ext, flags);