From b0e50c4e895aa2e8b8469eb359a43778df3db27e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 1 Nov 2017 16:32:15 +0300 Subject: [PATCH 01/27] tcp_nv: fix division by zero in tcpnv_acked() [ Upstream commit 4eebff27ca4182bbf5f039dd60d79e2d7c0a707e ] Average RTT could become zero. This happened in real life at least twice. This patch treats zero as 1us. Signed-off-by: Konstantin Khlebnikov Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_nv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 5de82a8d4d87..e45e2c41c7bd 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -263,7 +263,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) /* rate in 100's bits per second */ rate64 = ((u64)sample->in_flight) * 8000000; - rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100)); + rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100); /* Remember the maximum rate seen during this RTT * Note: It may be more than one RTT. This function should be From 58b21b0287212bc8e60af9aafdcb9df9521e3db0 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Wed, 1 Nov 2017 14:58:09 +0000 Subject: [PATCH 02/27] net: vrf: correct FRA_L3MDEV encode type [ Upstream commit 18129a24983906eaf2a2d448ce4b83e27091ebe2 ] FRA_L3MDEV is defined as U8, but is being added as a U32 attribute. On big endian architecture, this results in the l3mdev entry not being added to the FIB rules. Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create") Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 578bd5001d93..346e48698555 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1129,7 +1129,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) frh->family = family; frh->action = FR_ACT_TO_TBL; - if (nla_put_u32(skb, FRA_L3MDEV, 1)) + if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF)) From 3920a5bdd951b291129a896c75a0b6c8d913fbbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Nov 2017 12:30:25 -0700 Subject: [PATCH 03/27] tcp: do not mangle skb->cb[] in tcp_make_synack() [ Upstream commit 3b11775033dc87c3d161996c54507b15ba26414a ] Christoph Paasch sent a patch to address the following issue : tcp_make_synack() is leaving some TCP private info in skb->cb[], then send the packet by other means than tcp_transmit_skb() tcp_transmit_skb() makes sure to clear skb->cb[] to not confuse IPv4/IPV6 stacks, but we have no such cleanup for SYNACK. tcp_make_synack() should not use tcp_init_nondata_skb() : tcp_init_nondata_skb() really should be limited to skbs put in write/rtx queues (the ones that are only sent via tcp_transmit_skb()) This patch fixes the issue and should even save few cpu cycles ;) Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses") Signed-off-by: Eric Dumazet Reported-by: Christoph Paasch Reviewed-by: Christoph Paasch Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 566b43afe378..3d7b59ecc76c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3110,13 +3110,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, tcp_ecn_make_synack(req, th); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; - /* Setting of flags are superfluous here for callers (and ECE is - * not even correctly set) - */ - tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, - TCPHDR_SYN | TCPHDR_ACK); - - th->seq = htonl(TCP_SKB_CB(skb)->seq); + skb->ip_summed = CHECKSUM_PARTIAL; + th->seq = htonl(tcp_rsk(req)->snt_isn); /* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); From afd9fa6619277fe460f9ad7949fd90d0985aec78 Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Thu, 26 Oct 2017 16:57:05 +0800 Subject: [PATCH 04/27] netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed [ Upstream commit 2b5ec1a5f9738ee7bf8f5ec0526e75e00362c48f ] When run ipvs in two different network namespace at the same host, and one ipvs transport network traffic to the other network namespace ipvs. 'ipvs_property' flag will make the second ipvs take no effect. So we should clear 'ipvs_property' when SKB network namespace changed. Fixes: 621e84d6f373 ("dev: introduce skb_scrub_packet()") Signed-off-by: Ye Yin Signed-off-by: Wei Zhou Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 7 +++++++ net/core/skbuff.c | 1 + 2 files changed, 8 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32810f279f8e..601dfa849d30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3584,6 +3584,13 @@ static inline void nf_reset_trace(struct sk_buff *skb) #endif } +static inline void ipvs_reset(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IP_VS) + skb->ipvs_property = 0; +#endif +} + /* Note: This doesn't put any conntrack and bridge info in dst. */ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe008f1bd930..aec5605944d3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4375,6 +4375,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) if (!xnet) return; + ipvs_reset(skb); skb_orphan(skb); skb->mark = 0; } From 6f239c0655cfe90c3e048bccb2e74ab1780cabba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 6 Nov 2017 09:01:57 +0800 Subject: [PATCH 05/27] bonding: discard lowest hash bit for 802.3ad layer3+4 [ Upstream commit b5f862180d7011d9575d0499fa37f0f25b423b12 ] After commit 07f4c90062f8 ("tcp/dccp: try to not exhaust ip_local_port_range in connect()"), we will try to use even ports for connect(). Then if an application (seen clearly with iperf) opens multiple streams to the same destination IP and port, each stream will be given an even source port. So the bonding driver's simple xmit_hash_policy based on layer3+4 addressing will always hash all these streams to the same interface. And the total throughput will limited to a single slave. Change the tcp code will impact the whole tcp behavior, only for bonding usage. Paolo Abeni suggested fix this by changing the bonding code only, which should be more reasonable, and less impact. Fix this by discarding the lowest hash bit because it contains little entropy. After the fix we can re-balance between slaves. Signed-off-by: Paolo Abeni Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5fa36ebc0640..63d61c084815 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3217,7 +3217,7 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) hash ^= (hash >> 16); hash ^= (hash >> 8); - return hash; + return hash >> 1; } /*-------------------------- Device entry points ----------------------------*/ From f376621861e3d8a713d6931f4363c4137912330b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 6 Nov 2017 15:37:22 +0100 Subject: [PATCH 06/27] net: cdc_ether: fix divide by 0 on bad descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2cb80187ba065d7decad7c6614e35e07aec8a974 ] Setting dev->hard_mtu to 0 will cause a divide error in usbnet_probe. Protect against devices with bogus CDC Ethernet functional descriptors by ignoring a zero wMaxSegmentSize. Signed-off-by: Bjørn Mork Acked-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/cdc_ether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index b82be816256c..1fca0024f294 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -221,7 +221,7 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) goto bad_desc; } - if (header.usb_cdc_ether_desc) { + if (header.usb_cdc_ether_desc && info->ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(info->ether->wMaxSegmentSize); /* because of Zaurus, we may be ignoring the host * side link address we were given. From 02a0c0639ae07beab982bbad4527b40da96d33ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 6 Nov 2017 15:32:18 +0100 Subject: [PATCH 07/27] net: qmi_wwan: fix divide by 0 on bad descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7fd078337201cf7468f53c3d9ef81ff78cb6df3b ] A CDC Ethernet functional descriptor with wMaxSegmentSize = 0 will cause a divide error in usbnet_probe: divide error: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc8-44453-g1fdc1a82c34f #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bef5c00 task.stack: ffff88006bf60000 RIP: 0010:usbnet_update_max_qlen+0x24d/0x390 drivers/net/usb/usbnet.c:355 RSP: 0018:ffff88006bf67508 EFLAGS: 00010246 RAX: 00000000000163c8 RBX: ffff8800621fce40 RCX: ffff8800621fcf34 RDX: 0000000000000000 RSI: ffffffff837ecb7a RDI: ffff8800621fcf34 RBP: ffff88006bf67520 R08: ffff88006bef5c00 R09: ffffed000c43f881 R10: ffffed000c43f880 R11: ffff8800621fc406 R12: 0000000000000003 R13: ffffffff85c71de0 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88006ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe9c0d6dac CR3: 00000000614f4000 CR4: 00000000000006f0 Call Trace: usbnet_probe+0x18b5/0x2790 drivers/net/usb/usbnet.c:1783 qmi_wwan_probe+0x133/0x220 drivers/net/usb/qmi_wwan.c:1338 usb_probe_interface+0x324/0x940 drivers/usb/core/driver.c:361 really_probe drivers/base/dd.c:413 driver_probe_device+0x522/0x740 drivers/base/dd.c:557 Fix by simply ignoring the bogus descriptor, as it is optional for QMI devices anyway. Fixes: 423ce8caab7e ("net: usb: qmi_wwan: New driver for Huawei QMI based WWAN devices") Reported-by: Andrey Konovalov Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 49a27dc46e5e..cce36e0df2eb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -386,7 +386,7 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) } /* errors aren't fatal - we can live with the dynamic address */ - if (cdc_ether) { + if (cdc_ether && cdc_ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(cdc_ether->wMaxSegmentSize); usbnet_get_ethernet_addr(dev, cdc_ether->iMACAddress); } From 4ad82095bfe2bf438fc33252d24682fd36dd0c26 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 7 Nov 2017 13:47:56 +0100 Subject: [PATCH 08/27] qmi_wwan: Add missing skb_reset_mac_header-call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0de0add10e587effa880c741c9413c874f16be91 ] When we receive a packet on a QMI device in raw IP mode, we should call skb_reset_mac_header() to ensure that skb->mac_header contains a valid offset in the packet. While it shouldn't really matter, the packets have no MAC header and the interface is configured as-such, it seems certain parts of the network stack expects a "good" value in skb->mac_header. Without the skb_reset_mac_header() call added in this patch, for example shaping traffic (using tc) triggers the following oops on the first received packet: [ 303.642957] skbuff: skb_under_panic: text:8f137918 len:177 put:67 head:8e4b0f00 data:8e4b0eff tail:0x8e4b0fb0 end:0x8e4b1520 dev:wwan0 [ 303.655045] Kernel bug detected[#1]: [ 303.658622] CPU: 1 PID: 1002 Comm: logd Not tainted 4.9.58 #0 [ 303.664339] task: 8fdf05e0 task.stack: 8f15c000 [ 303.668844] $ 0 : 00000000 00000001 0000007a 00000000 [ 303.674062] $ 4 : 8149a2fc 8149a2fc 8149ce20 00000000 [ 303.679284] $ 8 : 00000030 3878303a 31623465 20303235 [ 303.684510] $12 : ded731e3 2626a277 00000000 03bd0000 [ 303.689747] $16 : 8ef62b40 00000043 8f137918 804db5fc [ 303.694978] $20 : 00000001 00000004 8fc13800 00000003 [ 303.700215] $24 : 00000001 8024ab10 [ 303.705442] $28 : 8f15c000 8fc19cf0 00000043 802cc920 [ 303.710664] Hi : 00000000 [ 303.713533] Lo : 74e58000 [ 303.716436] epc : 802cc920 skb_panic+0x58/0x5c [ 303.721046] ra : 802cc920 skb_panic+0x58/0x5c [ 303.725639] Status: 11007c03 KERNEL EXL IE [ 303.729823] Cause : 50800024 (ExcCode 09) [ 303.733817] PrId : 0001992f (MIPS 1004Kc) [ 303.737892] Modules linked in: rt2800pci rt2800mmio rt2800lib qcserial ppp_async option usb_wwan rt2x00pci rt2x00mmio rt2x00lib rndis_host qmi_wwan ppp_generic nf_nat_pptp nf_conntrack_pptp nf_conntrack_ipv6 mt76x2i Process logd (pid: 1002, threadinfo=8f15c000, task=8fdf05e0, tls=77b3eee4) [ 303.962509] Stack : 00000000 80408990 8f137918 000000b1 00000043 8e4b0f00 8e4b0eff 8e4b0fb0 [ 303.970871] 8e4b1520 8fec1800 00000043 802cd2a4 6e000045 00000043 00000000 8ef62000 [ 303.979219] 8eef5d00 8ef62b40 8fea7300 8f137918 00000000 00000000 0002bb01 793e5664 [ 303.987568] 8ef08884 00000001 8fea7300 00000002 8fc19e80 8eef5d00 00000006 00000003 [ 303.995934] 00000000 8030ba90 00000003 77ab3fd0 8149dc80 8004d1bc 8f15c000 8f383700 [ 304.004324] ... [ 304.006767] Call Trace: [ 304.009241] [<802cc920>] skb_panic+0x58/0x5c [ 304.013504] [<802cd2a4>] skb_push+0x78/0x90 [ 304.017783] [<8f137918>] 0x8f137918 [ 304.021269] Code: 00602825 0c02a3b4 24842888 <000c000d> 8c870060 8c8200a0 0007382b 00070336 8c88005c [ 304.031034] [ 304.032805] ---[ end trace b778c482b3f0bda9 ]--- [ 304.041384] Kernel panic - not syncing: Fatal exception in interrupt [ 304.051975] Rebooting in 3 seconds.. While the oops is for a 4.9-kernel, I was able to trigger the same oops with net-next as of yesterday. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index cce36e0df2eb..9cf11c83993a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -205,6 +205,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } if (rawip) { + skb_reset_mac_header(skb); skb->dev = dev->net; /* normally set by eth_type_trans */ skb->protocol = proto; return 1; From 58baa36d3728eea14884f74817d5749b32b0cdeb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 6 Nov 2017 13:26:46 +0100 Subject: [PATCH 09/27] net: usb: asix: fill null-ptr-deref in asix_suspend [ Upstream commit 8f5624629105589bcc23d0e51cc01bd8103d09a5 ] When asix_suspend() is called dev->driver_priv might not have been assigned a value, so we need to check that it's not NULL. Similar issue is present in asix_resume(), this patch fixes it as well. Found by syzkaller. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Modules linked in: CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc4-43422-geccacdd69a8c #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bb36300 task.stack: ffff88006bba8000 RIP: 0010:asix_suspend+0x76/0xc0 drivers/net/usb/asix_devices.c:629 RSP: 0018:ffff88006bbae718 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff880061ba3b80 RCX: 1ffff1000c34d644 RDX: 0000000000000001 RSI: 0000000000000402 RDI: 0000000000000008 RBP: ffff88006bbae738 R08: 1ffff1000d775cad R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800630a8b40 R13: 0000000000000000 R14: 0000000000000402 R15: ffff880061ba3b80 FS: 0000000000000000(0000) GS:ffff88006c600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff33cf89000 CR3: 0000000061c0a000 CR4: 00000000000006f0 Call Trace: usb_suspend_interface drivers/usb/core/driver.c:1209 usb_suspend_both+0x27f/0x7e0 drivers/usb/core/driver.c:1314 usb_runtime_suspend+0x41/0x120 drivers/usb/core/driver.c:1852 __rpm_callback+0x339/0xb60 drivers/base/power/runtime.c:334 rpm_callback+0x106/0x220 drivers/base/power/runtime.c:461 rpm_suspend+0x465/0x1980 drivers/base/power/runtime.c:596 __pm_runtime_suspend+0x11e/0x230 drivers/base/power/runtime.c:1009 pm_runtime_put_sync_autosuspend ./include/linux/pm_runtime.h:251 usb_new_device+0xa37/0x1020 drivers/usb/core/hub.c:2487 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: 8d 7c 24 20 48 89 fa 48 c1 ea 03 80 3c 02 00 75 5b 48 b8 00 00 00 00 00 fc ff df 4d 8b 6c 24 20 49 8d 7d 08 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 34 4d 8b 6d 08 4d 85 ed 74 0b e8 26 2b 51 fd 4c RIP: asix_suspend+0x76/0xc0 RSP: ffff88006bbae718 ---[ end trace dfc4f5649284342c ]--- Signed-off-by: Andrey Konovalov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/asix_devices.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 50737def774c..32e9ec8f1521 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -624,7 +624,7 @@ static int asix_suspend(struct usb_interface *intf, pm_message_t message) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->suspend) + if (priv && priv->suspend) priv->suspend(dev); return usbnet_suspend(intf, message); @@ -676,7 +676,7 @@ static int asix_resume(struct usb_interface *intf) struct usbnet *dev = usb_get_intfdata(intf); struct asix_common_private *priv = dev->driver_priv; - if (priv->resume) + if (priv && priv->resume) priv->resume(dev); return usbnet_resume(intf); From 080ecd2bb62b42ad986f9b4ad404777f2c27d58f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 9 Nov 2017 16:43:13 -0800 Subject: [PATCH 10/27] vlan: fix a use-after-free in vlan_device_event() [ Upstream commit 052d41c01b3a2e3371d66de569717353af489d63 ] After refcnt reaches zero, vlan_vid_del() could free dev->vlan_info via RCU: RCU_INIT_POINTER(dev->vlan_info, NULL); call_rcu(&vlan_info->rcu, vlan_info_rcu_free); However, the pointer 'grp' still points to that memory since it is set before vlan_vid_del(): vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) goto out; grp = &vlan_info->grp; Depends on when that RCU callback is scheduled, we could trigger a use-after-free in vlan_group_for_each_dev() right following this vlan_vid_del(). Fix it by moving vlan_vid_del() before setting grp. This is also symmetric to the vlan_vid_add() we call in vlan_device_event(). Reported-by: Fengguang Wu Fixes: efc73f4bbc23 ("net: Fix memory leak - vlan_info struct") Cc: Alexander Duyck Cc: Linus Torvalds Cc: Girish Moodalbail Signed-off-by: Cong Wang Reviewed-by: Girish Moodalbail Tested-by: Fengguang Wu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8d213f974448..4a47074d1d7f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -376,6 +376,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev->name); vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } + if (event == NETDEV_DOWN && + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) @@ -423,9 +426,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, struct net_device *tmp; LIST_HEAD(close_list); - if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) - vlan_vid_del(dev, htons(ETH_P_8021Q), 0); - /* Put all VLANs for this dev in the down state too. */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; From 99aa74ce9c2d1c19c9a3b263ef59965c3f3ba048 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 9 Nov 2017 13:04:44 +0900 Subject: [PATCH 11/27] af_netlink: ensure that NLMSG_DONE never fails in dumps [ Upstream commit 0642840b8bb008528dbdf929cec9f65ac4231ad0 ] The way people generally use netlink_dump is that they fill in the skb as much as possible, breaking when nla_put returns an error. Then, they get called again and start filling out the next skb, and again, and so forth. The mechanism at work here is the ability for the iterative dumping function to detect when the skb is filled up and not fill it past the brim, waiting for a fresh skb for the rest of the data. However, if the attributes are small and nicely packed, it is possible that a dump callback function successfully fills in attributes until the skb is of size 4080 (libmnl's default page-sized receive buffer size). The dump function completes, satisfied, and then, if it happens to be that this is actually the last skb, and no further ones are to be sent, then netlink_dump will add on the NLMSG_DONE part: nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); It is very important that netlink_dump does this, of course. However, in this example, that call to nlmsg_put_answer will fail, because the previous filling by the dump function did not leave it enough room. And how could it possibly have done so? All of the nla_put variety of functions simply check to see if the skb has enough tailroom, independent of the context it is in. In order to keep the important assumptions of all netlink dump users, it is therefore important to give them an skb that has this end part of the tail already reserved, so that the call to nlmsg_put_answer does not fail. Otherwise, library authors are forced to find some bizarre sized receive buffer that has a large modulo relative to the common sizes of messages received, which is ugly and buggy. This patch thus saves the NLMSG_DONE for an additional message, for the case that things are dangerously close to the brim. This requires keeping track of the errno from ->dump() across calls. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 17 +++++++++++------ net/netlink/af_netlink.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a1dca3b169a1..c9fac08a53b1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2077,7 +2077,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; struct module *module; - int len, err = -ENOBUFS; + int err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2124,9 +2124,11 @@ static int netlink_dump(struct sock *sk) skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); - len = cb->dump(skb, cb); + if (nlk->dump_done_errno > 0) + nlk->dump_done_errno = cb->dump(skb, cb); - if (len > 0) { + if (nlk->dump_done_errno > 0 || + skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { mutex_unlock(nlk->cb_mutex); if (sk_filter(sk, skb)) @@ -2136,13 +2138,15 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - if (!nlh) + nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, + sizeof(nlk->dump_done_errno), NLM_F_MULTI); + if (WARN_ON(!nlh)) goto errout_skb; nl_dump_check_consistent(cb, nlh); - memcpy(nlmsg_data(nlh), &len, sizeof(len)); + memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, + sizeof(nlk->dump_done_errno)); if (sk_filter(sk, skb)) kfree_skb(skb); @@ -2214,6 +2218,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } nlk->cb_running = true; + nlk->dump_done_errno = INT_MAX; mutex_unlock(nlk->cb_mutex); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 4fdb38318977..bae961cfa3ad 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -24,6 +24,7 @@ struct netlink_sock { wait_queue_head_t wait; bool bound; bool cb_running; + int dump_done_errno; struct netlink_callback cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; From 362d2ce0f851653d2eed87fdb8891ab4cfb0c2bf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 17 Oct 2017 23:26:10 +0800 Subject: [PATCH 12/27] sctp: do not peel off an assoc from one netns to another one [ Upstream commit df80cd9b28b9ebaa284a41df611dbf3a2d05ca74 ] Now when peeling off an association to the sock in another netns, all transports in this assoc are not to be rehashed and keep use the old key in hashtable. As a transport uses sk->net as the hash key to insert into hashtable, it would miss removing these transports from hashtable due to the new netns when closing the sock and all transports are being freeed, then later an use-after-free issue could be caused when looking up an asoc and dereferencing those transports. This is a very old issue since very beginning, ChunYu found it with syzkaller fuzz testing with this series: socket$inet6_sctp() bind$inet6() sendto$inet6() unshare(0x40000000) getsockopt$inet_sctp6_SCTP_GET_ASSOC_ID_LIST() getsockopt$inet_sctp6_SCTP_SOCKOPT_PEELOFF() This patch is to block this call when peeling one assoc off from one netns to another one, so that the netns of all transport would not go out-sync with the key in hashtable. Note that this patch didn't fix it by rehashing transports, as it's difficult to handle the situation when the tuple is already in use in the new netns. Besides, no one would like to peel off one assoc to another netns, considering ipaddrs, ifaces, etc. are usually different. Reported-by: ChunYu Wang Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ffcc8aa78db7..c062ceae19e6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4764,6 +4764,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) struct socket *sock; int err = 0; + /* Do not peel off from one netns to another one. */ + if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) + return -EINVAL; + if (!asoc) return -EINVAL; From f0ae7a1b45fa38178e0f6fa244897e1e28601223 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Nov 2017 11:07:15 +0800 Subject: [PATCH 13/27] fealnx: Fix building error on MIPS [ Upstream commit cc54c1d32e6a4bb3f116721abf900513173e4d02 ] This patch try to fix the building error on MIPS. The reason is MIPS has already defined the LONG macro, which conflicts with the LONG enum in drivers/net/ethernet/fealnx.c. Signed-off-by: Huacai Chen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/fealnx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c08bd763172a..a300ed48a7d8 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -257,8 +257,8 @@ enum rx_desc_status_bits { RXFSD = 0x00000800, /* first descriptor */ RXLSD = 0x00000400, /* last descriptor */ ErrorSummary = 0x80, /* error summary */ - RUNT = 0x40, /* runt packet received */ - LONG = 0x20, /* long packet received */ + RUNTPKT = 0x40, /* runt packet received */ + LONGPKT = 0x20, /* long packet received */ FAE = 0x10, /* frame align error */ CRC = 0x08, /* crc error */ RXER = 0x04, /* receive error */ @@ -1633,7 +1633,7 @@ static int netdev_rx(struct net_device *dev) dev->name, rx_status); dev->stats.rx_errors++; /* end of a packet. */ - if (rx_status & (LONG | RUNT)) + if (rx_status & (LONGPKT | RUNTPKT)) dev->stats.rx_length_errors++; if (rx_status & RXER) dev->stats.rx_frame_errors++; From 67b718fcf89745836ecbf15cdd46ded2b071c3b6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 15 Nov 2017 22:17:48 -0600 Subject: [PATCH 14/27] net/sctp: Always set scope_id in sctp_inet6_skb_msgname [ Upstream commit 7c8a61d9ee1df0fb4747879fa67a99614eb62fec ] Alexandar Potapenko while testing the kernel with KMSAN and syzkaller discovered that in some configurations sctp would leak 4 bytes of kernel stack. Working with his reproducer I discovered that those 4 bytes that are leaked is the scope id of an ipv6 address returned by recvmsg. With a little code inspection and a shrewd guess I discovered that sctp_inet6_skb_msgname only initializes the scope_id field for link local ipv6 addresses to the interface index the link local address pertains to instead of initializing the scope_id field for all ipv6 addresses. That is almost reasonable as scope_id's are meaniningful only for link local addresses. Set the scope_id in all other cases to 0 which is not a valid interface index to make it clear there is nothing useful in the scope_id field. There should be no danger of breaking userspace as the stack leak guaranteed that previously meaningless random data was being returned. Fixes: 372f525b495c ("SCTP: Resync with LKSCTP tree.") History-tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f7f00d012888..5d015270e454 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -806,9 +806,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, addr->v6.sin6_flowinfo = 0; addr->v6.sin6_port = sh->source; addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; - if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { + if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); - } + else + addr->v6.sin6_scope_id = 0; } *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); From 4a7e0231242040513f4b78117e83aa85028ff053 Mon Sep 17 00:00:00 2001 From: Tudor-Dan Ambarus Date: Thu, 25 May 2017 10:18:07 +0300 Subject: [PATCH 15/27] crypto: dh - fix memleak in setkey commit ee34e2644a78e2561742bea8c4bdcf83cabf90a7 upstream. setkey can be called multiple times during the existence of the transformation object. In case of multiple setkey calls, the old key was not freed and we leaked memory. Free the old MPI key if any. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/dh.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/dh.c b/crypto/dh.c index 9d19360e7189..df7698e4f740 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -84,6 +84,9 @@ static int dh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) struct dh_ctx *ctx = dh_get_ctx(tfm); struct dh params; + /* Free the old MPI key if any */ + dh_free_ctx(ctx); + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) return -EINVAL; From aa15fe4d6a7f34b76b562cff5f4c4b86663e83d9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 5 Nov 2017 18:30:44 -0800 Subject: [PATCH 16/27] crypto: dh - Fix double free of ctx->p commit 12d41a023efb01b846457ccdbbcbe2b65a87d530 upstream. When setting the secret with the software Diffie-Hellman implementation, if allocating 'g' failed (e.g. if it was longer than MAX_EXTERN_MPI_BITS), then 'p' was freed twice: once immediately, and once later when the crypto_kpp tfm was destroyed. Fix it by using dh_free_ctx() (renamed to dh_clear_ctx()) in the error paths, as that correctly sets the pointers to NULL. KASAN report: MPI: mpi too large (32760 bits) ================================================================== BUG: KASAN: use-after-free in mpi_free+0x131/0x170 Read of size 4 at addr ffff88006c7cdf90 by task reproduce_doubl/367 CPU: 1 PID: 367 Comm: reproduce_doubl Not tainted 4.14.0-rc7-00040-g05298abde6fe #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0xb3/0x10b ? mpi_free+0x131/0x170 print_address_description+0x79/0x2a0 ? mpi_free+0x131/0x170 kasan_report+0x236/0x340 ? akcipher_register_instance+0x90/0x90 __asan_report_load4_noabort+0x14/0x20 mpi_free+0x131/0x170 ? akcipher_register_instance+0x90/0x90 dh_exit_tfm+0x3d/0x140 crypto_kpp_exit_tfm+0x52/0x70 crypto_destroy_tfm+0xb3/0x250 __keyctl_dh_compute+0x640/0xe90 ? kasan_slab_free+0x12f/0x180 ? dh_data_from_key+0x240/0x240 ? key_create_or_update+0x1ee/0xb20 ? key_instantiate_and_link+0x440/0x440 ? lock_contended+0xee0/0xee0 ? kfree+0xcf/0x210 ? SyS_add_key+0x268/0x340 keyctl_dh_compute+0xb3/0xf1 ? __keyctl_dh_compute+0xe90/0xe90 ? SyS_add_key+0x26d/0x340 ? entry_SYSCALL_64_fastpath+0x5/0xbe ? trace_hardirqs_on_caller+0x3f4/0x560 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x43ccf9 RSP: 002b:00007ffeeec96158 EFLAGS: 00000246 ORIG_RAX: 00000000000000fa RAX: ffffffffffffffda RBX: 000000000248b9b9 RCX: 000000000043ccf9 RDX: 00007ffeeec96170 RSI: 00007ffeeec96160 RDI: 0000000000000017 RBP: 0000000000000046 R08: 0000000000000000 R09: 0248b9b9143dc936 R10: 0000000000001000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000409670 R14: 0000000000409700 R15: 0000000000000000 Allocated by task 367: save_stack_trace+0x16/0x20 kasan_kmalloc+0xeb/0x180 kmem_cache_alloc_trace+0x114/0x300 mpi_alloc+0x4b/0x230 mpi_read_raw_data+0xbe/0x360 dh_set_secret+0x1dc/0x460 __keyctl_dh_compute+0x623/0xe90 keyctl_dh_compute+0xb3/0xf1 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 367: save_stack_trace+0x16/0x20 kasan_slab_free+0xab/0x180 kfree+0xb5/0x210 mpi_free+0xcb/0x170 dh_set_secret+0x2d7/0x460 __keyctl_dh_compute+0x623/0xe90 keyctl_dh_compute+0xb3/0xf1 SyS_keyctl+0x72/0x2c0 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: 802c7f1c84e4 ("crypto: dh - Add DH software implementation") Signed-off-by: Eric Biggers Reviewed-by: Tudor Ambarus Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/dh.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/crypto/dh.c b/crypto/dh.c index df7698e4f740..99e20fc63cc9 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -21,19 +21,12 @@ struct dh_ctx { MPI xa; }; -static inline void dh_clear_params(struct dh_ctx *ctx) +static void dh_clear_ctx(struct dh_ctx *ctx) { mpi_free(ctx->p); mpi_free(ctx->g); - ctx->p = NULL; - ctx->g = NULL; -} - -static void dh_free_ctx(struct dh_ctx *ctx) -{ - dh_clear_params(ctx); mpi_free(ctx->xa); - ctx->xa = NULL; + memset(ctx, 0, sizeof(*ctx)); } /* @@ -71,10 +64,8 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params) return -EINVAL; ctx->g = mpi_read_raw_data(params->g, params->g_size); - if (!ctx->g) { - mpi_free(ctx->p); + if (!ctx->g) return -EINVAL; - } return 0; } @@ -85,21 +76,23 @@ static int dh_set_secret(struct crypto_kpp *tfm, void *buf, unsigned int len) struct dh params; /* Free the old MPI key if any */ - dh_free_ctx(ctx); + dh_clear_ctx(ctx); if (crypto_dh_decode_key(buf, len, ¶ms) < 0) - return -EINVAL; + goto err_clear_ctx; if (dh_set_params(ctx, ¶ms) < 0) - return -EINVAL; + goto err_clear_ctx; ctx->xa = mpi_read_raw_data(params.key, params.key_size); - if (!ctx->xa) { - dh_clear_params(ctx); - return -EINVAL; - } + if (!ctx->xa) + goto err_clear_ctx; return 0; + +err_clear_ctx: + dh_clear_ctx(ctx); + return -EINVAL; } static int dh_compute_value(struct kpp_request *req) @@ -157,7 +150,7 @@ static void dh_exit_tfm(struct crypto_kpp *tfm) { struct dh_ctx *ctx = dh_get_ctx(tfm); - dh_free_ctx(ctx); + dh_clear_ctx(ctx); } static struct kpp_alg dh = { From 2cfbb32f6ccb8910418d447fabd0420fba4de825 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Tue, 7 Nov 2017 11:37:07 +0100 Subject: [PATCH 17/27] ima: do not update security.ima if appraisal status is not INTEGRITY_PASS commit 020aae3ee58c1af0e7ffc4e2cc9fe4dc630338cb upstream. Commit b65a9cfc2c38 ("Untangling ima mess, part 2: deal with counters") moved the call of ima_file_check() from may_open() to do_filp_open() at a point where the file descriptor is already opened. This breaks the assumption made by IMA that file descriptors being closed belong to files whose access was granted by ima_file_check(). The consequence is that security.ima and security.evm are updated with good values, regardless of the current appraisal status. For example, if a file does not have security.ima, IMA will create it after opening the file for writing, even if access is denied. Access to the file will be allowed afterwards. Avoid this issue by checking the appraisal status before updating security.ima. Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- security/integrity/ima/ima_appraise.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 097459830454..6830d2427e47 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -303,6 +303,9 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file) if (iint->flags & IMA_DIGSIG) return; + if (iint->ima_file_status != INTEGRITY_PASS) + return; + rc = ima_collect_measurement(iint, file, NULL, 0, ima_hash_algo); if (rc < 0) return; From 70eb4608bb0e4e45658cb5b5002afe74c31feffb Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 21 Oct 2017 10:50:18 +0200 Subject: [PATCH 18/27] serial: omap: Fix EFR write on RTS deassertion commit 2a71de2f7366fb1aec632116d0549ec56d6a3940 upstream. Commit 348f9bb31c56 ("serial: omap: Fix RTS handling") sought to enable auto RTS upon manual RTS assertion and disable it on deassertion. However it seems the latter was done incorrectly, it clears all bits in the Extended Features Register *except* auto RTS. Fixes: 348f9bb31c56 ("serial: omap: Fix RTS handling") Cc: Peter Hurley Signed-off-by: Lukas Wunner Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/omap-serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 44e5b5bf713b..472ba3c813c1 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -693,7 +693,7 @@ static void serial_omap_set_mctrl(struct uart_port *port, unsigned int mctrl) if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) up->efr |= UART_EFR_RTS; else - up->efr &= UART_EFR_RTS; + up->efr &= ~UART_EFR_RTS; serial_out(up, UART_EFR, up->efr); serial_out(up, UART_LCR, lcr); From e6d4a078f0e798041f19e0e7672341beaa4561e1 Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Tue, 17 Oct 2017 14:23:08 +0800 Subject: [PATCH 19/27] serial: 8250_fintek: Fix finding base_port with activated SuperIO commit fd97e66c5529046e989a0879c3bb58fddb592c71 upstream. The SuperIO will be configured at boot time by BIOS, but some BIOS will not deactivate the SuperIO when the end of configuration. It'll lead to mismatch for pdata->base_port in probe_setup_port(). So we'll deactivate all SuperIO before activate special base_port in fintek_8250_enter_key(). Tested on iBASE MI802. Tested-by: Ji-Ze Hong (Peter Hong) Signed-off-by: Ji-Ze Hong (Peter Hong) Reviewd-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fintek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 0facc789fe7d..f8c31070a337 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -54,6 +54,9 @@ static int fintek_8250_enter_key(u16 base_port, u8 key) if (!request_muxed_region(base_port, 2, "8250_fintek")) return -EBUSY; + /* Force to deactive all SuperIO in this base_port */ + outb(EXIT_KEY, base_port + ADDR_PORT); + outb(key, base_port + ADDR_PORT); outb(key, base_port + ADDR_PORT); return 0; From 2bd38ece78a4eeaddff3ae19f1f34af1b5331b67 Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Thu, 2 Nov 2017 08:53:30 -0400 Subject: [PATCH 20/27] dmaengine: dmatest: warn user when dma test times out commit a9df21e34b422f79d9a9fa5c3eff8c2a53491be6 upstream. Commit adfa543e7314 ("dmatest: don't use set_freezable_with_signal()") introduced a bug (that is in fact documented by the patch commit text) that leaves behind a dangling pointer. Since the done_wait structure is allocated on the stack, future invocations to the DMATEST can produce undesirable results (e.g., corrupted spinlocks). Ideally, this would be cleaned up in the thread handler, but at the very least, the kernel is left in a very precarious scenario that can lead to some long debug sessions when the crash comes later. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=197605 Signed-off-by: Adam Wallis Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dmatest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index cf76fc6149e5..fbb75514dfb4 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -666,6 +666,7 @@ static int dmatest_func(void *data) * free it this time?" dancing. For now, just * leave it dangling. */ + WARN(1, "dmatest: Kernel stack may be corrupted!!\n"); dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); From a8356445ba0f7e60e8cbaa5beca3f35e65f44964 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Wed, 15 Nov 2017 17:31:33 -0800 Subject: [PATCH 21/27] ocfs2: fix cluster hang after a node dies commit 1c01967116a678fed8e2c68a6ab82abc8effeddc upstream. When a node dies, other live nodes have to choose a new master for an existed lock resource mastered by the dead node. As for ocfs2/dlm implementation, this is done by function - dlm_move_lockres_to_recovery_list which marks those lock rsources as DLM_LOCK_RES_RECOVERING and manages them via a list from which DLM changes lock resource's master later. So without invoking dlm_move_lockres_to_recovery_list, no master will be choosed after dlm recovery accomplishment since no lock resource can be found through ::resource list. What's worse is that if DLM_LOCK_RES_RECOVERING is not marked for lock resources mastered a dead node, it will break up synchronization among nodes. So invoke dlm_move_lockres_to_recovery_list again. Fixs: 'commit ee8f7fcbe638 ("ocfs2/dlm: continue to purge recovery lockres when recovery master goes down")' Link: http://lkml.kernel.org/r/63ADC13FD55D6546B7DECE290D39E373CED6E0F9@H3CMLB14-EX.srv.huawei-3com.com Signed-off-by: Changwei Ge Reported-by: Vitaly Mayatskih Tested-by: Vitaly Mayatskikh Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlm/dlmrecovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index dd5cb8bcefd1..eef324823311 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2419,6 +2419,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) dlm_lockres_put(res); continue; } + dlm_move_lockres_to_recovery_list(dlm, res); } else if (res->owner == dlm->node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); From 8af777385f7a3e693f5e79fb4655aebf881156e6 Mon Sep 17 00:00:00 2001 From: alex chen Date: Wed, 15 Nov 2017 17:31:40 -0800 Subject: [PATCH 22/27] ocfs2: should wait dio before inode lock in ocfs2_setattr() commit 28f5a8a7c033cbf3e32277f4cc9c6afd74f05300 upstream. we should wait dio requests to finish before inode lock in ocfs2_setattr(), otherwise the following deadlock will happen: process 1 process 2 process 3 truncate file 'A' end_io of writing file 'A' receiving the bast messages ocfs2_setattr ocfs2_inode_lock_tracker ocfs2_inode_lock_full inode_dio_wait __inode_dio_wait -->waiting for all dio requests finish dlm_proxy_ast_handler dlm_do_local_bast ocfs2_blocking_ast ocfs2_generic_handle_bast set OCFS2_LOCK_BLOCKED flag dio_end_io dio_bio_end_aio dio_complete ocfs2_dio_end_io ocfs2_dio_end_io_write ocfs2_inode_lock __ocfs2_cluster_lock ocfs2_wait_for_mask -->waiting for OCFS2_LOCK_BLOCKED flag to be cleared, that is waiting for 'process 1' unlocking the inode lock inode_dio_end -->here dec the i_dio_count, but will never be called, so a deadlock happened. Link: http://lkml.kernel.org/r/59F81636.70508@huawei.com Signed-off-by: Alex Chen Reviewed-by: Jun Piao Reviewed-by: Joseph Qi Acked-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/file.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0db6f83fdea1..05a0fb9854f9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1166,6 +1166,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { + /* + * Here we should wait dio to finish before inode lock + * to avoid a deadlock between ocfs2_setattr() and + * ocfs2_dio_end_io_write() + */ + inode_dio_wait(inode); + status = ocfs2_rw_lock(inode, 1); if (status < 0) { mlog_errno(status); @@ -1186,8 +1193,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (status) goto bail_unlock; - inode_dio_wait(inode); - if (i_size_read(inode) >= attr->ia_size) { if (ocfs2_should_order_data(inode)) { status = ocfs2_begin_ordered_truncate(inode, From 55b06b0fc09b8d880ce36b7befb4dc5c17a5efb6 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sat, 29 Jul 2017 21:14:55 -0500 Subject: [PATCH 23/27] ipmi: fix unsigned long underflow commit 392a17b10ec4320d3c0e96e2a23ebaad1123b989 upstream. When I set the timeout to a specific value such as 500ms, the timeout event will not happen in time due to the overflow in function check_msg_timeout: ... ent->timeout -= timeout_period; if (ent->timeout > 0) return; ... The type of timeout_period is long, but ent->timeout is unsigned long. This patch makes the type consistent. Reported-by: Weilong Chen Signed-off-by: Corey Minyard Tested-by: Weilong Chen Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_msghandler.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 172a9dc06ec9..5d509ccf1299 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -4029,7 +4029,8 @@ smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, } static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, - struct list_head *timeouts, long timeout_period, + struct list_head *timeouts, + unsigned long timeout_period, int slot, unsigned long *flags, unsigned int *waiting_msgs) { @@ -4042,8 +4043,8 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, if (!ent->inuse) return; - ent->timeout -= timeout_period; - if (ent->timeout > 0) { + if (timeout_period < ent->timeout) { + ent->timeout -= timeout_period; (*waiting_msgs)++; return; } @@ -4109,7 +4110,8 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, } } -static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, long timeout_period) +static unsigned int ipmi_timeout_handler(ipmi_smi_t intf, + unsigned long timeout_period) { struct list_head timeouts; struct ipmi_recv_msg *msg, *msg2; From 9980b8278338d4ba2064c32baa5db30f15a3ff96 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 15 Nov 2017 17:38:41 -0800 Subject: [PATCH 24/27] mm/page_alloc.c: broken deferred calculation commit d135e5750205a21a212a19dbb05aeb339e2cbea7 upstream. In reset_deferred_meminit() we determine number of pages that must not be deferred. We initialize pages for at least 2G of memory, but also pages for reserved memory in this node. The reserved memory is determined in this function: memblock_reserved_memory_within(), which operates over physical addresses, and returns size in bytes. However, reset_deferred_meminit() assumes that that this function operates with pfns, and returns page count. The result is that in the best case machine boots slower than expected due to initializing more pages than needed in single thread, and in the worst case panics because fewer than needed pages are initialized early. Link: http://lkml.kernel.org/r/20171021011707.15191-1-pasha.tatashin@oracle.com Fixes: 864b9a393dcb ("mm: consider memblock reservations for deferred memory initialization sizing") Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 3 ++- mm/page_alloc.c | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6744eb40c4ea..fff21a82780c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -672,7 +672,8 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; - unsigned long static_init_size; + /* Number of non-deferred pages */ + unsigned long static_init_pgcnt; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7064aae8ded7..4a044134ce84 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -284,28 +284,37 @@ EXPORT_SYMBOL(nr_online_nodes); int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT + +/* + * Determine how many pages need to be initialized durig early boot + * (non-deferred initialization). + * The value of first_deferred_pfn will be set later, once non-deferred pages + * are initialized, but for now set it ULONG_MAX. + */ static inline void reset_deferred_meminit(pg_data_t *pgdat) { - unsigned long max_initialise; - unsigned long reserved_lowmem; + phys_addr_t start_addr, end_addr; + unsigned long max_pgcnt; + unsigned long reserved; /* * Initialise at least 2G of a node but also take into account that * two large system hashes that can take up 1GB for 0.25TB/node. */ - max_initialise = max(2UL << (30 - PAGE_SHIFT), - (pgdat->node_spanned_pages >> 8)); + max_pgcnt = max(2UL << (30 - PAGE_SHIFT), + (pgdat->node_spanned_pages >> 8)); /* * Compensate the all the memblock reservations (e.g. crash kernel) * from the initial estimation to make sure we will initialize enough * memory to boot. */ - reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, - pgdat->node_start_pfn + max_initialise); - max_initialise += reserved_lowmem; + start_addr = PFN_PHYS(pgdat->node_start_pfn); + end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt); + reserved = memblock_reserved_memory_within(start_addr, end_addr); + max_pgcnt += PHYS_PFN(reserved); - pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); + pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages); pgdat->first_deferred_pfn = ULONG_MAX; } @@ -332,7 +341,7 @@ static inline bool update_defer_init(pg_data_t *pgdat, if (zone_end < pgdat_end_pfn(pgdat)) return true; (*nr_initialised)++; - if ((*nr_initialised > pgdat->static_init_size) && + if ((*nr_initialised > pgdat->static_init_pgcnt) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { pgdat->first_deferred_pfn = pfn; return false; From fae5947129eb9f3cf0bc05bb4c770e5c2b2c8dd6 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Wed, 27 Sep 2017 15:52:12 -0400 Subject: [PATCH 25/27] coda: fix 'kernel memory exposure attempt' in fsync commit d337b66a4c52c7b04eec661d86c2ef6e168965a2 upstream. When an application called fsync on a file in Coda a small request with just the file identifier was allocated, but the declared length was set to the size of union of all possible upcall requests. This bug has been around for a very long time and is now caught by the extra checking in usercopy that was introduced in Linux-4.8. The exposure happens when the Coda cache manager process reads the fsync upcall request at which point it is killed. As a result there is nobody servicing any further upcalls, trapping any processes that try to access the mounted Coda filesystem. Signed-off-by: Jan Harkes Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/coda/upcall.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f6c6c8adbc01..7289f0a7670b 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -446,8 +446,7 @@ int venus_fsync(struct super_block *sb, struct CodaFid *fid) UPARG(CODA_FSYNC); inp->coda_fsync.VFid = *fid; - error = coda_upcall(coda_vcp(sb), sizeof(union inputArgs), - &outsize, inp); + error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); CODA_FREE(inp, insize); return error; From ceaec6e8cd98c8fd87701ddfb7468a13d989d79d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 14 Nov 2017 01:03:44 +0100 Subject: [PATCH 26/27] mm/pagewalk.c: report holes in hugetlb ranges commit 373c4557d2aa362702c4c2d41288fb1e54990b7c upstream. This matters at least for the mincore syscall, which will otherwise copy uninitialized memory from the page allocator to userspace. It is probably also a correctness error for /proc/$pid/pagemap, but I haven't tested that. Removing the `walk->hugetlb_entry` condition in walk_hugetlb_range() has no effect because the caller already checks for that. This only reports holes in hugetlb ranges to callers who have specified a hugetlb_entry callback. This issue was found using an AFL-based fuzzer. v2: - don't crash on ->pte_hole==NULL (Andrew Morton) - add Cc stable (Andrew Morton) Changed for 4.4/4.9 stable backport: - fix up conflict in the huge_pte_offset() call Fixes: 1e25a271c8ac ("mincore: apply page table walker on do_mincore()") Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/pagewalk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 207244489a68..d95341cffc2f 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -142,8 +142,12 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, do { next = hugetlb_entry_end(h, addr, end); pte = huge_pte_offset(walk->mm, addr & hmask); - if (pte && walk->hugetlb_entry) + + if (pte) err = walk->hugetlb_entry(pte, hmask, addr, next, walk); + else if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) break; } while (addr = next, addr != end); From 133e6ccf46f1704a4a680ef45565e970ac9a7f9c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 24 Nov 2017 08:33:43 +0100 Subject: [PATCH 27/27] Linux 4.9.65 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d29cace0da6d..87a641515e9c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 64 +SUBLEVEL = 65 EXTRAVERSION = NAME = Roaring Lionus