From f84c32aaab13f009c8c1fb10988832b524eec88b Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:41 +0100 Subject: [PATCH 01/13] tg create t/mptcp-refactor-passive-socket-initialization-net base --- .topdeps | 1 - .topmsg | 24 ------------------------ 2 files changed, 25 deletions(-) delete mode 100644 .topdeps delete mode 100644 .topmsg diff --git a/.topdeps b/.topdeps deleted file mode 100644 index 4860443f56228..0000000000000 --- a/.topdeps +++ /dev/null @@ -1 +0,0 @@ -t/mptcp-fix-UaF-in-listener-shutdown diff --git a/.topmsg b/.topmsg deleted file mode 100644 index ce4782ebcc295..0000000000000 --- a/.topmsg +++ /dev/null @@ -1,24 +0,0 @@ -From: Paolo Abeni -Subject: [PATCH] mptcp: fix possible deadlock in subflow_error_report - -Christoph reported a possible deadlock while the TCP stack -destroys an unaccepted subflow due to an incoming reset: the -MPTCP socket error path tries to acquire the msk-level socket -lock while TCP still owns the listener socket accept queue -spinlock, and the reverse dependency already exists in the -TCP stack. - -Note that the above is actually a lockdep false positive, as -the chain involves two separate sockets. A different per-socket -lockdep key will address the issue, but such a change will be -quite invasive. - -Instead, we can simply stop earlier the socket error handling -for orphaned or unaccepted subflows, breaking the critical -lockdep chain. Error handling in such a scenario is a no-op. - -Reported-and-tested-by: Christoph Paasch -Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") -Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/355 -Signed-off-by: Paolo Abeni -Reviewed-by: Matthieu Baerts From 22112a770a898b1faafd19d095a2ba6247ff9aea Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:41 +0100 Subject: [PATCH 02/13] tg create t/mptcp-refactor-passive-socket-initialization-net --- .topdeps | 1 + .topmsg | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000000..a8964c50c193a --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +t/mptcp-fix-possible-deadlock-in-subflow_error_report diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000000..7e820c49c6e4e --- /dev/null +++ b/.topmsg @@ -0,0 +1,4 @@ +From: Matthieu Baerts +Subject: [PATCH] t/mptcp-refactor-passive-socket-initialization-net + +Signed-off-by: Matthieu Baerts From 8f722430276d6d389c1f7d55e3eca0e2a6d1b80c Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:42 +0100 Subject: [PATCH 03/13] tg import create t/mptcp-refactor-passive-socket-initialization-net --- .topmsg | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.topmsg b/.topmsg index 7e820c49c6e4e..ce9ca67c21fbb 100644 --- a/.topmsg +++ b/.topmsg @@ -1,4 +1,14 @@ -From: Matthieu Baerts -Subject: [PATCH] t/mptcp-refactor-passive-socket-initialization-net +From: Paolo Abeni +Subject: [PATCH] mptcp: refactor passive socket initialization (net) -Signed-off-by: Matthieu Baerts +After commit 30e51b923e43 ("mptcp: fix unreleased socket in +accept queue") unaccepted msk sockets go throu complete +shutdown, we don't need anymore to delay inserting the first +subflow into the subflow lists. + +The reference counting deserve some extra care, as __mptcp_close() +is unaware of the request socket linkage to the first subflow. + +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Tested-by: Christoph Paasch From eb67c35c665c726b021cd6eea7aea0139886bf75 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Feb 2023 05:53:00 +0000 Subject: [PATCH 04/13] mptcp: refactor passive socket initialization (net) After commit 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") unaccepted msk sockets go throu complete shutdown, we don't need anymore to delay inserting the first subflow into the subflow lists. The reference counting deserve some extra care, as __mptcp_close() is unaware of the request socket linkage to the first subflow. Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Tested-by: Christoph Paasch --- net/mptcp/protocol.c | 17 ----------------- net/mptcp/subflow.c | 27 +++++++++++++++++++++------ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ad9c46202fc6..447641d34c2c5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -3708,22 +3707,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, lock_sock(newsk); - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow = mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 5070dc33675dd..a631a5e6fc7b5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -397,6 +397,12 @@ void mptcp_subflow_reset(struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; + /* mptcp_mp_fail_no_response() can reach here on an already closed + * socket + */ + if (ssk->sk_state == TCP_CLOSE) + return; + /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); @@ -750,6 +756,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -824,6 +831,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + owner = mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -832,14 +841,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn = new_msk; new_msk = NULL; @@ -847,15 +856,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); From 42e62c3a97d5320d6d8d3a973f34c90def3b74e5 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:43 +0100 Subject: [PATCH 05/13] tg create t/mptcp-use-the-workqueue-to-destroy-unaccepted-sockets-net base --- .topdeps | 1 - .topmsg | 14 -------------- 2 files changed, 15 deletions(-) delete mode 100644 .topdeps delete mode 100644 .topmsg diff --git a/.topdeps b/.topdeps deleted file mode 100644 index a8964c50c193a..0000000000000 --- a/.topdeps +++ /dev/null @@ -1 +0,0 @@ -t/mptcp-fix-possible-deadlock-in-subflow_error_report diff --git a/.topmsg b/.topmsg deleted file mode 100644 index ce9ca67c21fbb..0000000000000 --- a/.topmsg +++ /dev/null @@ -1,14 +0,0 @@ -From: Paolo Abeni -Subject: [PATCH] mptcp: refactor passive socket initialization (net) - -After commit 30e51b923e43 ("mptcp: fix unreleased socket in -accept queue") unaccepted msk sockets go throu complete -shutdown, we don't need anymore to delay inserting the first -subflow into the subflow lists. - -The reference counting deserve some extra care, as __mptcp_close() -is unaware of the request socket linkage to the first subflow. - -Signed-off-by: Paolo Abeni -Reviewed-by: Matthieu Baerts -Tested-by: Christoph Paasch From 2c762924622b0a86693a17b8e1576dd845a8088f Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:43 +0100 Subject: [PATCH 06/13] tg create t/mptcp-use-the-workqueue-to-destroy-unaccepted-sockets-net --- .topdeps | 1 + .topmsg | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000000..dc54e7191d617 --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +t/mptcp-refactor-passive-socket-initialization-net diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000000..344915b379871 --- /dev/null +++ b/.topmsg @@ -0,0 +1,4 @@ +From: Matthieu Baerts +Subject: [PATCH] t/mptcp-use-the-workqueue-to-destroy-unaccepted-sockets-net + +Signed-off-by: Matthieu Baerts From 27355c08f1df70d33b582a9340972c1afd505eaa Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:44 +0100 Subject: [PATCH 07/13] tg import create t/mptcp-use-the-workqueue-to-destroy-unaccepted-sockets-net --- .topmsg | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/.topmsg b/.topmsg index 344915b379871..4f612ecd5caf4 100644 --- a/.topmsg +++ b/.topmsg @@ -1,4 +1,46 @@ -From: Matthieu Baerts -Subject: [PATCH] t/mptcp-use-the-workqueue-to-destroy-unaccepted-sockets-net +From: Paolo Abeni +Subject: [PATCH] mptcp: use the workqueue to destroy unaccepted sockets (net) -Signed-off-by: Matthieu Baerts +Christoph reported a UaF at token lookup time after having +refactored the passive socket initialization part: + + BUG: KASAN: use-after-free in __token_bucket_busy+0x253/0x260 + Read of size 4 at addr ffff88810698d5b0 by task syz-executor653/3198 + + CPU: 1 PID: 3198 Comm: syz-executor653 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + Call Trace: + + dump_stack_lvl+0x6e/0x91 + print_report+0x16a/0x46f + kasan_report+0xad/0x130 + __token_bucket_busy+0x253/0x260 + mptcp_token_new_connect+0x13d/0x490 + mptcp_connect+0x4ed/0x860 + __inet_stream_connect+0x80e/0xd90 + tcp_sendmsg_fastopen+0x3ce/0x710 + mptcp_sendmsg+0xff1/0x1a20 + inet_sendmsg+0x11d/0x140 + __sys_sendto+0x405/0x490 + __x64_sys_sendto+0xdc/0x1b0 + do_syscall_64+0x3b/0x90 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +We need to properly clean-up all the paired MPTCP-level +resources and be sure to release the msk last, even when +the unaccepted subflow is destroyed by the TCP internals +via inet_child_forget(). + +We can re-use the existing MPTCP_WORK_CLOSE_SUBFLOW infra, +explicitly checking that for the critical scenario: the +closed subflow is the MPC one, the msk is not accepted and +eventually going through full cleanup. + +With such change, __mptcp_destroy_sock() is always called +on msk sockets, even on accepted ones. We don't need anymore +to transiently drop one sk reference at msk clone time. + +Reported-and-tested-by: Christoph Paasch +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/347 +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts From 315f247fb1036fac576e2e5e9e0943f419159790 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Feb 2023 05:53:03 +0000 Subject: [PATCH 08/13] mptcp: use the workqueue to destroy unaccepted sockets (net) Christoph reported a UaF at token lookup time after having refactored the passive socket initialization part: BUG: KASAN: use-after-free in __token_bucket_busy+0x253/0x260 Read of size 4 at addr ffff88810698d5b0 by task syz-executor653/3198 CPU: 1 PID: 3198 Comm: syz-executor653 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0x91 print_report+0x16a/0x46f kasan_report+0xad/0x130 __token_bucket_busy+0x253/0x260 mptcp_token_new_connect+0x13d/0x490 mptcp_connect+0x4ed/0x860 __inet_stream_connect+0x80e/0xd90 tcp_sendmsg_fastopen+0x3ce/0x710 mptcp_sendmsg+0xff1/0x1a20 inet_sendmsg+0x11d/0x140 __sys_sendto+0x405/0x490 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc We need to properly clean-up all the paired MPTCP-level resources and be sure to release the msk last, even when the unaccepted subflow is destroyed by the TCP internals via inet_child_forget(). We can re-use the existing MPTCP_WORK_CLOSE_SUBFLOW infra, explicitly checking that for the critical scenario: the closed subflow is the MPC one, the msk is not accepted and eventually going through full cleanup. With such change, __mptcp_destroy_sock() is always called on msk sockets, even on accepted ones. We don't need anymore to transiently drop one sk reference at msk clone time. Reported-and-tested-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/347 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts --- net/mptcp/protocol.c | 26 +++++++++++++++++--------- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 11 +++++++++-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 447641d34c2c5..b7014f9392364 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2398,9 +2398,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void __mptcp_close_subflow(struct mptcp_sock *msk) +static void __mptcp_close_subflow(struct sock *sk) { struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); might_sleep(); @@ -2414,7 +2415,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) continue; - mptcp_close_ssk((struct sock *)msk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); + } + + /* if the MPC subflow has been closed before the msk is accepted, + * msk will never be accept-ed, close it now + */ + if (!msk->first && msk->in_accept_queue) { + sock_set_flag(sk, SOCK_DEAD); + inet_sk_state_store(sk, TCP_CLOSE); } } @@ -2623,6 +2632,9 @@ static void mptcp_worker(struct work_struct *work) __mptcp_check_send_data_fin(sk); mptcp_check_data_fin(sk); + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(sk); + /* There is no point in keeping around an orphaned sk timedout or * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state @@ -2638,9 +2650,6 @@ static void mptcp_worker(struct work_struct *work) } } - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); @@ -3078,6 +3087,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; msk->subflow = NULL; + msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); @@ -3095,8 +3105,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); - /* keep a single reference */ - __sock_put(nsk); + /* note: the newly allocated socket refcount is 2 now */ return nsk; } @@ -3152,8 +3161,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, goto out; } - /* acquire the 2nd reference for the owning socket */ - sock_hold(new_mptcp_sock); newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); } else { @@ -3704,6 +3711,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk->in_accept_queue = 0; lock_sock(newsk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 61fd8eabfca20..901c9da8fe66f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -295,7 +295,8 @@ struct mptcp_sock { u8 recvmsg_inq:1, cork:1, nodelay:1, - fastopening:1; + fastopening:1, + in_accept_queue:1; int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a631a5e6fc7b5..9d5bf2a020efe 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -699,9 +699,10 @@ static bool subflow_hmac_valid(const struct request_sock *req, static void mptcp_force_close(struct sock *sk) { - /* the msk is not yet exposed to user-space */ + /* the msk is not yet exposed to user-space, and refcount is 2 */ inet_sk_state_store(sk, TCP_CLOSE); sk_common_release(sk); + sock_put(sk); } static void subflow_ulp_fallback(struct sock *sk, @@ -1866,7 +1867,6 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s struct sock *sk = (struct sock *)msk; bool do_cancel_work; - sock_hold(sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; @@ -1954,6 +1954,13 @@ static void subflow_ulp_release(struct sock *ssk) * when the subflow is still unaccepted */ release = ctx->disposable || list_empty(&ctx->node); + + /* inet_child_forget() does not call sk_state_change(), + * explicitly trigger the socket close machinery + */ + if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, + &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } From db8cfebe5898c7b6ed66e5c1be2c84b79286cf5b Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:45 +0100 Subject: [PATCH 09/13] tg create t/mptcp-fix-UaF-in-listener-shutdown-net base --- .topdeps | 1 - .topmsg | 46 ---------------------------------------------- 2 files changed, 47 deletions(-) delete mode 100644 .topdeps delete mode 100644 .topmsg diff --git a/.topdeps b/.topdeps deleted file mode 100644 index dc54e7191d617..0000000000000 --- a/.topdeps +++ /dev/null @@ -1 +0,0 @@ -t/mptcp-refactor-passive-socket-initialization-net diff --git a/.topmsg b/.topmsg deleted file mode 100644 index 4f612ecd5caf4..0000000000000 --- a/.topmsg +++ /dev/null @@ -1,46 +0,0 @@ -From: Paolo Abeni -Subject: [PATCH] mptcp: use the workqueue to destroy unaccepted sockets (net) - -Christoph reported a UaF at token lookup time after having -refactored the passive socket initialization part: - - BUG: KASAN: use-after-free in __token_bucket_busy+0x253/0x260 - Read of size 4 at addr ffff88810698d5b0 by task syz-executor653/3198 - - CPU: 1 PID: 3198 Comm: syz-executor653 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 - Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 - Call Trace: - - dump_stack_lvl+0x6e/0x91 - print_report+0x16a/0x46f - kasan_report+0xad/0x130 - __token_bucket_busy+0x253/0x260 - mptcp_token_new_connect+0x13d/0x490 - mptcp_connect+0x4ed/0x860 - __inet_stream_connect+0x80e/0xd90 - tcp_sendmsg_fastopen+0x3ce/0x710 - mptcp_sendmsg+0xff1/0x1a20 - inet_sendmsg+0x11d/0x140 - __sys_sendto+0x405/0x490 - __x64_sys_sendto+0xdc/0x1b0 - do_syscall_64+0x3b/0x90 - entry_SYSCALL_64_after_hwframe+0x72/0xdc - -We need to properly clean-up all the paired MPTCP-level -resources and be sure to release the msk last, even when -the unaccepted subflow is destroyed by the TCP internals -via inet_child_forget(). - -We can re-use the existing MPTCP_WORK_CLOSE_SUBFLOW infra, -explicitly checking that for the critical scenario: the -closed subflow is the MPC one, the msk is not accepted and -eventually going through full cleanup. - -With such change, __mptcp_destroy_sock() is always called -on msk sockets, even on accepted ones. We don't need anymore -to transiently drop one sk reference at msk clone time. - -Reported-and-tested-by: Christoph Paasch -Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/347 -Signed-off-by: Paolo Abeni -Reviewed-by: Matthieu Baerts From 13ba5e91ca7ecf965f0bb85b2d0c707be4c55622 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:45 +0100 Subject: [PATCH 10/13] tg create t/mptcp-fix-UaF-in-listener-shutdown-net --- .topdeps | 1 + .topmsg | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 .topdeps create mode 100644 .topmsg diff --git a/.topdeps b/.topdeps new file mode 100644 index 0000000000000..67cfa9737ed2e --- /dev/null +++ b/.topdeps @@ -0,0 +1 @@ +t/mptcp-use-the-workqueue-to-destroy-unaccepted-sockets-net diff --git a/.topmsg b/.topmsg new file mode 100644 index 0000000000000..581cabb12bc89 --- /dev/null +++ b/.topmsg @@ -0,0 +1,4 @@ +From: Matthieu Baerts +Subject: [PATCH] t/mptcp-fix-UaF-in-listener-shutdown-net + +Signed-off-by: Matthieu Baerts From a07849cf372761a1bcb274c69049cb551dc2fac6 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:46 +0100 Subject: [PATCH 11/13] tg import create t/mptcp-fix-UaF-in-listener-shutdown-net --- .topmsg | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/.topmsg b/.topmsg index 581cabb12bc89..f82d83ea196ac 100644 --- a/.topmsg +++ b/.topmsg @@ -1,4 +1,54 @@ -From: Matthieu Baerts -Subject: [PATCH] t/mptcp-fix-UaF-in-listener-shutdown-net +From: Paolo Abeni +Subject: [PATCH] mptcp: fix UaF in listener shutdown (net) -Signed-off-by: Matthieu Baerts +As reported by Christoph after having refactored the passive +socket initialization, the mptcp listener shutdown path is prone +to an UaF issue. + + BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x73/0xe0 + Write of size 4 at addr ffff88810cb23098 by task syz-executor731/1266 + + CPU: 1 PID: 1266 Comm: syz-executor731 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 + Call Trace: + + dump_stack_lvl+0x6e/0x91 + print_report+0x16a/0x46f + kasan_report+0xad/0x130 + kasan_check_range+0x14a/0x1a0 + _raw_spin_lock_bh+0x73/0xe0 + subflow_error_report+0x6d/0x110 + sk_error_report+0x3b/0x190 + tcp_disconnect+0x138c/0x1aa0 + inet_child_forget+0x6f/0x2e0 + inet_csk_listen_stop+0x209/0x1060 + __mptcp_close_ssk+0x52d/0x610 + mptcp_destroy_common+0x165/0x640 + mptcp_destroy+0x13/0x80 + __mptcp_destroy_sock+0xe7/0x270 + __mptcp_close+0x70e/0x9b0 + mptcp_close+0x2b/0x150 + inet_release+0xe9/0x1f0 + __sock_release+0xd2/0x280 + sock_close+0x15/0x20 + __fput+0x252/0xa20 + task_work_run+0x169/0x250 + exit_to_user_mode_prepare+0x113/0x120 + syscall_exit_to_user_mode+0x1d/0x40 + do_syscall_64+0x48/0x90 + entry_SYSCALL_64_after_hwframe+0x72/0xdc + +The msk grace period can legitly expire in between the last +reference count dropped in mptcp_subflow_queue_clean() and +the later eventual access in inet_csk_listen_stop() + +After the previous patch we don't need anymore special-casing +msk listener socket cleanup: the mptcp worker will process each +of the unaccepted msk sockets. + +Just drop the now unnecessary code. + +Reported-and-tested-by: Christoph Paasch +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/346 +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts From 26486b7946a612513d42948460d90eda45e060b5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Feb 2023 05:53:04 +0000 Subject: [PATCH 12/13] mptcp: fix UaF in listener shutdown (net) As reported by Christoph after having refactored the passive socket initialization, the mptcp listener shutdown path is prone to an UaF issue. BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x73/0xe0 Write of size 4 at addr ffff88810cb23098 by task syz-executor731/1266 CPU: 1 PID: 1266 Comm: syz-executor731 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0x91 print_report+0x16a/0x46f kasan_report+0xad/0x130 kasan_check_range+0x14a/0x1a0 _raw_spin_lock_bh+0x73/0xe0 subflow_error_report+0x6d/0x110 sk_error_report+0x3b/0x190 tcp_disconnect+0x138c/0x1aa0 inet_child_forget+0x6f/0x2e0 inet_csk_listen_stop+0x209/0x1060 __mptcp_close_ssk+0x52d/0x610 mptcp_destroy_common+0x165/0x640 mptcp_destroy+0x13/0x80 __mptcp_destroy_sock+0xe7/0x270 __mptcp_close+0x70e/0x9b0 mptcp_close+0x2b/0x150 inet_release+0xe9/0x1f0 __sock_release+0xd2/0x280 sock_close+0x15/0x20 __fput+0x252/0xa20 task_work_run+0x169/0x250 exit_to_user_mode_prepare+0x113/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc The msk grace period can legitly expire in between the last reference count dropped in mptcp_subflow_queue_clean() and the later eventual access in inet_csk_listen_stop() After the previous patch we don't need anymore special-casing msk listener socket cleanup: the mptcp worker will process each of the unaccepted msk sockets. Just drop the now unnecessary code. Reported-and-tested-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/346 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts --- net/mptcp/protocol.c | 1 - net/mptcp/protocol.h | 1 - net/mptcp/subflow.c | 72 -------------------------------------------- 3 files changed, 74 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b7014f9392364..420d6616da7df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2355,7 +2355,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* otherwise tcp will dispose of the ssk and subflow ctx */ if (ssk->sk_state == TCP_LISTEN) { tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); inet_csk_listen_stop(ssk); mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 901c9da8fe66f..bda5ad723d389 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -629,7 +629,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); -void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9d5bf2a020efe..5a3b17811b6b9 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1826,78 +1826,6 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) -{ - struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; - struct mptcp_sock *msk, *next, *head = NULL; - struct request_sock *req; - - /* build a list of all unaccepted mptcp sockets */ - spin_lock_bh(&queue->rskq_lock); - for (req = queue->rskq_accept_head; req; req = req->dl_next) { - struct mptcp_subflow_context *subflow; - struct sock *ssk = req->sk; - struct mptcp_sock *msk; - - if (!sk_is_mptcp(ssk)) - continue; - - subflow = mptcp_subflow_ctx(ssk); - if (!subflow || !subflow->conn) - continue; - - /* skip if already in list */ - msk = mptcp_sk(subflow->conn); - if (msk->dl_next || msk == head) - continue; - - msk->dl_next = head; - head = msk; - } - spin_unlock_bh(&queue->rskq_lock); - if (!head) - return; - - /* can't acquire the msk socket lock under the subflow one, - * or will cause ABBA deadlock - */ - release_sock(listener_ssk); - - for (msk = head; msk; msk = next) { - struct sock *sk = (struct sock *)msk; - bool do_cancel_work; - - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - next = msk->dl_next; - msk->first = NULL; - msk->dl_next = NULL; - - do_cancel_work = __mptcp_close(sk, 0); - release_sock(sk); - if (do_cancel_work) { - /* lockdep will report a false positive ABBA deadlock - * between cancel_work_sync and the listener socket. - * The involved locks belong to different sockets WRT - * the existing AB chain. - * Using a per socket key is problematic as key - * deregistration requires process context and must be - * performed at socket disposal time, in atomic - * context. - * Just tell lockdep to consider the listener socket - * released here. - */ - mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); - mptcp_cancel_work(sk); - mutex_acquire(&listener_sk->sk_lock.dep_map, - SINGLE_DEPTH_NESTING, 0, _RET_IP_); - } - sock_put(sk); - } - - /* we are still under the listener msk socket lock */ - lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); -} - static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); From f1ce17ef35d7c6333791d88ce9cfcbd78378e04b Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 22 Feb 2023 17:43:47 +0100 Subject: [PATCH 13/13] tg: switch to t/mptcp-fix-UaF-in-listener-shutdown-net Signed-off-by: Matthieu Baerts --- .topdeps | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.topdeps b/.topdeps index a8964c50c193a..63c7a10530f05 100644 --- a/.topdeps +++ b/.topdeps @@ -1 +1 @@ -t/mptcp-fix-possible-deadlock-in-subflow_error_report +t/mptcp-fix-UaF-in-listener-shutdown-net