From 78675d212b6530b396efb476acef480de80ff880 Mon Sep 17 00:00:00 2001 From: Dustin Dettmer Date: Fri, 16 Sep 2022 18:11:51 -0400 Subject: [PATCH] connectd+: Flake/race fix for new channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) dualopen has fd to connectd 2) channeld needs to take over 3) dualopen passes the fd that leads to connectd over for channeld to use 4) lightningd must receive the fd transfer request and process it 5) dualopen shuts down and closes everything it owns 4 & 5 end up in a race. If 5 happens before 4, channeld ends up with an invalid fd for connectd — leaving it unable to receive messages. Lingering for a second makes 4 win the race. Since the daemon is closing anyway, waiting for a second should be alright. Changelog-Fixed: Fixed a condition for newly created channels that could trigger a need for reconnect. --- channeld/channeld.c | 4 ++++ openingd/dualopend.c | 6 ++++++ openingd/openingd.c | 3 +++ 3 files changed, 13 insertions(+) diff --git a/channeld/channeld.c b/channeld/channeld.c index c4f2a744dbb9..f701d24aa6f0 100644 --- a/channeld/channeld.c +++ b/channeld/channeld.c @@ -955,6 +955,10 @@ static void send_shutdown_complete(struct peer *peer) wire_sync_write(MASTER_FD, take(towire_channeld_shutdown_complete(NULL))); per_peer_state_fdpass_send(MASTER_FD, peer->pps); + + /* Give master a chance to pass the fd along */ + sleep(1); + close(MASTER_FD); } diff --git a/openingd/dualopend.c b/openingd/dualopend.c index 03e47911c3d0..e215cca188d5 100644 --- a/openingd/dualopend.c +++ b/openingd/dualopend.c @@ -303,6 +303,9 @@ static void dualopen_shutdown(struct state *state) status_debug("Sent %s with fds", dualopend_wire_name(fromwire_peektype(msg))); + /* Give master a chance to pass the fd along */ + sleep(1); + /* This frees the entire tal tree. 
*/ tal_free(state); daemon_shutdown(); @@ -3989,6 +3992,9 @@ int main(int argc, char *argv[]) dualopend_wire_name(fromwire_peektype(msg))); tal_free(msg); + /* Give master a chance to pass the fd along */ + sleep(1); + /* This frees the entire tal tree. */ tal_free(state); daemon_shutdown(); diff --git a/openingd/openingd.c b/openingd/openingd.c index 3e9272a43f8d..a7bd167f5937 100644 --- a/openingd/openingd.c +++ b/openingd/openingd.c @@ -1485,6 +1485,9 @@ int main(int argc, char *argv[]) status_debug("Sent %s with fd", openingd_wire_name(fromwire_peektype(msg))); + /* Give master a chance to pass the fd along */ + sleep(1); + /* This frees the entire tal tree. */ tal_free(state);