Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

subd version messages #4471

Merged
merged 5 commits into from
Apr 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions channeld/channeld.c
Original file line number Diff line number Diff line change
Expand Up @@ -3040,8 +3040,6 @@ static void init_channel(struct peer *peer)

assert(!(fcntl(MASTER_FD, F_GETFL) & O_NONBLOCK));

status_setup_sync(MASTER_FD);

msg = wire_sync_read(tmpctx, MASTER_FD);
if (!fromwire_channeld_init(peer, msg,
&chainparams,
Expand Down Expand Up @@ -3225,6 +3223,8 @@ int main(int argc, char *argv[])

subdaemon_setup(argc, argv);

status_setup_sync(MASTER_FD);

peer = tal(NULL, struct peer);
peer->expecting_pong = false;
timers_init(&peer->timers, time_mono());
Expand Down
7 changes: 7 additions & 0 deletions common/status.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <common/status.h>
#include <common/status_wiregen.h>
#include <common/utils.h>
#include <common/version.h>
#include <errno.h>
#include <signal.h>
#include <wire/peer_wire.h>
Expand Down Expand Up @@ -59,6 +60,9 @@ void status_setup_sync(int fd)
assert(!status_conn);
status_fd = fd;
setup_logging_sighandler();

/* Send version now. */
status_send(take(towire_status_version(NULL, version())));
}

static void destroy_daemon_conn(struct daemon_conn *dc UNUSED)
Expand All @@ -75,6 +79,9 @@ void status_setup_async(struct daemon_conn *master)
tal_add_destructor(master, destroy_daemon_conn);

setup_logging_sighandler();

/* Send version now. */
status_send(take(towire_status_version(NULL, version())));
}

void status_send(const u8 *msg TAKES)
Expand Down
3 changes: 3 additions & 0 deletions common/status_wire.csv
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,7 @@ msgtype,status_peer_connection_lost,0xFFF3
msgtype,status_peer_billboard,0xFFF5
msgdata,status_peer_billboard,perm,bool,
msgdata,status_peer_billboard,happenings,wirestring,

msgtype,status_version,0xFFF6
msgdata,status_version,version,wirestring,
# Note: 0xFFFF is reserved for MSG_PASS_FD!
25 changes: 24 additions & 1 deletion common/status_wiregen.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion common/status_wiregen.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion contrib/pyln-testing/pyln/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,7 +1216,8 @@ def split_options(self, opts):
'random_hsm',
'feerates',
'wait_for_bitcoind_sync',
'allow_bad_gossip'
'allow_bad_gossip',
'start',
]
node_opts = {k: v for k, v in opts.items() if k in node_opt_keys}
cli_opts = {k: v for k, v in opts.items() if k not in node_opt_keys}
Expand Down
8 changes: 4 additions & 4 deletions gossipd/gossipd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,10 @@ static struct io_plan *gossip_init(struct io_conn *conn,
/* Fire up the seeker! */
daemon->seeker = new_seeker(daemon);

/* connectd is already started, and uses this fd to ask us things. */
daemon->connectd = daemon_conn_new(daemon, CONNECTD_FD,
connectd_req, NULL, daemon);

return daemon_conn_read_next(conn, daemon->master);
}

Expand Down Expand Up @@ -1952,10 +1956,6 @@ int main(int argc, char *argv[])

status_setup_async(daemon->master);

/* connectd is already started, and uses this fd to ask us things. */
daemon->connectd = daemon_conn_new(daemon, CONNECTD_FD,
connectd_req, NULL, daemon);

/* This loop never exits. io_loop() only returns if a timer has
* expired, or io_break() is called, or all fds are closed. We don't
* use io_break and closing the lightningd fd calls master_gone()
Expand Down
34 changes: 34 additions & 0 deletions lightningd/lightningd.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
ld->listen = true;
ld->autolisten = true;
ld->reconnect = true;
ld->try_reexec = false;

/*~ This is from ccan/timer: it is efficient for the case where timers
* are deleted before expiry (as is common with timeouts) using an
Expand Down Expand Up @@ -854,6 +855,8 @@ int main(int argc, char *argv[])
struct rlimit nofile = {1024, 1024};
int sigchld_rfd;
int exit_code = 0;
char **orig_argv;
bool try_reexec;

/*~ Make sure that we limit ourselves to something reasonable. Modesty
* is a virtue. */
Expand Down Expand Up @@ -888,6 +891,17 @@ int main(int argc, char *argv[])
ld = new_lightningd(NULL);
ld->state = LD_STATE_RUNNING;

/*~ We store an copy of our arguments before parsing mangles them, so
* we can re-exec if versions of subdaemons change. Note the use of
* notleak() since our leak-detector can't find orig_argv on the
* stack. */
orig_argv = notleak(tal_arr(ld, char *, argc + 1));
for (size_t i = 1; i < argc; i++)
orig_argv[i] = tal_strdup(orig_argv, argv[i]);
/*~ Turn argv[0] into an absolute path (if not already) */
orig_argv[0] = path_join(orig_argv, take(path_cwd(NULL)), argv[0]);
orig_argv[argc] = NULL;

/* Figure out where our daemons are first. */
ld->daemon_dir = find_daemon_dir(ld, argv[0]);
if (!ld->daemon_dir)
Expand Down Expand Up @@ -1144,6 +1158,11 @@ int main(int argc, char *argv[])
* ld->payments, so clean that up. */
clean_tmpctx();

/* Gather these before we free ld! */
try_reexec = ld->try_reexec;
if (try_reexec)
tal_steal(NULL, orig_argv);

/* Free this last: other things may clean up timers. */
timers = tal_steal(NULL, ld->timers);
tal_free(ld);
Expand All @@ -1161,6 +1180,21 @@ int main(int argc, char *argv[])
tal_free(stop_response);
}

/* Were we supposed to restart ourselves? */
if (try_reexec) {
long max_fd;

/* Give a reasonable chance for the install to finish. */
sleep(5);

/* Close all filedescriptors except stdin/stdout/stderr */
max_fd = sysconf(_SC_OPEN_MAX);
for (int i = STDERR_FILENO+1; i < max_fd; i++)
close(i);
execv(orig_argv[0], orig_argv);
err(1, "Failed to re-exec ourselves after version change");
}

/*~ Farewell. Next stop: hsmd/hsmd.c. */
return exit_code;
}
3 changes: 3 additions & 0 deletions lightningd/lightningd.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ struct lightningd {

/* The round-robin list of channels, for use when doing MPP. */
u64 rr_counter;

/* Should we re-exec ourselves instead of just exiting? */
bool try_reexec;
};

/* Turning this on allows a tal allocation to return NULL, rather than aborting.
Expand Down
37 changes: 36 additions & 1 deletion lightningd/subd.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <ccan/array_size/array_size.h>
#include <ccan/err/err.h>
#include <ccan/io/fdpass/fdpass.h>
#include <ccan/io/io.h>
Expand All @@ -12,6 +13,7 @@
#include <common/peer_status_wiregen.h>
#include <common/per_peer_state.h>
#include <common/status_wiregen.h>
#include <common/version.h>
#include <errno.h>
#include <fcntl.h>
#include <lightningd/lightningd.h>
Expand Down Expand Up @@ -402,6 +404,29 @@ static bool handle_set_billboard(struct subd *sd, const u8 *msg)
return true;
}

static bool handle_version(struct subd *sd, const u8 *msg)
{
char *ver;

if (!fromwire_status_version(msg, msg, &ver))
return false;

if (!streq(ver, version())) {
log_broken(sd->log, "version '%s' not '%s': restarting",
ver, version());
sd->ld->try_reexec = true;
/* Return us to toplevel lightningd.c */
io_break(sd->ld);
return false;
}

sd->rcvd_version = true;
/* In case there are outgoing msgs, we can send now. */
msg_wake(sd->outq);

return true;
}

static struct io_plan *sd_msg_read(struct io_conn *conn, struct subd *sd)
{
int type = fromwire_peektype(sd->msg_in);
Expand Down Expand Up @@ -455,6 +480,10 @@ static struct io_plan *sd_msg_read(struct io_conn *conn, struct subd *sd)
if (!handle_set_billboard(sd, sd->msg_in))
goto malformed;
goto next;
case WIRE_STATUS_VERSION:
if (!handle_version(sd, sd->msg_in))
goto close;
goto next;
}

if (sd->channel) {
Expand Down Expand Up @@ -581,10 +610,15 @@ static void destroy_subd(struct subd *sd)

static struct io_plan *msg_send_next(struct io_conn *conn, struct subd *sd)
{
const u8 *msg = msg_dequeue(sd->outq);
const u8 *msg;
int fd;

/* Don't send if we haven't read version! */
if (!sd->rcvd_version)
return msg_queue_wait(conn, sd->outq, msg_send_next, sd);

/* Nothing to do? Wait for msg_enqueue. */
msg = msg_dequeue(sd->outq);
if (!msg)
return msg_queue_wait(conn, sd->outq, msg_send_next, sd);

Expand Down Expand Up @@ -678,6 +712,7 @@ static struct subd *new_subd(struct lightningd *ld,
tal_add_destructor(sd, destroy_subd);
list_head_init(&sd->reqs);
sd->channel = channel;
sd->rcvd_version = false;
if (node_id)
sd->node_id = tal_dup(sd, struct node_id, node_id);
else
Expand Down
3 changes: 3 additions & 0 deletions lightningd/subd.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ struct subd {
/* If we are associated with a single channel, this points to it. */
void *channel;

/* Have we received the version msg yet? Don't send until we do. */
bool rcvd_version;

/* For logging */
struct log *log;
const struct node_id *node_id;
Expand Down
3 changes: 3 additions & 0 deletions lightningd/test/run-find_my_abspath.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ bool fromwire_status_peer_billboard(const tal_t *ctx UNNEEDED, const void *p UNN
/* Generated stub for fromwire_status_peer_error */
bool fromwire_status_peer_error(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, struct channel_id *channel UNNEEDED, wirestring **desc UNNEEDED, bool *warning UNNEEDED, struct per_peer_state **pps UNNEEDED, u8 **error_for_them UNNEEDED)
{ fprintf(stderr, "fromwire_status_peer_error called!\n"); abort(); }
/* Generated stub for fromwire_status_version */
bool fromwire_status_version(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, wirestring **version UNNEEDED)
{ fprintf(stderr, "fromwire_status_version called!\n"); abort(); }
/* Generated stub for gossip_init */
void gossip_init(struct lightningd *ld UNNEEDED, int connectd_fd UNNEEDED)
{ fprintf(stderr, "gossip_init called!\n"); abort(); }
Expand Down
11 changes: 11 additions & 0 deletions tests/plugins/badopeningd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#! /bin/sh

# If this file exists, we send that message back, then sleep.
if [ $# = 0 ] && [ -f openingd-version ]; then
# lightningd expects us to write to stdin!
cat openingd-version >&0
sleep 10
exit 0
fi

exec "$(cat openingd-real)" "$@"
30 changes: 30 additions & 0 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2507,3 +2507,33 @@ def test_listforwards(node_factory, bitcoind):
# out_channel=c24
c24_forwards = l2.rpc.listforwards(out_channel=c24)['forwards']
assert len(c24_forwards) == 1


def test_version_reexec(node_factory, bitcoind):
badopeningd = os.path.join(os.path.dirname(__file__), "plugins", "badopeningd.sh")
version = subprocess.check_output(['lightningd/lightningd',
'--version']).decode('utf-8').splitlines()[0]

l1, l2 = node_factory.get_nodes(2, opts=[{'subdaemon': 'openingd:' + badopeningd,
'start': False,
'allow_broken_log': True},
{}])
# We use a file to tell our openingd wrapper where the real one is
with open(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, "openingd-real"), 'w') as f:
f.write(os.path.abspath('lightningd/lightning_openingd'))

l1.start()
# This is a "version" message
verfile = os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, "openingd-version")
with open(verfile, 'wb') as f:
f.write(bytes.fromhex('0000000d' # len
'fff6')) # type
f.write(bytes('badversion\0', encoding='utf8'))

l1.rpc.connect(l2.info['id'], 'localhost', l2.port)

l1.daemon.wait_for_log("openingd.*version 'badversion' not '{}': restarting".format(version))

# Now "fix" it, it should restart.
os.unlink(verfile)
l1.daemon.wait_for_log("Server started with public key")