From 8678c5efb337ef98008a8523999a60c7762e8a4a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 16 Jul 2022 14:19:30 +0930 Subject: [PATCH] connectd: release peer soon as lightingd tells us. Now we have separate peer draining logic, we can simply use it when connectd tells us to release the peer, without waiting. (We could simply free the peer, but that's a bit rude, as messages can get lost). This removes various complex flags and logic we had before. Signed-off-by: Rusty Russell Changelog-Fixed: `connectd`: various crashes and issues fixed by simplification and rewrite. --- connectd/connectd.c | 47 +++++++---------------------------------- connectd/connectd.h | 12 +---------- connectd/multiplex.c | 50 +++++++------------------------------------- connectd/multiplex.h | 3 --- 4 files changed, 16 insertions(+), 96 deletions(-) diff --git a/connectd/connectd.c b/connectd/connectd.c index aadfc055d..107f30e6e 100644 --- a/connectd/connectd.c +++ b/connectd/connectd.c @@ -315,8 +315,6 @@ static struct peer *new_peer(struct daemon *daemon, peer->peer_in = NULL; peer->sent_to_peer = NULL; peer->urgent = false; - peer->ready_to_die = false; - peer->active = false; peer->draining = false; peer->peer_outq = msg_queue_new(peer, false); peer->last_recv_time = time_now(); @@ -1832,17 +1830,12 @@ static void try_connect_peer(struct daemon *daemon, /* Already existing? */ existing = peer_htable_get(&daemon->peers, id); if (existing) { - /* If it's exiting now, we've raced: reconnect after */ - if ((tal_count(existing->subds) != 0 || !existing->active) - && existing->to_peer - && !existing->ready_to_die) { - /* Tell it it's already connected so it doesn't - * wait forever. */ - daemon_conn_send(daemon->master, - take(towire_connectd_peer_already_connected - (NULL, id))); - return; - } + /* FIXME: Tell it it's already connected so it doesn't + * wait forever. */ + daemon_conn_send(daemon->master, + take(towire_connectd_peer_already_connected + (NULL, id))); + return; } /* If we're trying to connect it right now, that's OK. */ @@ -1935,30 +1928,6 @@ static void connect_to_peer(struct daemon *daemon, const u8 *msg) try_connect_peer(daemon, &id, seconds_waited, addrs, addrhint); } -void peer_conn_closed(struct peer *peer) -{ - struct connecting *connect = find_connecting(peer->daemon, &peer->id); - - /* These should be closed already! */ - assert(!peer->to_peer); - assert(peer->ready_to_die || !peer->active); - - status_peer_debug(&peer->id, "peer_conn_closed"); - - /* Wake up in case there's a reconnecting peer waiting in io_wait. */ - io_wake(peer); - - /* Note: deleting from a htable (a-la node_set_del) does not free it: - * htable doesn't assume it's a tal object at all. That's why we have - * a destructor attached to peer (called destroy_peer by - * convention). */ - tal_free(peer); - - /* If we wanted to connect to it, but found it was exiting, try again */ - if (connect && !connect->conn) - try_connect_one_addr(connect); -} - /* lightningd tells us a peer should be disconnected. */ static void peer_discard(struct daemon *daemon, const u8 *msg) { @@ -1974,9 +1943,7 @@ static void peer_discard(struct daemon *daemon, const u8 *msg) if (!peer) return; status_peer_debug(&id, "disconnect"); - - /* When it's finished, it will call peer_conn_closed() */ - close_peer_conn(peer); + tal_free(peer); } /* lightningd tells us to send a msg and disconnect. */ diff --git a/connectd/connectd.h b/connectd/connectd.h index 43e7b242a..bdab8d1e9 100644 --- a/connectd/connectd.h +++ b/connectd/connectd.h @@ -59,13 +59,6 @@ struct peer { /* Connections to the subdaemons */ struct subd **subds; - /* Set once lightningd says it's OK to close (subd tells it - * it's done). */ - bool ready_to_die; - - /* Has this ever been active? (i.e. ever had a subd attached?) */ - bool active; - /* When socket has Nagle overridden */ bool urgent; @@ -220,10 +213,7 @@ struct io_plan *peer_connected(struct io_conn *conn, bool incoming, bool retrying); -/* Called when peer->peer_conn is finally freed */ -void peer_conn_closed(struct peer *peer); - -/* Removes peer from hash table */ +/* Removes peer from hash table, tells gossipd and lightningd. */ void destroy_peer(struct peer *peer); #endif /* LIGHTNING_CONNECTD_CONNECTD_H */ diff --git a/connectd/multiplex.c b/connectd/multiplex.c index 9a88724b5..f7fa7d0b3 100644 --- a/connectd/multiplex.c +++ b/connectd/multiplex.c @@ -538,9 +538,6 @@ static struct subd *activate_subd(struct peer *peer, u16 t, *tp; struct subd *subd; - /* If it wasn't active before, it is now! */ - peer->active = true; - subd = multiplex_subd_setup(peer, channel_id, &fd_for_subd); if (!subd) return NULL; @@ -968,14 +965,6 @@ static struct io_plan *write_to_peer(struct io_conn *peer_conn, return io_sock_shutdown(peer_conn); } - /* We close once subds are all closed; or if we're not - active, when told to die. */ - if ((peer->active || peer->ready_to_die) - && tal_count(peer->subds) == 0) { - set_closing_timer(peer, peer_conn); - return io_sock_shutdown(peer_conn); - } - /* If they want us to send gossip, do so now. */ msg = maybe_from_gossip_store(NULL, peer); if (!msg) { @@ -1078,7 +1067,7 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn, peer->last_recv_time = time_now(); /* Don't process packets while we're closing */ - if (peer->ready_to_die) + if (peer->draining) return read_hdr_from_peer(peer_conn, peer); /* If we swallow this, just try again. */ @@ -1180,37 +1169,14 @@ static void destroy_subd(struct subd *subd) struct peer *peer = subd->peer; size_t pos; - status_peer_debug(&peer->id, - "destroy_subd: %zu subds, to_peer conn %p, read_to_die = %u", - tal_count(peer->subds), peer->to_peer, - peer->ready_to_die); for (pos = 0; peer->subds[pos] != subd; pos++) assert(pos < tal_count(peer->subds)); tal_arr_remove(&peer->subds, pos); - /* Make sure we try to keep reading from peer, so we know if - * it hangs up! */ + /* Make sure we try to keep reading from peer (might + * have been waiting for write_to_subd) */ io_wake(&peer->peer_in); - - /* If no peer, finally time to close */ - if (!peer->to_peer && peer->ready_to_die) - peer_conn_closed(peer); -} - -void close_peer_conn(struct peer *peer) -{ - /* Make write_to_peer do flush after writing */ - peer->ready_to_die = true; - - /* Already dead? */ - if (tal_count(peer->subds) == 0 && !peer->to_peer) { - peer_conn_closed(peer); - return; - } - - /* In case it's not currently writing, wake write_to_peer */ - msg_wake(peer->peer_outq); } static struct subd *multiplex_subd_setup(struct peer *peer, @@ -1250,22 +1216,22 @@ static void destroy_peer_conn(struct io_conn *peer_conn, struct peer *peer) assert(peer->to_peer == peer_conn); peer->to_peer = NULL; - /* Flush internal connections if any. */ + /* Flush internal connections if any: last one out will free peer. */ if (tal_count(peer->subds) != 0) { for (size_t i = 0; i < tal_count(peer->subds); i++) msg_wake(peer->subds[i]->outq); return; } - /* If lightningd says we're ready, or we were never had a subd, finish */ - if (peer->ready_to_die || !peer->active) - peer_conn_closed(peer); + /* We never had any subds? Free peer (might already be being freed, + * as it's our parent, but that's allowed by tal). */ + tal_free(peer); } struct io_plan *multiplex_peer_setup(struct io_conn *peer_conn, struct peer *peer) { - /*~ If conn closes, we close the subd connections and wait for + /*~ If conn closes, we drain the subd connections and wait for * lightningd to tell us to close with the peer */ tal_add_destructor2(peer_conn, destroy_peer_conn, peer); diff --git a/connectd/multiplex.h b/connectd/multiplex.h index f389d88da..ce2ed7b02 100644 --- a/connectd/multiplex.h +++ b/connectd/multiplex.h @@ -26,9 +26,6 @@ void setup_peer_gossip_store(struct peer *peer, const struct feature_set *our_features, const u8 *their_features); -/* Start the process of flushing and closing the peer_conn */ -void close_peer_conn(struct peer *peer); - /* When lightningd says to send a ping */ void send_manual_ping(struct daemon *daemon, const u8 *msg);