connectd: release peer soon as lightingd tells us.

Now we have separate peer draining logic, we can simply use it when
connectd tells us to release the peer, without waiting.  (We could
simply free the peer, but that's a bit rude, as messages can get
lost).

This removes various complex flags and logic we had before.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Changelog-Fixed: `connectd`: various crashes and issues fixed by simplification and rewrite.
This commit is contained in:
Rusty Russell 2022-07-16 14:19:30 +09:30 committed by neil saitug
parent e856accb7d
commit 8678c5efb3
4 changed files with 16 additions and 96 deletions

View File

@ -315,8 +315,6 @@ static struct peer *new_peer(struct daemon *daemon,
peer->peer_in = NULL;
peer->sent_to_peer = NULL;
peer->urgent = false;
peer->ready_to_die = false;
peer->active = false;
peer->draining = false;
peer->peer_outq = msg_queue_new(peer, false);
peer->last_recv_time = time_now();
@ -1832,17 +1830,12 @@ static void try_connect_peer(struct daemon *daemon,
/* Already existing? */
existing = peer_htable_get(&daemon->peers, id);
if (existing) {
/* If it's exiting now, we've raced: reconnect after */
if ((tal_count(existing->subds) != 0 || !existing->active)
&& existing->to_peer
&& !existing->ready_to_die) {
/* Tell it it's already connected so it doesn't
* wait forever. */
daemon_conn_send(daemon->master,
take(towire_connectd_peer_already_connected
(NULL, id)));
return;
}
/* FIXME: Tell it it's already connected so it doesn't
* wait forever. */
daemon_conn_send(daemon->master,
take(towire_connectd_peer_already_connected
(NULL, id)));
return;
}
/* If we're trying to connect it right now, that's OK. */
@ -1935,30 +1928,6 @@ static void connect_to_peer(struct daemon *daemon, const u8 *msg)
try_connect_peer(daemon, &id, seconds_waited, addrs, addrhint);
}
void peer_conn_closed(struct peer *peer)
{
struct connecting *connect = find_connecting(peer->daemon, &peer->id);
/* These should be closed already! */
assert(!peer->to_peer);
assert(peer->ready_to_die || !peer->active);
status_peer_debug(&peer->id, "peer_conn_closed");
/* Wake up in case there's a reconnecting peer waiting in io_wait. */
io_wake(peer);
/* Note: deleting from a htable (a-la node_set_del) does not free it:
* htable doesn't assume it's a tal object at all. That's why we have
* a destructor attached to peer (called destroy_peer by
* convention). */
tal_free(peer);
/* If we wanted to connect to it, but found it was exiting, try again */
if (connect && !connect->conn)
try_connect_one_addr(connect);
}
/* lightningd tells us a peer should be disconnected. */
static void peer_discard(struct daemon *daemon, const u8 *msg)
{
@ -1974,9 +1943,7 @@ static void peer_discard(struct daemon *daemon, const u8 *msg)
if (!peer)
return;
status_peer_debug(&id, "disconnect");
/* When it's finished, it will call peer_conn_closed() */
close_peer_conn(peer);
tal_free(peer);
}
/* lightningd tells us to send a msg and disconnect. */

View File

@ -59,13 +59,6 @@ struct peer {
/* Connections to the subdaemons */
struct subd **subds;
/* Set once lightningd says it's OK to close (subd tells it
* it's done). */
bool ready_to_die;
/* Has this ever been active? (i.e. ever had a subd attached?) */
bool active;
/* When socket has Nagle overridden */
bool urgent;
@ -220,10 +213,7 @@ struct io_plan *peer_connected(struct io_conn *conn,
bool incoming,
bool retrying);
/* Called when peer->peer_conn is finally freed */
void peer_conn_closed(struct peer *peer);
/* Removes peer from hash table */
/* Removes peer from hash table, tells gossipd and lightningd. */
void destroy_peer(struct peer *peer);
#endif /* LIGHTNING_CONNECTD_CONNECTD_H */

View File

@ -538,9 +538,6 @@ static struct subd *activate_subd(struct peer *peer,
u16 t, *tp;
struct subd *subd;
/* If it wasn't active before, it is now! */
peer->active = true;
subd = multiplex_subd_setup(peer, channel_id, &fd_for_subd);
if (!subd)
return NULL;
@ -968,14 +965,6 @@ static struct io_plan *write_to_peer(struct io_conn *peer_conn,
return io_sock_shutdown(peer_conn);
}
/* We close once subds are all closed; or if we're not
active, when told to die. */
if ((peer->active || peer->ready_to_die)
&& tal_count(peer->subds) == 0) {
set_closing_timer(peer, peer_conn);
return io_sock_shutdown(peer_conn);
}
/* If they want us to send gossip, do so now. */
msg = maybe_from_gossip_store(NULL, peer);
if (!msg) {
@ -1078,7 +1067,7 @@ static struct io_plan *read_body_from_peer_done(struct io_conn *peer_conn,
peer->last_recv_time = time_now();
/* Don't process packets while we're closing */
if (peer->ready_to_die)
if (peer->draining)
return read_hdr_from_peer(peer_conn, peer);
/* If we swallow this, just try again. */
@ -1180,37 +1169,14 @@ static void destroy_subd(struct subd *subd)
struct peer *peer = subd->peer;
size_t pos;
status_peer_debug(&peer->id,
"destroy_subd: %zu subds, to_peer conn %p, read_to_die = %u",
tal_count(peer->subds), peer->to_peer,
peer->ready_to_die);
for (pos = 0; peer->subds[pos] != subd; pos++)
assert(pos < tal_count(peer->subds));
tal_arr_remove(&peer->subds, pos);
/* Make sure we try to keep reading from peer, so we know if
* it hangs up! */
/* Make sure we try to keep reading from peer (might
* have been waiting for write_to_subd) */
io_wake(&peer->peer_in);
/* If no peer, finally time to close */
if (!peer->to_peer && peer->ready_to_die)
peer_conn_closed(peer);
}
void close_peer_conn(struct peer *peer)
{
/* Make write_to_peer do flush after writing */
peer->ready_to_die = true;
/* Already dead? */
if (tal_count(peer->subds) == 0 && !peer->to_peer) {
peer_conn_closed(peer);
return;
}
/* In case it's not currently writing, wake write_to_peer */
msg_wake(peer->peer_outq);
}
static struct subd *multiplex_subd_setup(struct peer *peer,
@ -1250,22 +1216,22 @@ static void destroy_peer_conn(struct io_conn *peer_conn, struct peer *peer)
assert(peer->to_peer == peer_conn);
peer->to_peer = NULL;
/* Flush internal connections if any. */
/* Flush internal connections if any: last one out will free peer. */
if (tal_count(peer->subds) != 0) {
for (size_t i = 0; i < tal_count(peer->subds); i++)
msg_wake(peer->subds[i]->outq);
return;
}
/* If lightningd says we're ready, or we were never had a subd, finish */
if (peer->ready_to_die || !peer->active)
peer_conn_closed(peer);
/* We never had any subds? Free peer (might already be being freed,
* as it's our parent, but that's allowed by tal). */
tal_free(peer);
}
struct io_plan *multiplex_peer_setup(struct io_conn *peer_conn,
struct peer *peer)
{
/*~ If conn closes, we close the subd connections and wait for
/*~ If conn closes, we drain the subd connections and wait for
* lightningd to tell us to close with the peer */
tal_add_destructor2(peer_conn, destroy_peer_conn, peer);

View File

@ -26,9 +26,6 @@ void setup_peer_gossip_store(struct peer *peer,
const struct feature_set *our_features,
const u8 *their_features);
/* Start the process of flushing and closing the peer_conn */
void close_peer_conn(struct peer *peer);
/* When lightningd says to send a ping */
void send_manual_ping(struct daemon *daemon, const u8 *msg);