From 30f08cc2b0640a6cbfe386c5728b5d16ff78f986 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 9 Aug 2018 09:55:29 +0930 Subject: [PATCH] connectd: always tell master when connection fails/succeeded. We used to separate implicit connection requests (ie. timed retries for important peers) and explicit ones, and send a WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT for the latter. In the success case, that's now redundant, since we hand the connected peer to the master using WIRE_CONNECT_PEER_CONNECTED; we just need a message for the failure case. And we might as well tell the master every failure, so we don't have to distinguish internally. This also solves a race we had before: connectd would send WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT which completes the incoming JSON connect command, then send WIRE_CONNECT_PEER_CONNECTED. So there's a window where the JSON command can return, but the peer isn't known to lightningd yet. Signed-off-by: Rusty Russell --- connectd/connect.c | 115 ++++++++++++----------------------- connectd/connect_wire.csv | 12 ++-- lightningd/connect_control.c | 68 ++++++++++----------- lightningd/connect_control.h | 1 + lightningd/peer_control.c | 3 + wallet/test/run-wallet.c | 3 + 6 files changed, 85 insertions(+), 117 deletions(-) diff --git a/connectd/connect.c b/connectd/connect.c index 7cd4e4f4e..fe39fc9cf 100644 --- a/connectd/connect.c +++ b/connectd/connect.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -189,9 +190,6 @@ struct reaching { /* FIXME: Support multiple address. */ struct wireaddr_internal addr; - /* Whether connect command is waiting for the result. */ - bool master_needs_response; - /* How far did we get? */ const char *connstate; }; @@ -343,24 +341,15 @@ static void reached_peer(struct peer *peer, struct io_conn *conn) { /* OK, we've reached the peer successfully, tell everyone. */ struct reaching *r = find_reaching(peer->daemon, &peer->id); - u8 *msg; if (!r) return; - /* Don't call connect_failed */ + /* Don't call destroy_io_conn */ io_set_finish(conn, NULL, NULL); /* Don't free conn with reach */ tal_steal(peer->daemon, conn); - - /* Tell any connect command what happened. */ - if (r->master_needs_response) { - msg = towire_connectctl_connect_to_peer_result(NULL, &r->id, - true, ""); - daemon_conn_send(&peer->daemon->master, take(msg)); - } - tal_free(r); } @@ -966,27 +955,30 @@ struct io_plan *connection_out(struct io_conn *conn, struct reaching *reach) handshake_out_success, reach); } -static void connect_failed(struct io_conn *conn, struct reaching *reach) +static void PRINTF_FMT(3,4) + connect_failed(struct daemon *daemon, + const struct pubkey *id, + const char *errfmt, ...) { u8 *msg; struct important_peerid *imp; - const char *err = tal_fmt(tmpctx, "%s: %s", - reach->connstate, - strerror(errno)); + va_list ap; + char *err; + + va_start(ap, errfmt); + err = tal_vfmt(tmpctx, errfmt, ap); + va_end(ap); /* Tell any connect command what happened. */ - if (reach->master_needs_response) { - msg = towire_connectctl_connect_to_peer_result(NULL, &reach->id, - false, err); - daemon_conn_send(&reach->daemon->master, take(msg)); - } + msg = towire_connectctl_connect_failed(NULL, id, err); + daemon_conn_send(&daemon->master, take(msg)); - status_trace("Failed connected out for %s", - type_to_string(tmpctx, struct pubkey, &reach->id)); + status_trace("Failed connected out for %s: %s", + type_to_string(tmpctx, struct pubkey, id), + err); /* If we want to keep trying, do so. */ - imp = important_peerid_map_get(&reach->daemon->important_peerids, - &reach->id); + imp = important_peerid_map_get(&daemon->important_peerids, id); if (imp) { imp->wait_seconds *= 2; if (imp->wait_seconds > MAX_WAIT_SECONDS) @@ -996,10 +988,16 @@ static void connect_failed(struct io_conn *conn, struct reaching *reach) imp->wait_seconds); /* If important_id freed, this will be removed too */ imp->reconnect_timer - = new_reltimer(&reach->daemon->timers, imp, + = new_reltimer(&daemon->timers, imp, time_from_sec(imp->wait_seconds), retry_important, imp); } +} + +static void destroy_io_conn(struct io_conn *conn, struct reaching *reach) +{ + connect_failed(reach->daemon, &reach->id, + "%s: %s", reach->connstate, strerror(errno)); tal_free(reach); } @@ -1030,7 +1028,7 @@ static struct io_plan *conn_init(struct io_conn *conn, struct reaching *reach) } assert(ai); - io_set_finish(conn, connect_failed, reach); + io_set_finish(conn, destroy_io_conn, reach); return io_connect(conn, ai, connection_out, reach); } @@ -1060,7 +1058,7 @@ static struct io_plan *conn_proxy_init(struct io_conn *conn, status_failed(STATUS_FAIL_INTERNAL_ERROR, "Can't reach to %u address", reach->addr.itype); - io_set_finish(conn, connect_failed, reach); + io_set_finish(conn, destroy_io_conn, reach); return io_tor_connect(conn, reach->daemon->proxyaddr, host, port, reach); } @@ -1130,40 +1128,21 @@ gossip_resolve_addr(const tal_t *ctx, const struct pubkey *id) return addr; } -static void try_reach_peer(struct daemon *daemon, const struct pubkey *id, - bool master_needs_response) +static void try_reach_peer(struct daemon *daemon, const struct pubkey *id) { struct wireaddr_internal *a; struct addrhint *hint; int fd, af; struct reaching *reach; - u8 *msg; bool use_proxy = daemon->use_proxy_always; - if (pubkey_set_get(&daemon->peers, id)) { - status_debug("try_reach_peer: have peer %s", - type_to_string(tmpctx, struct pubkey, id)); - if (master_needs_response) { - msg = towire_connectctl_connect_to_peer_result(NULL, id, - true, - ""); - daemon_conn_send(&daemon->master, take(msg)); - } + /* Already done? May happen with timer. */ + if (pubkey_set_get(&daemon->peers, id)) return; - } /* If we're trying to reach it right now, that's OK. */ - reach = find_reaching(daemon, id); - if (reach) { - /* Please tell us too. Master should not ask twice (we'll - * only respond once, and so one request will get stuck) */ - if (reach->master_needs_response) - status_failed(STATUS_FAIL_MASTER_IO, - "Already reaching %s", - type_to_string(tmpctx, struct pubkey, id)); - reach->master_needs_response = master_needs_response; + if (find_reaching(daemon, id)) return; - } hint = find_addrhint(daemon, id); if (hint) @@ -1187,14 +1166,7 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id, } if (!a) { - status_debug("No address known for %s, giving up", - type_to_string(tmpctx, struct pubkey, id)); - if (master_needs_response) { - msg = towire_connectctl_connect_to_peer_result(NULL, id, - false, - "No address known, giving up"); - daemon_conn_send(&daemon->master, take(msg)); - } + connect_failed(daemon, id, "No address known"); return; } @@ -1249,17 +1221,11 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id, fd = socket(af, SOCK_STREAM, 0); if (fd < 0) { - char *err = tal_fmt(tmpctx, - "Can't open %i socket for %s (%s), giving up", - af, - type_to_string(tmpctx, struct pubkey, id), - strerror(errno)); - status_debug("%s", err); - if (master_needs_response) { - msg = towire_connectctl_connect_to_peer_result(NULL, id, - false, err); - daemon_conn_send(&daemon->master, take(msg)); - } + connect_failed(daemon, id, + "Can't open %i socket for %s (%s)", + af, + type_to_string(tmpctx, struct pubkey, id), + strerror(errno)); return; } @@ -1268,7 +1234,6 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id, reach->daemon = daemon; reach->id = *id; reach->addr = *a; - reach->master_needs_response = master_needs_response; reach->connstate = "Connection establishment"; list_add_tail(&daemon->reaching, &reach->list); tal_add_destructor(reach, destroy_reaching); @@ -1290,7 +1255,7 @@ static void retry_important(struct important_peerid *imp) if (!imp->daemon->reconnect) return; - try_reach_peer(imp->daemon, &imp->id, false); + try_reach_peer(imp->daemon, &imp->id); } static struct io_plan *connect_to_peer(struct io_conn *conn, @@ -1306,7 +1271,7 @@ static struct io_plan *connect_to_peer(struct io_conn *conn, imp = important_peerid_map_get(&daemon->important_peerids, &id); if (imp) imp->reconnect_timer = tal_free(imp->reconnect_timer); - try_reach_peer(daemon, &id, true); + try_reach_peer(daemon, &id); return daemon_conn_read_next(conn, &daemon->master); } @@ -1423,8 +1388,8 @@ static struct io_plan *recv_req(struct io_conn *conn, struct daemon_conn *master case WIRE_CONNECTCTL_INIT_REPLY: case WIRE_CONNECTCTL_ACTIVATE_REPLY: case WIRE_CONNECT_PEER_CONNECTED: - case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT: case WIRE_CONNECT_RECONNECTED: + case WIRE_CONNECTCTL_CONNECT_FAILED: break; } diff --git a/connectd/connect_wire.csv b/connectd/connect_wire.csv index 11bc2aff9..f374cb200 100644 --- a/connectd/connect_wire.csv +++ b/connectd/connect_wire.csv @@ -46,14 +46,10 @@ connectctl_peer_addrhint,,addr,struct wireaddr_internal connectctl_connect_to_peer,2001 connectctl_connect_to_peer,,id,struct pubkey -# Connectd->master: result (not a reply since it can be out-of-order, but -# you will get one reply for every request). -connectctl_connect_to_peer_result,2020 -connectctl_connect_to_peer_result,,id,struct pubkey -# True it connected. -connectctl_connect_to_peer_result,,connected,bool -# Otherwise, why we can't reach them. -connectctl_connect_to_peer_result,,failreason,wirestring +# Connectd->master: connect failed. +connectctl_connect_failed,2020 +connectctl_connect_failed,,id,struct pubkey +connectctl_connect_failed,,failreason,wirestring # Master -> connectd: try to always maintain connection to this peer (or not) connectctl_peer_important,2010 diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index 569f389b9..62c2cacde 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -68,32 +68,6 @@ static void connect_cmd_succeed(struct command *cmd, const struct pubkey *id) command_success(cmd, response); } -static void connectd_connect_result(struct lightningd *ld, const u8 *msg) -{ - struct pubkey id; - bool connected; - char *err; - struct connect *c; - - if (!fromwire_connectctl_connect_to_peer_result(tmpctx, msg, - &id, - &connected, - &err)) - fatal("Connect gave bad CONNECTCTL_CONNECT_TO_PEER_RESULT message %s", - tal_hex(msg, msg)); - - - /* We can have multiple connect commands: complete them all */ - while ((c = find_connect(ld, &id)) != NULL) { - if (connected) { - connect_cmd_succeed(c->cmd, &id); - } else { - command_fail(c->cmd, LIGHTNINGD, "%s", err); - } - /* They delete themselves from list */ - } -} - static void json_connect(struct command *cmd, const char *buffer, const jsmntok_t *params) { @@ -200,12 +174,10 @@ static void json_connect(struct command *cmd, subd_send_msg(cmd->ld->connectd, take(msg)); } - /* If there isn't already a connect command, tell connectd */ - if (!find_connect(cmd->ld, &id)) { - msg = towire_connectctl_connect_to_peer(NULL, &id); - subd_send_msg(cmd->ld->connectd, take(msg)); - } - /* Leave this here for connect_connect_result */ + msg = towire_connectctl_connect_to_peer(NULL, &id); + subd_send_msg(cmd->ld->connectd, take(msg)); + + /* Leave this here for peer_connected or connect_failed. */ new_connect(cmd->ld, &id, cmd); command_still_pending(cmd); } @@ -218,6 +190,34 @@ static const struct json_command connect_command = { }; AUTODATA(json_command, &connect_command); +static void connect_failed(struct lightningd *ld, const u8 *msg) +{ + struct pubkey id; + char *err; + struct connect *c; + + if (!fromwire_connectctl_connect_failed(tmpctx, msg, &id, &err)) + fatal("Connect gave bad CONNECTCTL_CONNECT_FAILED message %s", + tal_hex(msg, msg)); + + /* We can have multiple connect commands: fail them all */ + while ((c = find_connect(ld, &id)) != NULL) { + /* They delete themselves from list */ + command_fail(c->cmd, LIGHTNINGD, "%s", err); + } +} + +void connect_succeeded(struct lightningd *ld, const struct pubkey *id) +{ + struct connect *c; + + /* We can have multiple connect commands: fail them all */ + while ((c = find_connect(ld, id)) != NULL) { + /* They delete themselves from list */ + connect_cmd_succeed(c->cmd, id); + } +} + static void peer_please_disconnect(struct lightningd *ld, const u8 *msg) { struct pubkey id; @@ -261,8 +261,8 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd peer_connected(connectd->ld, msg, fds[0], fds[1]); break; - case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT: - connectd_connect_result(connectd->ld, msg); + case WIRE_CONNECTCTL_CONNECT_FAILED: + connect_failed(connectd->ld, msg); break; } return 0; diff --git a/lightningd/connect_control.h b/lightningd/connect_control.h index 4f07b6d5b..16babfc2f 100644 --- a/lightningd/connect_control.h +++ b/lightningd/connect_control.h @@ -9,6 +9,7 @@ struct pubkey; int connectd_init(struct lightningd *ld); void connectd_activate(struct lightningd *ld); +void connect_succeeded(struct lightningd *ld, const struct pubkey *id); void gossip_connect_result(struct lightningd *ld, const u8 *msg); #endif /* LIGHTNING_LIGHTNINGD_CONNECT_CONTROL_H */ diff --git a/lightningd/peer_control.c b/lightningd/peer_control.c index 560526b3a..7a1d477e1 100644 --- a/lightningd/peer_control.c +++ b/lightningd/peer_control.c @@ -448,6 +448,9 @@ void peer_connected(struct lightningd *ld, const u8 *msg, fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s", tal_hex(msg, msg)); + /* Complete any outstanding connect commands. */ + connect_succeeded(ld, &id); + /* If we're already dealing with this peer, hand off to correct * subdaemon. Otherwise, we'll hand to openingd to wait there. */ peer = peer_by_id(ld, &id); diff --git a/wallet/test/run-wallet.c b/wallet/test/run-wallet.c index 09f8de9b1..a164bf845 100644 --- a/wallet/test/run-wallet.c +++ b/wallet/test/run-wallet.c @@ -59,6 +59,9 @@ void command_still_pending(struct command *cmd UNNEEDED) /* Generated stub for command_success */ void command_success(struct command *cmd UNNEEDED, struct json_result *response UNNEEDED) { fprintf(stderr, "command_success called!\n"); abort(); } +/* Generated stub for connect_succeeded */ +void connect_succeeded(struct lightningd *ld UNNEEDED, const struct pubkey *id UNNEEDED) +{ fprintf(stderr, "connect_succeeded called!\n"); abort(); } /* Generated stub for fromwire_connect_peer_connected */ bool fromwire_connect_peer_connected(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, struct pubkey *id UNNEEDED, struct wireaddr_internal *addr UNNEEDED, struct crypto_state *crypto_state UNNEEDED, u8 **gfeatures UNNEEDED, u8 **lfeatures UNNEEDED) { fprintf(stderr, "fromwire_connect_peer_connected called!\n"); abort(); }