connectd: always tell master when connection fails/succeeded.

We used to separate implicit connection requests (ie. timed retries
for important peers) and explicit ones, and send a
WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT for the latter.

In the success case, that's now redundant, since we hand the connected
peer to the master using WIRE_CONNECT_PEER_CONNECTED; we just need a
message for the failure case.  And we might as well tell the master
every failure, so we don't have to distinguish internally.

This also solves a race we had before: connectd would send
WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT which completes the incoming
JSON connect command, then send WIRE_CONNECT_PEER_CONNECTED.  So
there's a window where the JSON command can return, but the peer isn't
known to lightningd yet.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2018-08-09 09:55:29 +09:30 committed by Christian Decker
parent 684d60dbda
commit 30f08cc2b0
6 changed files with 85 additions and 117 deletions

View File

@ -47,6 +47,7 @@
#include <netinet/in.h>
#include <secp256k1_ecdh.h>
#include <sodium/randombytes.h>
#include <stdarg.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
@ -189,9 +190,6 @@ struct reaching {
/* FIXME: Support multiple address. */
struct wireaddr_internal addr;
/* Whether connect command is waiting for the result. */
bool master_needs_response;
/* How far did we get? */
const char *connstate;
};
@ -343,24 +341,15 @@ static void reached_peer(struct peer *peer, struct io_conn *conn)
{
/* OK, we've reached the peer successfully, tell everyone. */
struct reaching *r = find_reaching(peer->daemon, &peer->id);
u8 *msg;
if (!r)
return;
/* Don't call connect_failed */
/* Don't call destroy_io_conn */
io_set_finish(conn, NULL, NULL);
/* Don't free conn with reach */
tal_steal(peer->daemon, conn);
/* Tell any connect command what happened. */
if (r->master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, &r->id,
true, "");
daemon_conn_send(&peer->daemon->master, take(msg));
}
tal_free(r);
}
@ -966,27 +955,30 @@ struct io_plan *connection_out(struct io_conn *conn, struct reaching *reach)
handshake_out_success, reach);
}
static void connect_failed(struct io_conn *conn, struct reaching *reach)
static void PRINTF_FMT(3,4)
connect_failed(struct daemon *daemon,
const struct pubkey *id,
const char *errfmt, ...)
{
u8 *msg;
struct important_peerid *imp;
const char *err = tal_fmt(tmpctx, "%s: %s",
reach->connstate,
strerror(errno));
va_list ap;
char *err;
va_start(ap, errfmt);
err = tal_vfmt(tmpctx, errfmt, ap);
va_end(ap);
/* Tell any connect command what happened. */
if (reach->master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, &reach->id,
false, err);
daemon_conn_send(&reach->daemon->master, take(msg));
}
msg = towire_connectctl_connect_failed(NULL, id, err);
daemon_conn_send(&daemon->master, take(msg));
status_trace("Failed connected out for %s",
type_to_string(tmpctx, struct pubkey, &reach->id));
status_trace("Failed connected out for %s: %s",
type_to_string(tmpctx, struct pubkey, id),
err);
/* If we want to keep trying, do so. */
imp = important_peerid_map_get(&reach->daemon->important_peerids,
&reach->id);
imp = important_peerid_map_get(&daemon->important_peerids, id);
if (imp) {
imp->wait_seconds *= 2;
if (imp->wait_seconds > MAX_WAIT_SECONDS)
@ -996,10 +988,16 @@ static void connect_failed(struct io_conn *conn, struct reaching *reach)
imp->wait_seconds);
/* If important_id freed, this will be removed too */
imp->reconnect_timer
= new_reltimer(&reach->daemon->timers, imp,
= new_reltimer(&daemon->timers, imp,
time_from_sec(imp->wait_seconds),
retry_important, imp);
}
}
static void destroy_io_conn(struct io_conn *conn, struct reaching *reach)
{
connect_failed(reach->daemon, &reach->id,
"%s: %s", reach->connstate, strerror(errno));
tal_free(reach);
}
@ -1030,7 +1028,7 @@ static struct io_plan *conn_init(struct io_conn *conn, struct reaching *reach)
}
assert(ai);
io_set_finish(conn, connect_failed, reach);
io_set_finish(conn, destroy_io_conn, reach);
return io_connect(conn, ai, connection_out, reach);
}
@ -1060,7 +1058,7 @@ static struct io_plan *conn_proxy_init(struct io_conn *conn,
status_failed(STATUS_FAIL_INTERNAL_ERROR,
"Can't reach to %u address", reach->addr.itype);
io_set_finish(conn, connect_failed, reach);
io_set_finish(conn, destroy_io_conn, reach);
return io_tor_connect(conn, reach->daemon->proxyaddr, host, port, reach);
}
@ -1130,40 +1128,21 @@ gossip_resolve_addr(const tal_t *ctx, const struct pubkey *id)
return addr;
}
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
bool master_needs_response)
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id)
{
struct wireaddr_internal *a;
struct addrhint *hint;
int fd, af;
struct reaching *reach;
u8 *msg;
bool use_proxy = daemon->use_proxy_always;
if (pubkey_set_get(&daemon->peers, id)) {
status_debug("try_reach_peer: have peer %s",
type_to_string(tmpctx, struct pubkey, id));
if (master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, id,
true,
"");
daemon_conn_send(&daemon->master, take(msg));
}
/* Already done? May happen with timer. */
if (pubkey_set_get(&daemon->peers, id))
return;
}
/* If we're trying to reach it right now, that's OK. */
reach = find_reaching(daemon, id);
if (reach) {
/* Please tell us too. Master should not ask twice (we'll
* only respond once, and so one request will get stuck) */
if (reach->master_needs_response)
status_failed(STATUS_FAIL_MASTER_IO,
"Already reaching %s",
type_to_string(tmpctx, struct pubkey, id));
reach->master_needs_response = master_needs_response;
if (find_reaching(daemon, id))
return;
}
hint = find_addrhint(daemon, id);
if (hint)
@ -1187,14 +1166,7 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
}
if (!a) {
status_debug("No address known for %s, giving up",
type_to_string(tmpctx, struct pubkey, id));
if (master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, id,
false,
"No address known, giving up");
daemon_conn_send(&daemon->master, take(msg));
}
connect_failed(daemon, id, "No address known");
return;
}
@ -1249,17 +1221,11 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
fd = socket(af, SOCK_STREAM, 0);
if (fd < 0) {
char *err = tal_fmt(tmpctx,
"Can't open %i socket for %s (%s), giving up",
af,
type_to_string(tmpctx, struct pubkey, id),
strerror(errno));
status_debug("%s", err);
if (master_needs_response) {
msg = towire_connectctl_connect_to_peer_result(NULL, id,
false, err);
daemon_conn_send(&daemon->master, take(msg));
}
connect_failed(daemon, id,
"Can't open %i socket for %s (%s)",
af,
type_to_string(tmpctx, struct pubkey, id),
strerror(errno));
return;
}
@ -1268,7 +1234,6 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
reach->daemon = daemon;
reach->id = *id;
reach->addr = *a;
reach->master_needs_response = master_needs_response;
reach->connstate = "Connection establishment";
list_add_tail(&daemon->reaching, &reach->list);
tal_add_destructor(reach, destroy_reaching);
@ -1290,7 +1255,7 @@ static void retry_important(struct important_peerid *imp)
if (!imp->daemon->reconnect)
return;
try_reach_peer(imp->daemon, &imp->id, false);
try_reach_peer(imp->daemon, &imp->id);
}
static struct io_plan *connect_to_peer(struct io_conn *conn,
@ -1306,7 +1271,7 @@ static struct io_plan *connect_to_peer(struct io_conn *conn,
imp = important_peerid_map_get(&daemon->important_peerids, &id);
if (imp)
imp->reconnect_timer = tal_free(imp->reconnect_timer);
try_reach_peer(daemon, &id, true);
try_reach_peer(daemon, &id);
return daemon_conn_read_next(conn, &daemon->master);
}
@ -1423,8 +1388,8 @@ static struct io_plan *recv_req(struct io_conn *conn, struct daemon_conn *master
case WIRE_CONNECTCTL_INIT_REPLY:
case WIRE_CONNECTCTL_ACTIVATE_REPLY:
case WIRE_CONNECT_PEER_CONNECTED:
case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT:
case WIRE_CONNECT_RECONNECTED:
case WIRE_CONNECTCTL_CONNECT_FAILED:
break;
}

View File

@ -46,14 +46,10 @@ connectctl_peer_addrhint,,addr,struct wireaddr_internal
connectctl_connect_to_peer,2001
connectctl_connect_to_peer,,id,struct pubkey
# Connectd->master: result (not a reply since it can be out-of-order, but
# you will get one reply for every request).
connectctl_connect_to_peer_result,2020
connectctl_connect_to_peer_result,,id,struct pubkey
# True it connected.
connectctl_connect_to_peer_result,,connected,bool
# Otherwise, why we can't reach them.
connectctl_connect_to_peer_result,,failreason,wirestring
# Connectd->master: connect failed.
connectctl_connect_failed,2020
connectctl_connect_failed,,id,struct pubkey
connectctl_connect_failed,,failreason,wirestring
# Master -> connectd: try to always maintain connection to this peer (or not)
connectctl_peer_important,2010

1 #include <common/cryptomsg.h>
46 connectctl_connect_to_peer_result,,connected,bool connectctl_peer_important,2010
47 # Otherwise, why we can't reach them. connectctl_peer_important,,id,struct pubkey
48 connectctl_connect_to_peer_result,,failreason,wirestring connectctl_peer_important,,important,bool
49 # Master -> connectd: try to always maintain connection to this peer (or not) # Connectd -> master: we got a peer. Two fds: peer and gossip
50 connectctl_peer_important,2010 connect_peer_connected,2002
51 connectctl_peer_important,,id,struct pubkey connect_peer_connected,,id,struct pubkey
52 connectctl_peer_important,,important,bool connect_peer_connected,,addr,struct wireaddr_internal
# Connectd -> master: we got a peer. Two fds: peer and gossip
connect_peer_connected,2002
connect_peer_connected,,id,struct pubkey
connect_peer_connected,,addr,struct wireaddr_internal
53 connect_peer_connected,,crypto_state,struct crypto_state
54 connect_peer_connected,,gflen,u16
55 connect_peer_connected,,gfeatures,gflen*u8

View File

@ -68,32 +68,6 @@ static void connect_cmd_succeed(struct command *cmd, const struct pubkey *id)
command_success(cmd, response);
}
static void connectd_connect_result(struct lightningd *ld, const u8 *msg)
{
struct pubkey id;
bool connected;
char *err;
struct connect *c;
if (!fromwire_connectctl_connect_to_peer_result(tmpctx, msg,
&id,
&connected,
&err))
fatal("Connect gave bad CONNECTCTL_CONNECT_TO_PEER_RESULT message %s",
tal_hex(msg, msg));
/* We can have multiple connect commands: complete them all */
while ((c = find_connect(ld, &id)) != NULL) {
if (connected) {
connect_cmd_succeed(c->cmd, &id);
} else {
command_fail(c->cmd, LIGHTNINGD, "%s", err);
}
/* They delete themselves from list */
}
}
static void json_connect(struct command *cmd,
const char *buffer, const jsmntok_t *params)
{
@ -200,12 +174,10 @@ static void json_connect(struct command *cmd,
subd_send_msg(cmd->ld->connectd, take(msg));
}
/* If there isn't already a connect command, tell connectd */
if (!find_connect(cmd->ld, &id)) {
msg = towire_connectctl_connect_to_peer(NULL, &id);
subd_send_msg(cmd->ld->connectd, take(msg));
}
/* Leave this here for connect_connect_result */
msg = towire_connectctl_connect_to_peer(NULL, &id);
subd_send_msg(cmd->ld->connectd, take(msg));
/* Leave this here for peer_connected or connect_failed. */
new_connect(cmd->ld, &id, cmd);
command_still_pending(cmd);
}
@ -218,6 +190,34 @@ static const struct json_command connect_command = {
};
AUTODATA(json_command, &connect_command);
static void connect_failed(struct lightningd *ld, const u8 *msg)
{
struct pubkey id;
char *err;
struct connect *c;
if (!fromwire_connectctl_connect_failed(tmpctx, msg, &id, &err))
fatal("Connect gave bad CONNECTCTL_CONNECT_FAILED message %s",
tal_hex(msg, msg));
/* We can have multiple connect commands: fail them all */
while ((c = find_connect(ld, &id)) != NULL) {
/* They delete themselves from list */
command_fail(c->cmd, LIGHTNINGD, "%s", err);
}
}
void connect_succeeded(struct lightningd *ld, const struct pubkey *id)
{
struct connect *c;
/* We can have multiple connect commands: fail them all */
while ((c = find_connect(ld, id)) != NULL) {
/* They delete themselves from list */
connect_cmd_succeed(c->cmd, id);
}
}
static void peer_please_disconnect(struct lightningd *ld, const u8 *msg)
{
struct pubkey id;
@ -261,8 +261,8 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
peer_connected(connectd->ld, msg, fds[0], fds[1]);
break;
case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT:
connectd_connect_result(connectd->ld, msg);
case WIRE_CONNECTCTL_CONNECT_FAILED:
connect_failed(connectd->ld, msg);
break;
}
return 0;

View File

@ -9,6 +9,7 @@ struct pubkey;
int connectd_init(struct lightningd *ld);
void connectd_activate(struct lightningd *ld);
void connect_succeeded(struct lightningd *ld, const struct pubkey *id);
void gossip_connect_result(struct lightningd *ld, const u8 *msg);
#endif /* LIGHTNING_LIGHTNINGD_CONNECT_CONTROL_H */

View File

@ -448,6 +448,9 @@ void peer_connected(struct lightningd *ld, const u8 *msg,
fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
tal_hex(msg, msg));
/* Complete any outstanding connect commands. */
connect_succeeded(ld, &id);
/* If we're already dealing with this peer, hand off to correct
* subdaemon. Otherwise, we'll hand to openingd to wait there. */
peer = peer_by_id(ld, &id);

View File

@ -59,6 +59,9 @@ void command_still_pending(struct command *cmd UNNEEDED)
/* Generated stub for command_success */
void command_success(struct command *cmd UNNEEDED, struct json_result *response UNNEEDED)
{ fprintf(stderr, "command_success called!\n"); abort(); }
/* Generated stub for connect_succeeded */
void connect_succeeded(struct lightningd *ld UNNEEDED, const struct pubkey *id UNNEEDED)
{ fprintf(stderr, "connect_succeeded called!\n"); abort(); }
/* Generated stub for fromwire_connect_peer_connected */
bool fromwire_connect_peer_connected(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, struct pubkey *id UNNEEDED, struct wireaddr_internal *addr UNNEEDED, struct crypto_state *crypto_state UNNEEDED, u8 **gfeatures UNNEEDED, u8 **lfeatures UNNEEDED)
{ fprintf(stderr, "fromwire_connect_peer_connected called!\n"); abort(); }