connectd: always tell master when connection fails/succeeded.
We used to separate implicit connection requests (ie. timed retries for important peers) and explicit ones, and send a WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT for the latter. In the success case, that's now redundant, since we hand the connected peer to the master using WIRE_CONNECT_PEER_CONNECTED; we just need a message for the failure case. And we might as well tell the master every failure, so we don't have to distinguish internally. This also solves a race we had before: connectd would send WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT which completes the incoming JSON connect command, then send WIRE_CONNECT_PEER_CONNECTED. So there's a window where the JSON command can return, but the peer isn't known to lightningd yet. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
parent
684d60dbda
commit
30f08cc2b0
|
@ -47,6 +47,7 @@
|
|||
#include <netinet/in.h>
|
||||
#include <secp256k1_ecdh.h>
|
||||
#include <sodium/randombytes.h>
|
||||
#include <stdarg.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
@ -189,9 +190,6 @@ struct reaching {
|
|||
/* FIXME: Support multiple address. */
|
||||
struct wireaddr_internal addr;
|
||||
|
||||
/* Whether connect command is waiting for the result. */
|
||||
bool master_needs_response;
|
||||
|
||||
/* How far did we get? */
|
||||
const char *connstate;
|
||||
};
|
||||
|
@ -343,24 +341,15 @@ static void reached_peer(struct peer *peer, struct io_conn *conn)
|
|||
{
|
||||
/* OK, we've reached the peer successfully, tell everyone. */
|
||||
struct reaching *r = find_reaching(peer->daemon, &peer->id);
|
||||
u8 *msg;
|
||||
|
||||
if (!r)
|
||||
return;
|
||||
|
||||
/* Don't call connect_failed */
|
||||
/* Don't call destroy_io_conn */
|
||||
io_set_finish(conn, NULL, NULL);
|
||||
|
||||
/* Don't free conn with reach */
|
||||
tal_steal(peer->daemon, conn);
|
||||
|
||||
/* Tell any connect command what happened. */
|
||||
if (r->master_needs_response) {
|
||||
msg = towire_connectctl_connect_to_peer_result(NULL, &r->id,
|
||||
true, "");
|
||||
daemon_conn_send(&peer->daemon->master, take(msg));
|
||||
}
|
||||
|
||||
tal_free(r);
|
||||
}
|
||||
|
||||
|
@ -966,27 +955,30 @@ struct io_plan *connection_out(struct io_conn *conn, struct reaching *reach)
|
|||
handshake_out_success, reach);
|
||||
}
|
||||
|
||||
static void connect_failed(struct io_conn *conn, struct reaching *reach)
|
||||
static void PRINTF_FMT(3,4)
|
||||
connect_failed(struct daemon *daemon,
|
||||
const struct pubkey *id,
|
||||
const char *errfmt, ...)
|
||||
{
|
||||
u8 *msg;
|
||||
struct important_peerid *imp;
|
||||
const char *err = tal_fmt(tmpctx, "%s: %s",
|
||||
reach->connstate,
|
||||
strerror(errno));
|
||||
va_list ap;
|
||||
char *err;
|
||||
|
||||
va_start(ap, errfmt);
|
||||
err = tal_vfmt(tmpctx, errfmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
/* Tell any connect command what happened. */
|
||||
if (reach->master_needs_response) {
|
||||
msg = towire_connectctl_connect_to_peer_result(NULL, &reach->id,
|
||||
false, err);
|
||||
daemon_conn_send(&reach->daemon->master, take(msg));
|
||||
}
|
||||
msg = towire_connectctl_connect_failed(NULL, id, err);
|
||||
daemon_conn_send(&daemon->master, take(msg));
|
||||
|
||||
status_trace("Failed connected out for %s",
|
||||
type_to_string(tmpctx, struct pubkey, &reach->id));
|
||||
status_trace("Failed connected out for %s: %s",
|
||||
type_to_string(tmpctx, struct pubkey, id),
|
||||
err);
|
||||
|
||||
/* If we want to keep trying, do so. */
|
||||
imp = important_peerid_map_get(&reach->daemon->important_peerids,
|
||||
&reach->id);
|
||||
imp = important_peerid_map_get(&daemon->important_peerids, id);
|
||||
if (imp) {
|
||||
imp->wait_seconds *= 2;
|
||||
if (imp->wait_seconds > MAX_WAIT_SECONDS)
|
||||
|
@ -996,10 +988,16 @@ static void connect_failed(struct io_conn *conn, struct reaching *reach)
|
|||
imp->wait_seconds);
|
||||
/* If important_id freed, this will be removed too */
|
||||
imp->reconnect_timer
|
||||
= new_reltimer(&reach->daemon->timers, imp,
|
||||
= new_reltimer(&daemon->timers, imp,
|
||||
time_from_sec(imp->wait_seconds),
|
||||
retry_important, imp);
|
||||
}
|
||||
}
|
||||
|
||||
static void destroy_io_conn(struct io_conn *conn, struct reaching *reach)
|
||||
{
|
||||
connect_failed(reach->daemon, &reach->id,
|
||||
"%s: %s", reach->connstate, strerror(errno));
|
||||
tal_free(reach);
|
||||
}
|
||||
|
||||
|
@ -1030,7 +1028,7 @@ static struct io_plan *conn_init(struct io_conn *conn, struct reaching *reach)
|
|||
}
|
||||
assert(ai);
|
||||
|
||||
io_set_finish(conn, connect_failed, reach);
|
||||
io_set_finish(conn, destroy_io_conn, reach);
|
||||
return io_connect(conn, ai, connection_out, reach);
|
||||
}
|
||||
|
||||
|
@ -1060,7 +1058,7 @@ static struct io_plan *conn_proxy_init(struct io_conn *conn,
|
|||
status_failed(STATUS_FAIL_INTERNAL_ERROR,
|
||||
"Can't reach to %u address", reach->addr.itype);
|
||||
|
||||
io_set_finish(conn, connect_failed, reach);
|
||||
io_set_finish(conn, destroy_io_conn, reach);
|
||||
return io_tor_connect(conn, reach->daemon->proxyaddr, host, port, reach);
|
||||
}
|
||||
|
||||
|
@ -1130,40 +1128,21 @@ gossip_resolve_addr(const tal_t *ctx, const struct pubkey *id)
|
|||
return addr;
|
||||
}
|
||||
|
||||
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
|
||||
bool master_needs_response)
|
||||
static void try_reach_peer(struct daemon *daemon, const struct pubkey *id)
|
||||
{
|
||||
struct wireaddr_internal *a;
|
||||
struct addrhint *hint;
|
||||
int fd, af;
|
||||
struct reaching *reach;
|
||||
u8 *msg;
|
||||
bool use_proxy = daemon->use_proxy_always;
|
||||
|
||||
if (pubkey_set_get(&daemon->peers, id)) {
|
||||
status_debug("try_reach_peer: have peer %s",
|
||||
type_to_string(tmpctx, struct pubkey, id));
|
||||
if (master_needs_response) {
|
||||
msg = towire_connectctl_connect_to_peer_result(NULL, id,
|
||||
true,
|
||||
"");
|
||||
daemon_conn_send(&daemon->master, take(msg));
|
||||
}
|
||||
/* Already done? May happen with timer. */
|
||||
if (pubkey_set_get(&daemon->peers, id))
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we're trying to reach it right now, that's OK. */
|
||||
reach = find_reaching(daemon, id);
|
||||
if (reach) {
|
||||
/* Please tell us too. Master should not ask twice (we'll
|
||||
* only respond once, and so one request will get stuck) */
|
||||
if (reach->master_needs_response)
|
||||
status_failed(STATUS_FAIL_MASTER_IO,
|
||||
"Already reaching %s",
|
||||
type_to_string(tmpctx, struct pubkey, id));
|
||||
reach->master_needs_response = master_needs_response;
|
||||
if (find_reaching(daemon, id))
|
||||
return;
|
||||
}
|
||||
|
||||
hint = find_addrhint(daemon, id);
|
||||
if (hint)
|
||||
|
@ -1187,14 +1166,7 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
|
|||
}
|
||||
|
||||
if (!a) {
|
||||
status_debug("No address known for %s, giving up",
|
||||
type_to_string(tmpctx, struct pubkey, id));
|
||||
if (master_needs_response) {
|
||||
msg = towire_connectctl_connect_to_peer_result(NULL, id,
|
||||
false,
|
||||
"No address known, giving up");
|
||||
daemon_conn_send(&daemon->master, take(msg));
|
||||
}
|
||||
connect_failed(daemon, id, "No address known");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1249,17 +1221,11 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
|
|||
fd = socket(af, SOCK_STREAM, 0);
|
||||
|
||||
if (fd < 0) {
|
||||
char *err = tal_fmt(tmpctx,
|
||||
"Can't open %i socket for %s (%s), giving up",
|
||||
af,
|
||||
type_to_string(tmpctx, struct pubkey, id),
|
||||
strerror(errno));
|
||||
status_debug("%s", err);
|
||||
if (master_needs_response) {
|
||||
msg = towire_connectctl_connect_to_peer_result(NULL, id,
|
||||
false, err);
|
||||
daemon_conn_send(&daemon->master, take(msg));
|
||||
}
|
||||
connect_failed(daemon, id,
|
||||
"Can't open %i socket for %s (%s)",
|
||||
af,
|
||||
type_to_string(tmpctx, struct pubkey, id),
|
||||
strerror(errno));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1268,7 +1234,6 @@ static void try_reach_peer(struct daemon *daemon, const struct pubkey *id,
|
|||
reach->daemon = daemon;
|
||||
reach->id = *id;
|
||||
reach->addr = *a;
|
||||
reach->master_needs_response = master_needs_response;
|
||||
reach->connstate = "Connection establishment";
|
||||
list_add_tail(&daemon->reaching, &reach->list);
|
||||
tal_add_destructor(reach, destroy_reaching);
|
||||
|
@ -1290,7 +1255,7 @@ static void retry_important(struct important_peerid *imp)
|
|||
if (!imp->daemon->reconnect)
|
||||
return;
|
||||
|
||||
try_reach_peer(imp->daemon, &imp->id, false);
|
||||
try_reach_peer(imp->daemon, &imp->id);
|
||||
}
|
||||
|
||||
static struct io_plan *connect_to_peer(struct io_conn *conn,
|
||||
|
@ -1306,7 +1271,7 @@ static struct io_plan *connect_to_peer(struct io_conn *conn,
|
|||
imp = important_peerid_map_get(&daemon->important_peerids, &id);
|
||||
if (imp)
|
||||
imp->reconnect_timer = tal_free(imp->reconnect_timer);
|
||||
try_reach_peer(daemon, &id, true);
|
||||
try_reach_peer(daemon, &id);
|
||||
return daemon_conn_read_next(conn, &daemon->master);
|
||||
}
|
||||
|
||||
|
@ -1423,8 +1388,8 @@ static struct io_plan *recv_req(struct io_conn *conn, struct daemon_conn *master
|
|||
case WIRE_CONNECTCTL_INIT_REPLY:
|
||||
case WIRE_CONNECTCTL_ACTIVATE_REPLY:
|
||||
case WIRE_CONNECT_PEER_CONNECTED:
|
||||
case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT:
|
||||
case WIRE_CONNECT_RECONNECTED:
|
||||
case WIRE_CONNECTCTL_CONNECT_FAILED:
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,14 +46,10 @@ connectctl_peer_addrhint,,addr,struct wireaddr_internal
|
|||
connectctl_connect_to_peer,2001
|
||||
connectctl_connect_to_peer,,id,struct pubkey
|
||||
|
||||
# Connectd->master: result (not a reply since it can be out-of-order, but
|
||||
# you will get one reply for every request).
|
||||
connectctl_connect_to_peer_result,2020
|
||||
connectctl_connect_to_peer_result,,id,struct pubkey
|
||||
# True it connected.
|
||||
connectctl_connect_to_peer_result,,connected,bool
|
||||
# Otherwise, why we can't reach them.
|
||||
connectctl_connect_to_peer_result,,failreason,wirestring
|
||||
# Connectd->master: connect failed.
|
||||
connectctl_connect_failed,2020
|
||||
connectctl_connect_failed,,id,struct pubkey
|
||||
connectctl_connect_failed,,failreason,wirestring
|
||||
|
||||
# Master -> connectd: try to always maintain connection to this peer (or not)
|
||||
connectctl_peer_important,2010
|
||||
|
|
|
|
@ -68,32 +68,6 @@ static void connect_cmd_succeed(struct command *cmd, const struct pubkey *id)
|
|||
command_success(cmd, response);
|
||||
}
|
||||
|
||||
static void connectd_connect_result(struct lightningd *ld, const u8 *msg)
|
||||
{
|
||||
struct pubkey id;
|
||||
bool connected;
|
||||
char *err;
|
||||
struct connect *c;
|
||||
|
||||
if (!fromwire_connectctl_connect_to_peer_result(tmpctx, msg,
|
||||
&id,
|
||||
&connected,
|
||||
&err))
|
||||
fatal("Connect gave bad CONNECTCTL_CONNECT_TO_PEER_RESULT message %s",
|
||||
tal_hex(msg, msg));
|
||||
|
||||
|
||||
/* We can have multiple connect commands: complete them all */
|
||||
while ((c = find_connect(ld, &id)) != NULL) {
|
||||
if (connected) {
|
||||
connect_cmd_succeed(c->cmd, &id);
|
||||
} else {
|
||||
command_fail(c->cmd, LIGHTNINGD, "%s", err);
|
||||
}
|
||||
/* They delete themselves from list */
|
||||
}
|
||||
}
|
||||
|
||||
static void json_connect(struct command *cmd,
|
||||
const char *buffer, const jsmntok_t *params)
|
||||
{
|
||||
|
@ -200,12 +174,10 @@ static void json_connect(struct command *cmd,
|
|||
subd_send_msg(cmd->ld->connectd, take(msg));
|
||||
}
|
||||
|
||||
/* If there isn't already a connect command, tell connectd */
|
||||
if (!find_connect(cmd->ld, &id)) {
|
||||
msg = towire_connectctl_connect_to_peer(NULL, &id);
|
||||
subd_send_msg(cmd->ld->connectd, take(msg));
|
||||
}
|
||||
/* Leave this here for connect_connect_result */
|
||||
msg = towire_connectctl_connect_to_peer(NULL, &id);
|
||||
subd_send_msg(cmd->ld->connectd, take(msg));
|
||||
|
||||
/* Leave this here for peer_connected or connect_failed. */
|
||||
new_connect(cmd->ld, &id, cmd);
|
||||
command_still_pending(cmd);
|
||||
}
|
||||
|
@ -218,6 +190,34 @@ static const struct json_command connect_command = {
|
|||
};
|
||||
AUTODATA(json_command, &connect_command);
|
||||
|
||||
static void connect_failed(struct lightningd *ld, const u8 *msg)
|
||||
{
|
||||
struct pubkey id;
|
||||
char *err;
|
||||
struct connect *c;
|
||||
|
||||
if (!fromwire_connectctl_connect_failed(tmpctx, msg, &id, &err))
|
||||
fatal("Connect gave bad CONNECTCTL_CONNECT_FAILED message %s",
|
||||
tal_hex(msg, msg));
|
||||
|
||||
/* We can have multiple connect commands: fail them all */
|
||||
while ((c = find_connect(ld, &id)) != NULL) {
|
||||
/* They delete themselves from list */
|
||||
command_fail(c->cmd, LIGHTNINGD, "%s", err);
|
||||
}
|
||||
}
|
||||
|
||||
void connect_succeeded(struct lightningd *ld, const struct pubkey *id)
|
||||
{
|
||||
struct connect *c;
|
||||
|
||||
/* We can have multiple connect commands: fail them all */
|
||||
while ((c = find_connect(ld, id)) != NULL) {
|
||||
/* They delete themselves from list */
|
||||
connect_cmd_succeed(c->cmd, id);
|
||||
}
|
||||
}
|
||||
|
||||
static void peer_please_disconnect(struct lightningd *ld, const u8 *msg)
|
||||
{
|
||||
struct pubkey id;
|
||||
|
@ -261,8 +261,8 @@ static unsigned connectd_msg(struct subd *connectd, const u8 *msg, const int *fd
|
|||
peer_connected(connectd->ld, msg, fds[0], fds[1]);
|
||||
break;
|
||||
|
||||
case WIRE_CONNECTCTL_CONNECT_TO_PEER_RESULT:
|
||||
connectd_connect_result(connectd->ld, msg);
|
||||
case WIRE_CONNECTCTL_CONNECT_FAILED:
|
||||
connect_failed(connectd->ld, msg);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -9,6 +9,7 @@ struct pubkey;
|
|||
int connectd_init(struct lightningd *ld);
|
||||
void connectd_activate(struct lightningd *ld);
|
||||
|
||||
void connect_succeeded(struct lightningd *ld, const struct pubkey *id);
|
||||
void gossip_connect_result(struct lightningd *ld, const u8 *msg);
|
||||
|
||||
#endif /* LIGHTNING_LIGHTNINGD_CONNECT_CONTROL_H */
|
||||
|
|
|
@ -448,6 +448,9 @@ void peer_connected(struct lightningd *ld, const u8 *msg,
|
|||
fatal("Connectd gave bad CONNECT_PEER_CONNECTED message %s",
|
||||
tal_hex(msg, msg));
|
||||
|
||||
/* Complete any outstanding connect commands. */
|
||||
connect_succeeded(ld, &id);
|
||||
|
||||
/* If we're already dealing with this peer, hand off to correct
|
||||
* subdaemon. Otherwise, we'll hand to openingd to wait there. */
|
||||
peer = peer_by_id(ld, &id);
|
||||
|
|
|
@ -59,6 +59,9 @@ void command_still_pending(struct command *cmd UNNEEDED)
|
|||
/* Generated stub for command_success */
|
||||
void command_success(struct command *cmd UNNEEDED, struct json_result *response UNNEEDED)
|
||||
{ fprintf(stderr, "command_success called!\n"); abort(); }
|
||||
/* Generated stub for connect_succeeded */
|
||||
void connect_succeeded(struct lightningd *ld UNNEEDED, const struct pubkey *id UNNEEDED)
|
||||
{ fprintf(stderr, "connect_succeeded called!\n"); abort(); }
|
||||
/* Generated stub for fromwire_connect_peer_connected */
|
||||
bool fromwire_connect_peer_connected(const tal_t *ctx UNNEEDED, const void *p UNNEEDED, struct pubkey *id UNNEEDED, struct wireaddr_internal *addr UNNEEDED, struct crypto_state *crypto_state UNNEEDED, u8 **gfeatures UNNEEDED, u8 **lfeatures UNNEEDED)
|
||||
{ fprintf(stderr, "fromwire_connect_peer_connected called!\n"); abort(); }
|
||||
|
|
Loading…
Reference in New Issue