json_fundchannel: fix release vs connect/nongossip race.

The new connect code revealed an existing race: we tell gossipd to
release the peer, but at the same time it connects in.  gossipd fails
the release because the peer is remote, and json_fundchannel fails.

Instead, we catch this race when we get peer_connected() and we were
trying to open a channel.  It means keeping a list of fundchannels which
are awaiting a gossipd response though.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2018-04-23 19:38:01 +09:30
parent bee795ed68
commit 8e976150ad
6 changed files with 80 additions and 0 deletions

View File

@ -66,6 +66,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
ld->alias = NULL;
ld->rgb = NULL;
list_head_init(&ld->connects);
list_head_init(&ld->fundchannels);
list_head_init(&ld->waitsendpay_commands);
list_head_init(&ld->sendpay_commands);
list_head_init(&ld->close_commands);

View File

@ -125,6 +125,9 @@ struct lightningd {
/* Outstanding connect commands. */
struct list_head connects;
/* Outstanding fundchannel commands. */
struct list_head fundchannels;
/* Our chain topology. */
struct chain_topology *topology;

View File

@ -61,6 +61,9 @@ struct uncommitted_channel {
};
struct funding_channel {
/* In lightningd->fundchannels while waiting for gossipd reply. */
struct list_node list;
struct command *cmd; /* Which also owns us. */
/* Peer we're trying to reach. */
@ -77,6 +80,23 @@ struct funding_channel {
struct uncommitted_channel *uc;
};
static struct funding_channel *find_funding_channel(struct lightningd *ld,
const struct pubkey *id)
{
struct funding_channel *i;
list_for_each(&ld->fundchannels, i, list) {
if (pubkey_eq(&i->peerid, id))
return i;
}
return NULL;
}
static void remove_funding_channel_from_list(struct funding_channel *fc)
{
list_del_from(&fc->cmd->ld->fundchannels, &fc->list);
}
/* Opening failed: hand back to gossipd (sending errpkt if not NULL) */
static void uncommitted_channel_to_gossipd(struct lightningd *ld,
struct uncommitted_channel *uc,
@ -736,6 +756,10 @@ static void peer_offer_channel(struct lightningd *ld,
u32 max_to_self_delay, max_minimum_depth;
u64 min_effective_htlc_capacity_msat;
/* Remove from list, it's not pending any more. */
list_del_from(&ld->fundchannels, &fc->list);
tal_del_destructor(fc, remove_funding_channel_from_list);
fc->uc = new_uncommitted_channel(ld, fc, &fc->peerid, addr);
/* We asked to release this peer, but another raced in? Corner case,
@ -809,6 +833,10 @@ static void gossip_peer_released(struct subd *gossip,
struct channel *c;
struct uncommitted_channel *uc;
/* handle_opening_channel might have already taken care of this. */
if (fc->uc)
return;
c = active_channel_by_id(ld, &fc->peerid, &uc);
if (!fromwire_gossipctl_release_peer_reply(fc, resp, &addr, &cs,
@ -840,6 +868,27 @@ static void gossip_peer_released(struct subd *gossip,
fds[0], fds[1]);
}
/* We can race: we're trying to get gossipd to release peer just as it
* reconnects. If that's happened, treat it as if it were
* released. */
bool handle_opening_channel(struct lightningd *ld,
const struct pubkey *id,
const struct wireaddr *addr,
const struct crypto_state *cs,
u64 gossip_index,
const u8 *gfeatures, const u8 *lfeatures,
int peer_fd, int gossip_fd)
{
struct funding_channel *fc = find_funding_channel(ld, id);
if (!fc)
return false;
peer_offer_channel(ld, fc, addr, cs, gossip_index, gfeatures, lfeatures,
peer_fd, gossip_fd);
return true;
}
/**
* json_fund_channel - Entrypoint for funding a channel
*/
@ -861,6 +910,7 @@ static void json_fund_channel(struct command *cmd,
}
fc = tal(cmd, struct funding_channel);
fc->uc = NULL;
fc->cmd = cmd;
fc->change_keyindex = 0;
fc->funding_satoshi = 0;
@ -912,6 +962,9 @@ static void json_fund_channel(struct command *cmd,
return;
}
list_add(&cmd->ld->fundchannels, &fc->list);
tal_add_destructor(fc, remove_funding_channel_from_list);
msg = towire_gossipctl_release_peer(cmd, &fc->peerid);
subd_req(fc, cmd->ld->gossip, msg, -1, 2, gossip_peer_released, fc);
command_still_pending(cmd);

View File

@ -29,6 +29,15 @@ u8 *peer_accept_channel(const tal_t *ctx,
const struct channel_id *channel_id,
const u8 *open_msg);
/* Gossipd spat out peer: were we currently asking gossipd to release it
* so we could open a channel? Returns true if it took over. */
bool handle_opening_channel(struct lightningd *ld,
const struct pubkey *id,
const struct wireaddr *addr,
const struct crypto_state *cs,
u64 gossip_index,
const u8 *gfeatures, const u8 *lfeatures,
int peer_fd, int gossip_fd);
void kill_uncommitted_channel(struct uncommitted_channel *uc,
const char *why);

View File

@ -440,6 +440,11 @@ void peer_connected(struct lightningd *ld, const u8 *msg,
goto send_error;
}
/* Were we trying to open a channel, and we've raced? */
if (handle_opening_channel(ld, &id, &addr, &cs, gossip_index,
gfeatures, lfeatures, peer_fd, gossip_fd))
return;
/* If we're already dealing with this peer, hand off to correct
* subdaemon. Otherwise, we'll respond iff they ask about an inactive
* channel. */

View File

@ -105,6 +105,15 @@ u8 *get_offered_global_features(const tal_t *ctx UNNEEDED)
/* Generated stub for get_offered_local_features */
u8 *get_offered_local_features(const tal_t *ctx UNNEEDED)
{ fprintf(stderr, "get_offered_local_features called!\n"); abort(); }
/* Generated stub for handle_opening_channel */
bool handle_opening_channel(struct lightningd *ld UNNEEDED,
const struct pubkey *id UNNEEDED,
const struct wireaddr *addr UNNEEDED,
const struct crypto_state *cs UNNEEDED,
u64 gossip_index UNNEEDED,
const u8 *gfeatures UNNEEDED, const u8 *lfeatures UNNEEDED,
int peer_fd UNNEEDED, int gossip_fd UNNEEDED)
{ fprintf(stderr, "handle_opening_channel called!\n"); abort(); }
/* Generated stub for invoices_autoclean_set */
void invoices_autoclean_set(struct invoices *invoices UNNEEDED,
u64 cycle_seconds UNNEEDED,