status: separate types for peer failure vs "impossible" failures.

Ideally we'd rename status_failed() to status_fatal(), but that's
too much churn for now.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2018-02-08 11:55:12 +10:30 committed by Christian Decker
parent fd498be7ca
commit cc9ca82821
12 changed files with 228 additions and 151 deletions

View File

@ -185,8 +185,7 @@ static void do_peer_write(struct peer *peer)
r = write(PEER_FD, peer->peer_outmsg + peer->peer_outoff,
len - peer->peer_outoff);
if (r < 0)
status_failed(STATUS_FAIL_PEER_IO,
"Peer write failed: %s", strerror(errno));
status_fatal_connection_lost();
peer->peer_outoff += r;
if (peer->peer_outoff == len)
@ -1628,15 +1627,13 @@ static void peer_in(struct peer *peer, const u8 *msg)
static void peer_conn_broken(struct peer *peer)
{
const char *e = strerror(errno);
/* If we have signatures, send an update to say we're disabled. */
if (peer->have_sigs[LOCAL] && peer->have_sigs[REMOTE]) {
u8 *cupdate = create_channel_update(peer, peer, true);
wire_sync_write(GOSSIP_FD, take(cupdate));
}
status_failed(STATUS_FAIL_PEER_IO, "peer read failed: %s", e);
status_fatal_connection_lost();
}
static void resend_revoke(struct peer *peer)
@ -1784,8 +1781,7 @@ static void peer_reconnect(struct peer *peer)
peer->next_index[LOCAL],
peer->revocations_received);
if (!sync_crypto_write(&peer->cs, PEER_FD, take(msg)))
status_failed(STATUS_FAIL_PEER_IO,
"Failed writing reestablish: %s", strerror(errno));
status_fatal_connection_lost();
/* Read until they say something interesting */
while ((msg = channeld_read_peer_msg(peer)) == NULL);
@ -1793,10 +1789,12 @@ static void peer_reconnect(struct peer *peer)
if (!fromwire_channel_reestablish(msg, NULL, &channel_id,
&next_local_commitment_number,
&next_remote_revocation_number)) {
status_failed(STATUS_FAIL_PEER_IO,
"bad reestablish msg: %s %s",
wire_type_name(fromwire_peektype(msg)),
tal_hex(msg, msg));
peer_failed(PEER_FD,
&peer->cs,
&peer->channel_id,
"bad reestablish msg: %s %s",
wire_type_name(fromwire_peektype(msg)),
tal_hex(msg, msg));
}
status_trace("Got reestablish commit=%"PRIu64" revoke=%"PRIu64,
@ -1840,18 +1838,22 @@ static void peer_reconnect(struct peer *peer)
if (next_remote_revocation_number == peer->next_index[LOCAL] - 2) {
/* Don't try to retransmit revocation index -1! */
if (peer->next_index[LOCAL] < 2) {
status_failed(STATUS_FAIL_PEER_IO,
"bad reestablish revocation_number: %"
PRIu64,
next_remote_revocation_number);
peer_failed(PEER_FD,
&peer->cs,
&peer->channel_id,
"bad reestablish revocation_number: %"
PRIu64,
next_remote_revocation_number);
}
retransmit_revoke_and_ack = true;
} else if (next_remote_revocation_number != peer->next_index[LOCAL] - 1) {
status_failed(STATUS_FAIL_PEER_IO,
"bad reestablish revocation_number: %"PRIu64
" vs %"PRIu64,
next_remote_revocation_number,
peer->next_index[LOCAL]);
peer_failed(PEER_FD,
&peer->cs,
&peer->channel_id,
"bad reestablish revocation_number: %"PRIu64
" vs %"PRIu64,
next_remote_revocation_number,
peer->next_index[LOCAL]);
} else
retransmit_revoke_and_ack = false;
@ -1870,10 +1872,12 @@ static void peer_reconnect(struct peer *peer)
if (next_local_commitment_number == peer->next_index[REMOTE] - 1) {
/* We completed opening, we don't re-transmit that one! */
if (next_local_commitment_number == 0)
status_failed(STATUS_FAIL_PEER_IO,
"bad reestablish commitment_number: %"
PRIu64,
next_local_commitment_number);
peer_failed(PEER_FD,
&peer->cs,
&peer->channel_id,
"bad reestablish commitment_number: %"
PRIu64,
next_local_commitment_number);
resend_commitment(peer, peer->last_sent_commit);

View File

@ -115,8 +115,8 @@ static void do_reconnect(struct crypto_state *cs,
next_index[LOCAL],
revocations_received);
if (!sync_crypto_write(cs, PEER_FD, take(msg)))
status_failed(STATUS_FAIL_PEER_IO,
"Failed writing reestablish: %s", strerror(errno));
status_fatal_connection_lost();
;
/* Wait for them to say something interesting */
msg = closing_read_peer_msg(tmpctx, cs, channel_id);
@ -198,8 +198,7 @@ static void send_offer(struct crypto_state *cs,
msg = towire_closing_signed(tmpctx, channel_id, fee_to_offer, &our_sig);
if (!sync_crypto_write(cs, PEER_FD, take(msg)))
status_failed(STATUS_FAIL_PEER_IO,
"Writing closing_signed");
status_fatal_connection_lost();
tal_free(tmpctx);
}

View File

@ -15,31 +15,19 @@ void peer_failed(int peer_fd, struct crypto_state *cs,
const char *fmt, ...)
{
va_list ap;
const char *errmsg;
struct channel_id all_channels;
const char *desc;
u8 *msg;
/* BOLT #1:
*
* The channel is referred to by `channel_id` unless `channel_id` is
* zero (ie. all bytes zero), in which case it refers to all channels.
*/
if (!channel_id) {
memset(&all_channels, 0, sizeof(all_channels));
channel_id = &all_channels;
}
va_start(ap, fmt);
errmsg = tal_vfmt(NULL, fmt, ap);
va_start(ap, fmt);
desc = tal_vfmt(NULL, fmt, ap);
va_end(ap);
va_start(ap, fmt);
msg = towire_errorfmtv(errmsg, channel_id, fmt, ap);
va_end(ap);
status_broken("SENT ERROR:%s", desc);
msg = towire_errorfmt(desc, channel_id, "%s", desc);
/* This is only best-effort; don't block. */
io_fd_block(peer_fd, false);
sync_crypto_write(cs, peer_fd, take(msg));
sync_crypto_write(cs, peer_fd, msg);
status_failed(STATUS_FAIL_PEER_BAD, "%s", errmsg);
status_fatal_sent_errmsg(take(msg), desc, channel_id);
}

View File

@ -41,10 +41,10 @@ u8 *read_peer_msg_(const tal_t *ctx,
int peer_fd, int gossip_fd,
struct crypto_state *cs,
const struct channel_id *channel,
bool (*send_reply)(struct crypto_state *, int, const u8 *,
void *),
bool (*send_reply)(struct crypto_state *cs, int fd,
const u8 *TAKES, void *arg),
void (*io_error)(const char *what_i_was_doing, void *arg),
void (*err_pkt)(const char *desc, bool this_channel_only,
void (*err_pkt)(const char *desc, const struct channel_id *,
void *arg),
void *arg)
{
@ -85,10 +85,7 @@ u8 *read_peer_msg_(const tal_t *ctx,
* message:
* - MUST ignore the message.
*/
if (channel_id_is_all(&chanid))
err_pkt(err, false, arg);
else if (structeq(&chanid, channel))
err_pkt(err, true, arg);
err_pkt(err, &chanid, arg);
return tal_free(msg);
}
@ -118,15 +115,15 @@ bool sync_crypto_write_arg(struct crypto_state *cs, int fd, const u8 *msg,
return sync_crypto_write(cs, fd, msg);
}
/* Helper: calls status_failed(STATUS_FAIL_PEER_IO) */
/* Helper: calls status_fatal_connection_lost. */
void status_fail_io(const char *what_i_was_doing, void *unused)
{
status_failed(STATUS_FAIL_PEER_IO,
"%s:%s", what_i_was_doing, strerror(errno));
status_fatal_connection_lost();
}
/* Helper: calls status_failed(STATUS_FAIL_PEER_BAD, <error>) */
void status_fail_errpkt(const char *desc, bool this_channel_only, void *unused)
/* Helper: calls status_fatal_received_errmsg() */
void status_fail_errpkt(const char *desc, const struct channel_id *c,
void *unused)
{
status_failed(STATUS_FAIL_PEER_BAD, "Peer sent ERROR: %s", desc);
status_fatal_received_errmsg(desc, c);
}

View File

@ -12,7 +12,7 @@ struct channel_id;
* read_peer_msg - read & decode in a peer message, handling common ones.
* @ctx: context to allocate return packet from.
* @cs: the cryptostate (updated)
* @channel_id: the channel id (for identifying errors)
* @chanid: the channel id (for identifying errors)
* @send_reply: the way to send a reply packet (eg. sync_crypto_write_arg)
* @io_error: what to do if there's an IO error (eg. status_fail_io)
* (MUST NOT RETURN!)
@ -22,26 +22,29 @@ struct channel_id;
* This returns NULL if it handled the message, so it's normally called in
* a loop.
*/
#define read_peer_msg(ctx, cs, channel_id, send_reply, io_error, err_pkt, arg) \
read_peer_msg_((ctx), PEER_FD, GOSSIP_FD, (cs), (channel_id), \
#define read_peer_msg(ctx, cs, chanid, send_reply, io_error, err_pkt, arg) \
read_peer_msg_((ctx), PEER_FD, GOSSIP_FD, (cs), (chanid), \
typesafe_cb_preargs(bool, void *, (send_reply), (arg), \
struct crypto_state *, int, \
const u8 *), \
typesafe_cb_preargs(void, void *, (io_error), (arg), \
const char *), \
typesafe_cb_preargs(void, void *, (err_pkt), (arg), \
const char *, bool), \
const char *, \
const struct channel_id *), \
arg)
/* Helper: sync_crypto_write, with extra args it ignores */
bool sync_crypto_write_arg(struct crypto_state *cs, int fd, const u8 *TAKES,
void *unused);
/* Helper: calls status_failed(STATUS_FAIL_PEER_IO) */
/* Helper: calls status_fatal_connection_lost. */
/* FIXME: Remove what_i_was_doing arg */
void status_fail_io(const char *what_i_was_doing, void *unused);
/* Helper: calls status_failed(STATUS_FAIL_PEER_BAD, <error>) */
void status_fail_errpkt(const char *desc, bool this_channel_only, void *unused);
/* Helper: calls status_fatal_received_errmsg() */
void status_fail_errpkt(const char *desc, const struct channel_id *c,
void *unused);
u8 *read_peer_msg_(const tal_t *ctx,
int peer_fd, int gossip_fd,
@ -50,7 +53,7 @@ u8 *read_peer_msg_(const tal_t *ctx,
bool (*send_reply)(struct crypto_state *cs, int fd,
const u8 *TAKES, void *arg),
void (*io_error)(const char *what_i_was_doing, void *arg),
void (*err_pkt)(const char *desc, bool this_channel_only,
void (*err_pkt)(const char *desc, const struct channel_id *,
void *arg),
void *arg);

View File

@ -112,6 +112,17 @@ void status_fmt(enum log_level level, const char *fmt, ...)
va_end(ap);
}
static NORETURN void flush_and_exit(int reason)
{
/* Don't let it take forever. */
alarm(10);
if (status_conn)
daemon_conn_sync_flush(status_conn);
exit(0x80 | (reason & 0xFF));
}
/* FIXME: rename to status_fatal, s/fail/fatal/ in status_failreason enums */
void status_failed(enum status_failreason reason, const char *fmt, ...)
{
va_list ap;
@ -123,21 +134,46 @@ void status_failed(enum status_failreason reason, const char *fmt, ...)
status_send(take(towire_status_fail(NULL, reason, str)));
va_end(ap);
/* Don't let it take forever. */
alarm(10);
if (status_conn)
daemon_conn_sync_flush(status_conn);
exit(0x80 | (reason & 0xFF));
flush_and_exit(reason);
}
void master_badmsg(u32 type_expected, const u8 *msg)
{
if (!msg)
status_failed(STATUS_FAIL_MASTER_IO,
"failed reading msg %u: %s",
type_expected, strerror(errno));
"failed reading msg %u: %s",
type_expected, strerror(errno));
status_failed(STATUS_FAIL_MASTER_IO,
"Error parsing %u: %s",
type_expected, tal_hex(trc, msg));
"Error parsing %u: %s",
type_expected, tal_hex(trc, msg));
}
void status_fatal_connection_lost(void)
{
status_send(take(towire_status_peer_connection_lost(NULL)));
flush_and_exit(WIRE_STATUS_PEER_CONNECTION_LOST);
}
/* Got an error for one or all channels */
void status_fatal_received_errmsg(const char *desc, const struct channel_id *c)
{
static const struct channel_id all_channels;
if (!c)
c = &all_channels;
status_send(take(towire_status_received_errmsg(NULL, c, desc)));
flush_and_exit(WIRE_STATUS_RECEIVED_ERRMSG);
}
/* Sent an error for one or all channels */
void status_fatal_sent_errmsg(const u8 *errmsg,
const char *desc, const struct channel_id *c)
{
static const struct channel_id all_channels;
if (!c)
c = &all_channels;
status_send(take(towire_status_sent_errmsg(NULL, c, desc, errmsg)));
flush_and_exit(WIRE_STATUS_SENT_ERRMSG);
}

View File

@ -9,6 +9,7 @@
#include <stdbool.h>
#include <stdlib.h>
struct channel_id;
struct daemon_conn;
/* Simple status reporting API. */
@ -46,8 +47,20 @@ void status_io(enum log_level iodir, const u8 *p);
void status_failed(enum status_failreason code,
const char *fmt, ...) PRINTF_FMT(2,3) NORETURN;
/* Helper for master failures: sends STATUS_FAIL_MASTER_IO.
/* Helper for master failures: sends STATUS_FATAL_MASTER_IO.
* msg NULL == read failure. */
void master_badmsg(u32 type_expected, const u8 *msg);
void master_badmsg(u32 type_expected, const u8 *msg) NORETURN;
/* I/O error */
void status_fatal_connection_lost(void) NORETURN;
/* Got an error for one or all channels (if c == NULL) */
void status_fatal_received_errmsg(const char *desc,
const struct channel_id *c) NORETURN;
/* Sent an error for one or all channels (if c == NULL) */
void status_fatal_sent_errmsg(const u8 *errmsg,
const char *desc,
const struct channel_id *c) NORETURN;
#endif /* LIGHTNING_COMMON_STATUS_H */

View File

@ -17,10 +17,10 @@ enum log_level {
};
#define LOG_LEVEL_MAX LOG_BROKEN
/*
* These errors shouldn't happen:
*/
enum status_failreason {
/*
* These errors shouldn't happen:
*/
/* Master daemon sent unknown/malformed command, or fd failed */
STATUS_FAIL_MASTER_IO,
@ -32,16 +32,7 @@ enum status_failreason {
/* Other internal error. */
STATUS_FAIL_INTERNAL_ERROR,
/*
* These errors happen when the other peer misbehaves:
*/
/* I/O failure (probably they closed the socket) */
STATUS_FAIL_PEER_IO,
/* Peer did something else wrong */
STATUS_FAIL_PEER_BAD
};
#define STATUS_FAIL_MAX STATUS_FAIL_PEER_BAD
#define STATUS_FAIL_MAX STATUS_FAIL_INTERNAL_ERROR
#endif /* LIGHTNING_COMMON_STATUS_LEVELS_H */

View File

@ -12,3 +12,19 @@ status_io,,data,len*u8
status_fail,0xFFF2
status_fail,,failreason,enum status_failreason
status_fail,,desc,wirestring
status_peer_connection_lost,0xFFF3
# They sent us this error.
status_received_errmsg,0xFFF4
status_received_errmsg,,channel,struct channel_id
status_received_errmsg,,desc,wirestring
# We sent them this error.
status_sent_errmsg,0xFFF5
status_sent_errmsg,,channel,struct channel_id
status_sent_errmsg,,desc,wirestring
status_sent_errmsg,,len,u16
status_sent_errmsg,,errmsg,len*u8
# Note: 0xFFFF is reserved for MSG_PASS_FD!

1 #include <common/status_wire.h>
12 status_peer_connection_lost,0xFFF3
13 # They sent us this error.
14 status_received_errmsg,0xFFF4
15 status_received_errmsg,,channel,struct channel_id
16 status_received_errmsg,,desc,wirestring
17 # We sent them this error.
18 status_sent_errmsg,0xFFF5
19 status_sent_errmsg,,channel,struct channel_id
20 status_sent_errmsg,,desc,wirestring
21 status_sent_errmsg,,len,u16
22 status_sent_errmsg,,errmsg,len*u8
23 # Note: 0xFFFF is reserved for MSG_PASS_FD!
24
25
26
27
28
29
30

View File

@ -350,53 +350,79 @@ static void subdaemon_malformed_msg(struct subd *sd, const u8 *msg)
#endif
}
static void log_status_fail(struct subd *sd,
enum status_failreason type, const char *str)
static bool log_status_fail(struct subd *sd, const u8 *msg)
{
const char *name;
const char *name = NULL;
enum status_failreason failreason;
char *desc;
if (!fromwire_status_fail(msg, msg, NULL, &failreason, &desc))
return false;
/* No 'default:' here so gcc gives warning if a new type added */
switch (type) {
switch (failreason) {
case STATUS_FAIL_MASTER_IO:
name = "STATUS_FAIL_MASTER_IO";
goto log_str_broken;
break;
case STATUS_FAIL_HSM_IO:
name = "STATUS_FAIL_HSM_IO";
goto log_str_broken;
break;
case STATUS_FAIL_GOSSIP_IO:
name = "STATUS_FAIL_GOSSIP_IO";
goto log_str_broken;
break;
case STATUS_FAIL_INTERNAL_ERROR:
name = "STATUS_FAIL_INTERNAL_ERROR";
goto log_str_broken;
/*
* These errors happen when the other peer misbehaves:
*/
case STATUS_FAIL_PEER_IO:
name = "STATUS_FAIL_PEER_IO";
goto log_str_peer;
case STATUS_FAIL_PEER_BAD:
name = "STATUS_FAIL_PEER_BAD";
goto log_str_peer;
break;
}
/* fromwire_status_fail() guarantees it's one of those... */
abort();
assert(name);
/* Peers misbehaving is expected. */
log_str_peer:
log_info(sd->log, "%s: %s", name, str);
return;
/* Shouldn't happen. */
log_str_broken:
log_broken(sd->log, "%s: %s", name, str);
log_broken(sd->log, "%s: %s", name, desc);
#if DEVELOPER
if (sd->ld->dev_subdaemon_fail)
fatal("Subdaemon %s hit error", sd->name);
#endif
return true;
}
return;
static bool handle_received_errmsg(struct subd *sd, const u8 *msg)
{
struct peer *peer = sd->peer;
struct channel_id channel_id;
char *desc;
if (!fromwire_status_received_errmsg(msg, msg, NULL,
&channel_id, &desc))
return false;
/* FIXME: if not all channels failed, hand back to gossipd! */
/* Don't free sd; we're may be about to free peer. */
sd->peer = NULL;
peer_fail_permanent(peer, "%s: received ERROR %s", sd->name, desc);
return true;
}
static bool handle_sent_errmsg(struct subd *sd, const u8 *msg)
{
struct peer *peer = sd->peer;
struct channel_id channel_id;
char *desc;
u8 *errmsg;
if (!fromwire_status_sent_errmsg(msg, msg, NULL,
&channel_id, &desc, &errmsg))
return false;
/* FIXME: if not all channels failed, hand back to gossipd! */
if (!sd->peer->error)
sd->peer->error = tal_steal(sd->peer, errmsg);
/* Don't free sd; we're may be about to free peer. */
sd->peer = NULL;
peer_fail_permanent(peer, "%s: sent ERROR %s", sd->name, desc);
return true;
}
static struct io_plan *sd_msg_read(struct io_conn *conn, struct subd *sd)
@ -430,26 +456,33 @@ static struct io_plan *sd_msg_read(struct io_conn *conn, struct subd *sd)
tmpctx = tal_tmpctx(sd);
tal_steal(tmpctx, sd->msg_in);
if (log_status_msg(sd->log, sd->msg_in))
goto next;
if (type == WIRE_STATUS_FAIL) {
enum status_failreason failreason;
char *desc;
if (!fromwire_status_fail(sd->msg_in, sd->msg_in, NULL,
&failreason, &desc))
/* We handle status messages ourselves. */
switch ((enum status)type) {
case WIRE_STATUS_LOG:
case WIRE_STATUS_IO:
if (!log_status_msg(sd->log, sd->msg_in))
goto malformed;
goto next;
case WIRE_STATUS_FAIL:
if (!log_status_fail(sd, sd->msg_in))
goto malformed;
goto close;
case WIRE_STATUS_PEER_CONNECTION_LOST:
if (!sd->peer)
goto malformed;
log_info(sd->log, "Peer connection lost");
goto close;
case WIRE_STATUS_RECEIVED_ERRMSG:
if (!sd->peer)
goto malformed;
if (!handle_received_errmsg(sd, sd->msg_in))
goto malformed;
goto close;
case WIRE_STATUS_SENT_ERRMSG:
if (!sd->peer)
goto malformed;
if (!handle_sent_errmsg(sd, sd->msg_in))
goto malformed;
log_status_fail(sd, failreason, desc);
/* If they care, tell them about invalid peer behavior */
if (sd->peer && failreason == STATUS_FAIL_PEER_BAD) {
/* Don't free ourselves; we're about to do that. */
struct peer *peer = sd->peer;
sd->peer = NULL;
peer_fail_permanent(peer, "%s: %s", sd->name, desc);
}
goto close;
}

View File

@ -206,13 +206,13 @@ static void temporary_channel_id(struct channel_id *channel_id)
/* This handles the case where there's an error only for this channel */
static void opening_errpkt(const char *desc,
bool this_channel_only,
const struct channel_id *channel_id,
struct state *state)
{
if (this_channel_only)
/* FIXME: Remove negotiation_failed */
if (structeq(channel_id, &state->channel_id))
negotiation_failed(state, false, "Error packet: %s", desc);
else
status_failed(STATUS_FAIL_PEER_BAD, "Received ERROR %s", desc);
status_fatal_received_errmsg(desc, channel_id);
}
/* Handle random messages we might get, returning the first non-handled one. */
@ -287,8 +287,7 @@ static u8 *funder_channel(struct state *state,
&state->next_per_commit[LOCAL],
channel_flags);
if (!sync_crypto_write(&state->cs, PEER_FD, msg))
status_failed(STATUS_FAIL_PEER_IO,
"Writing open_channel: %s", strerror(errno));
status_fatal_connection_lost();
state->remoteconf = tal(state, struct channel_config);
@ -402,8 +401,7 @@ static u8 *funder_channel(struct state *state,
state->funding_txout,
&sig);
if (!sync_crypto_write(&state->cs, PEER_FD, msg))
status_failed(STATUS_FAIL_PEER_IO,
"Writing funding_created: %s", strerror(errno));
status_fatal_connection_lost();
/* BOLT #2:
*
@ -593,8 +591,7 @@ static u8 *fundee_channel(struct state *state,
&state->next_per_commit[LOCAL]);
if (!sync_crypto_write(&state->cs, PEER_FD, take(msg)))
status_failed(STATUS_FAIL_PEER_IO,
"Writing accept_channel: %s", strerror(errno));
status_fatal_connection_lost();
msg = opening_read_peer_msg(state);

View File

@ -3155,7 +3155,7 @@ class LightningDTests(BaseLightningDTests):
# L1 asks for stupid low fees
l1.rpc.dev_setfees(15)
l1.daemon.wait_for_log('STATUS_FAIL_PEER_BAD:.*update_fee 15 outside range 4250-75000')
l1.daemon.wait_for_log('Peer permanent failure in CHANNELD_NORMAL: lightning_channeld: received ERROR channel .*: update_fee 15 outside range 4250-75000')
# Make sure the resolution of this one doesn't interfere with the next!
# Note: may succeed, may fail with insufficient fee, depending on how
# bitcoind feels!
@ -3302,7 +3302,7 @@ class LightningDTests(BaseLightningDTests):
# Make l2 upset by asking for crazy fee.
l1.rpc.dev_setfees('150000')
# Wait for l1 notice
l1.daemon.wait_for_log('STATUS_FAIL_PEER_BAD')
l1.daemon.wait_for_log(r'Peer permanent failure in CHANNELD_NORMAL: lightning_channeld: received ERROR channel .*: update_fee 150000 outside range 4250-75000')
# Can't pay while its offline.
self.assertRaises(ValueError, l1.rpc.sendpay, to_json(route), rhash)