From ec658c1f89f3d601c7c5a544427661cbe1aff73e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 2 May 2019 10:25:17 +0930 Subject: [PATCH] status: suppress status_trace/status_debug messages if queue too long. We can be spammy, which is good for tests, but bad for our simple message queue. This avoids breaking our tests but also avoid the worst case (1M entries and counting!) for gossip status messages in the case where we're syncing a large peer. Signed-off-by: Rusty Russell --- common/daemon_conn.c | 5 +++++ common/daemon_conn.h | 6 ++++++ common/msg_queue.c | 5 +++++ common/msg_queue.h | 3 +++ common/status.c | 20 ++++++++++++++++++++ 5 files changed, 39 insertions(+) diff --git a/common/daemon_conn.c b/common/daemon_conn.c index 81945b4c2..cbadcdc72 100644 --- a/common/daemon_conn.c +++ b/common/daemon_conn.c @@ -146,3 +146,8 @@ void daemon_conn_wake(struct daemon_conn *dc) { msg_wake(dc->out); } + +size_t daemon_conn_queue_length(const struct daemon_conn *dc) +{ + return msg_queue_length(dc->out); +} diff --git a/common/daemon_conn.h b/common/daemon_conn.h index 53c63bd65..580b3fd69 100644 --- a/common/daemon_conn.h +++ b/common/daemon_conn.h @@ -56,4 +56,10 @@ struct io_plan *daemon_conn_read_next(struct io_conn *conn, * daemon_conn_sync_flush - Flush connection by sending all messages now.. */ bool daemon_conn_sync_flush(struct daemon_conn *dc); + +/** + * daemon_conn_queue_length - Get number of message in outgoing queue. + */ +size_t daemon_conn_queue_length(const struct daemon_conn *dc); + #endif /* LIGHTNING_COMMON_DAEMON_CONN_H */ diff --git a/common/msg_queue.c b/common/msg_queue.c index 6190de97d..14823d87d 100644 --- a/common/msg_queue.c +++ b/common/msg_queue.c @@ -22,6 +22,11 @@ static void do_enqueue(struct msg_queue *q, const u8 *add TAKES) io_wake(q); } +size_t msg_queue_length(const struct msg_queue *q) +{ + return tal_count(q->q); +} + void msg_enqueue(struct msg_queue *q, const u8 *add) { assert(fromwire_peektype(add) != MSG_PASS_FD); diff --git a/common/msg_queue.h b/common/msg_queue.h index a311408cb..0d4975d48 100644 --- a/common/msg_queue.h +++ b/common/msg_queue.h @@ -15,6 +15,9 @@ struct msg_queue *msg_queue_new(const tal_t *ctx); /* If add is taken(), freed after sending. msg_wake() implied. */ void msg_enqueue(struct msg_queue *q, const u8 *add TAKES); +/* Get current queue length */ +size_t msg_queue_length(const struct msg_queue *q); + /* Fd is closed after sending. msg_wake() implied. */ void msg_enqueue_fd(struct msg_queue *q, int fd); diff --git a/common/status.c b/common/status.c index 9c59cf336..a6682839b 100644 --- a/common/status.c +++ b/common/status.c @@ -20,6 +20,10 @@ static struct daemon_conn *status_conn; volatile bool logging_io = false; static bool was_logging_io = false; +/* If we're more than this many msgs deep, don't add debug messages. */ +#define TRACE_QUEUE_LIMIT 20 +static size_t traces_suppressed; + static void got_sigusr1(int signal UNUSED) { logging_io = !logging_io; @@ -118,6 +122,22 @@ void status_vfmt(enum log_level level, const char *fmt, va_list ap) { char *str; + /* We only suppress async debug msgs. IO messages are even spammier + * but they only occur when explicitly asked for */ + if (level == LOG_DBG && status_conn) { + size_t qlen = daemon_conn_queue_length(status_conn); + + /* Once suppressing, we keep suppressing until we're empty */ + if (traces_suppressed && qlen == 0) { + size_t n = traces_suppressed; + traces_suppressed = 0; + /* Careful: recursion! */ + status_debug("...[%zu debug messages suppressed]...", n); + } else if (traces_suppressed || qlen > TRACE_QUEUE_LIMIT) { + traces_suppressed++; + return; + } + } str = tal_vfmt(NULL, fmt, ap); status_send(take(towire_status_log(NULL, level, str))); tal_free(str);