From 1e467bb9866f0dbcfe0fb54f964f35893e29859a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 10 Oct 2018 17:02:46 +1030 Subject: [PATCH] lightningd: fail HTLCs which are in-transit as we shut down. This is the source of failure in the test_restart_many_payments stress test: we don't commit the outgoing HTLC immediately, instead waiting for gossip to tell us the peer for the outgoing channel, then waiting for that channeld to tell us it's committed. The result was incoming HTLCs with no outgoing. I initially pushed the HTLCs through that same path, but of course (since peers are not connected yet!) the only result was that we failed these HTLCs immediately. So I chose the far simpler course of just failing them directly. To reproduce this, I had to increase the test_restart_many_payments num to 10, and run it with nice -20 taskset -c 0. Signed-off-by: Rusty Russell --- lightningd/peer_htlcs.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/lightningd/peer_htlcs.c b/lightningd/peer_htlcs.c index 034ef2340..93bf5d4a1 100644 --- a/lightningd/peer_htlcs.c +++ b/lightningd/peer_htlcs.c @@ -1742,6 +1742,20 @@ void htlcs_reconnect(struct lightningd *ld, struct htlc_out_map_iter outi; struct htlc_in *hin; struct htlc_out *hout; + struct htlc_in_map unprocessed; + + /* Any HTLCs which happened to be incoming and weren't forwarded before + * we shutdown/crashed: fail them now. + * + * Note that since we do local processing synchronously, this never + * captures local payments. But if it did, it would be a tiny corner + * case. 
*/ + htlc_in_map_init(&unprocessed); + for (hin = htlc_in_map_first(htlcs_in, &ini); hin; + hin = htlc_in_map_next(htlcs_in, &ini)) { + if (hin->hstate == RCVD_ADD_ACK_REVOCATION) + htlc_in_map_add(&unprocessed, hin); + } for (hout = htlc_out_map_first(htlcs_out, &outi); hout; hout = htlc_out_map_next(htlcs_out, &outi)) { @@ -1780,7 +1794,21 @@ void htlcs_reconnect(struct lightningd *ld, fixup_hout(ld, hout); #endif + if (hout->in) + htlc_in_map_del(&unprocessed, hout->in); } + + /* Now fail any which were stuck. */ + for (hin = htlc_in_map_first(&unprocessed, &ini); hin; + hin = htlc_in_map_next(&unprocessed, &ini)) { + log_unusual(hin->key.channel->log, + "Failing old unprocessed HTLC #%"PRIu64, + hin->key.id); + fail_in_htlc(hin, WIRE_TEMPORARY_NODE_FAILURE, NULL, NULL); + } + + /* Don't leak memory! */ + htlc_in_map_clear(&unprocessed); }