From 626998efce3b0c73c2aa4a0241b9c816bf4ba21b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 26 Nov 2022 12:59:53 +1030 Subject: [PATCH] lightningd: don't timeout plugins if init is slow! This is a minimal fix: we wait until all plugins reply from init before continuing. Really large or busy nodes can have other things monopolize lightningd; the timer then goes off and we blame the plugin (which has responded; we just haven't read its reply yet!). The real answer is to have some timeouts only advance when we're idle, or have them low-priority so we only activate them when we're idle (this doesn't apply to all timers: some are probably important!). But this is a minimal fix for -rc3. Fixes: https://github.com/ElementsProject/lightning/issues/5736 Changelog-Fixed: plugins: on large/slow nodes we could blame plugins for failing to answer init in time, when we were just slow. Signed-off-by: Rusty Russell --- lightningd/plugin.c | 8 ++++++++ tests/test_plugin.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/lightningd/plugin.c b/lightningd/plugin.c index 3ca0f3d2a..38acdbfb5 100644 --- a/lightningd/plugin.c +++ b/lightningd/plugin.c @@ -180,6 +180,9 @@ static void check_plugins_initted(struct plugins *plugins) for (size_t i = 0; i < tal_count(plugin_cmds); i++) plugin_cmd_all_complete(plugins, plugin_cmds[i]); tal_free(plugin_cmds); + + if (plugins->startup) + io_break(plugins); } struct command_result *plugin_register_all_complete(struct lightningd *ld, @@ -1943,6 +1946,11 @@ void plugins_config(struct plugins *plugins) plugin_config(p); } + /* Wait for them to configure, before continuing: large + * nodes can take a while to startup!
*/ + if (plugins->startup) + io_loop_with_timers(plugins->ld); + plugins->startup = false; } diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 91474de2c..98ddec31b 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -1942,6 +1942,8 @@ def test_plugin_fail(node_factory): time.sleep(2) # It should clean up! assert 'failcmd' not in [h['command'] for h in l1.rpc.help()['help']] + # Can happen *before* the 'Server started with public key' + l1.daemon.logsearch_start = 0 l1.daemon.wait_for_log(r': exited during normal operation') l1.rpc.plugin_start(plugin)