lightningd: differentiate cases of plugin death correctly.

If a plugin died due to connection close, we'd always say
"Plugin exited before completing handshake.", which was often
wrong.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Rusty Russell 2021-02-10 13:09:08 +10:30 committed by Christian Decker
parent c301abe602
commit ee5da52677
2 changed files with 29 additions and 9 deletions

View File

@ -46,6 +46,24 @@ static void memleak_help_pending_requests(struct htable *memtable,
}
#endif /* DEVELOPER */
static const char *state_desc(const struct plugin *plugin)
{
switch (plugin->plugin_state) {
case UNCONFIGURED:
return "unconfigured";
case AWAITING_GETMANIFEST_RESPONSE:
return "before replying to getmanifest";
case NEEDS_INIT:
return "before we sent init";
case AWAITING_INIT_RESPONSE:
return "before replying to init";
case INIT_COMPLETE:
return "during normal operation";
}
fatal("Invalid plugin state %i for %s",
plugin->plugin_state, plugin->cmd);
}
struct plugins *plugins_new(const tal_t *ctx, struct log_book *log_book,
struct lightningd *ld)
{
@ -630,7 +648,8 @@ static struct io_plan *plugin_write_json(struct io_conn *conn,
/* This catches the case where their stdout closes (usually they're dead). */
static void plugin_conn_finish(struct io_conn *conn, struct plugin *plugin)
{
plugin_kill(plugin, "Plugin exited before completing handshake.");
const char *msg = tal_fmt(tmpctx, "exited %s", state_desc(plugin));
plugin_kill(plugin, msg);
}
struct io_plan *plugin_stdin_conn_init(struct io_conn *conn,
@ -1204,12 +1223,8 @@ static const char *plugin_add_params(struct plugin *plugin)
static void plugin_manifest_timeout(struct plugin *plugin)
{
bool startup = plugin->plugins->startup;
if (plugin->plugin_state == AWAITING_GETMANIFEST_RESPONSE)
plugin_kill(plugin,
"failed to respond to 'getmanifest' in time, terminating.");
else
plugin_kill(plugin,
"failed to respond to 'init' in time, terminating.");
plugin_kill(plugin, tal_fmt(tmpctx, "timed out %s", state_desc(plugin)));
if (startup)
fatal("Can't recover from plugin failure, terminating.");

View File

@ -192,7 +192,7 @@ def test_plugin_slowinit(node_factory):
os.environ['SLOWINIT_TIME'] = '61'
n = node_factory.get_node()
with pytest.raises(RpcError, match='failed to respond to \'init\' in time, terminating.'):
with pytest.raises(RpcError, match=': timed out before replying to init'):
n.rpc.plugin_start(os.path.join(os.getcwd(), "tests/plugins/slow_init.py"))
# It's not actually configured yet, see what happens;
@ -252,9 +252,12 @@ def test_plugin_command(node_factory):
n2.rpc.plugin_stop(plugin="static.py")
# Test that we don't crash when starting a broken plugin
with pytest.raises(RpcError, match=r"Plugin exited before completing handshake."):
with pytest.raises(RpcError, match=r": exited before replying to getmanifest"):
n2.rpc.plugin_start(plugin=os.path.join(os.getcwd(), "tests/plugins/broken.py"))
with pytest.raises(RpcError, match=r': timed out before replying to getmanifest'):
n2.rpc.plugin_start(os.path.join(os.getcwd(), 'contrib/plugins/fail/failtimeout.py'))
# Test that we can add a directory with more than one new plugin in it.
try:
n.rpc.plugin_startdir(os.path.join(os.getcwd(), "contrib/plugins"))
@ -1800,11 +1803,13 @@ def test_plugin_fail(node_factory):
time.sleep(2)
# It should clean up!
assert 'failcmd' not in [h['command'] for h in l1.rpc.help()['help']]
l1.daemon.wait_for_log(r': exited during normal operation')
l1.rpc.plugin_start(plugin)
time.sleep(2)
# It should clean up!
assert 'failcmd' not in [h['command'] for h in l1.rpc.help()['help']]
l1.daemon.wait_for_log(r': exited during normal operation')
@unittest.skipIf(not DEVELOPER, "without DEVELOPER=1, gossip v slow")