pytest: Stopping daemon cleanly

We used to simply kill the daemon, which in some cases could leave behind
half-written crash logs and similar artifacts, such as half-completed
RPC calls. Now we ask lightningd to stop nicely, give it some time, and
only then kill it. We also return the daemon's return code.

Signed-off-by: Christian Decker <decker.christian@gmail.com>
This commit is contained in:
Christian Decker 2017-09-28 13:01:47 +09:30 committed by Rusty Russell
parent 3f9ec6c2fa
commit 11eaabdbe6
2 changed files with 48 additions and 15 deletions

View File

@ -126,7 +126,7 @@ class NodeFactory(object):
def killall(self): def killall(self):
for n in self.nodes: for n in self.nodes:
n.daemon.stop() n.stop()
class BaseLightningDTests(unittest.TestCase): class BaseLightningDTests(unittest.TestCase):
@ -1541,9 +1541,8 @@ class LightningDTests(BaseLightningDTests):
time.sleep(1) time.sleep(1)
assert l1.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 99990000 assert l1.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 99990000
assert l2.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 10000 assert l2.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 10000
# Stop l2, l1 will reattempt to connect # Stop l2, l1 will reattempt to connect
l2.daemon.stop() l2.stop()
# Wait for l1 to notice # Wait for l1 to notice
wait_for(lambda: not l1.rpc.getpeers()['peers'][0]['connected']) wait_for(lambda: not l1.rpc.getpeers()['peers'][0]['connected'])
@ -1562,7 +1561,7 @@ class LightningDTests(BaseLightningDTests):
assert l2.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 20000 assert l2.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 20000
# Finally restart l1, and make sure it remembers # Finally restart l1, and make sure it remembers
l1.daemon.stop() l1.stop()
l1.daemon.start() l1.daemon.start()
assert l1.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 99980000 assert l1.rpc.getpeers()['peers'][0]['msatoshi_to_us'] == 99980000

View File

@ -58,17 +58,28 @@ class TailableProc(object):
self.thread.start() self.thread.start()
self.running = True self.running = True
def stop(self): def stop(self, timeout=10):
if self.outputDir: if self.outputDir:
logpath = os.path.join(self.outputDir, 'log') logpath = os.path.join(self.outputDir, 'log')
with open(logpath, 'w') as f: with open(logpath, 'w') as f:
for l in self.logs: for l in self.logs:
f.write(l + '\n') f.write(l + '\n')
self.proc.terminate() self.proc.terminate()
# Now give it some time to react to the signal
rc = self.proc.wait(timeout)
if rc is None:
self.proc.kill() self.proc.kill()
self.proc.wait() self.proc.wait()
self.thread.join() self.thread.join()
if failed:
raise(ValueError("Process '{}' did not cleanly shutdown".format(self.proc.pid)))
return self.proc.returncode
def tail(self): def tail(self):
"""Tail the stdout of the process and remember it. """Tail the stdout of the process and remember it.
@ -231,12 +242,14 @@ class LightningD(TailableProc):
self.wait_for_log("Creating IPv6 listener on port") self.wait_for_log("Creating IPv6 listener on port")
logging.info("LightningD started") logging.info("LightningD started")
def stop(self): def wait(self, timeout=10):
# If it's already crashing, wait a bit for log dump. """Wait for the daemon to stop for up to timeout seconds
if os.path.isfile(os.path.join(self.lightning_dir, 'crash.log')):
time.sleep(2) Returns the returncode of the process, None if the process did
TailableProc.stop(self) not return before the timeout triggers.
logging.info("LightningD stopped") """
self.proc.wait(timeout)
return self.proc.returncode
class LightningNode(object): class LightningNode(object):
def __init__(self, daemon, rpc, btc, executor): def __init__(self, daemon, rpc, btc, executor):
@ -316,3 +329,24 @@ class LightningNode(object):
on cleanup""" on cleanup"""
self.known_fail = True self.known_fail = True
def stop(self, timeout=10):
""" Attempt to do a clean shutdown, but kill if it hangs
"""
# Tell the daemon to stop
try:
# May fail if the process already died
self.rpc.stop()
except:
pass
rc = self.daemon.wait(timeout)
# If it did not stop be more insistent
if rc is None:
rc = self.daemon.stop()
if rc != 0:
raise ValueError("Node did not exit cleanly, rc={}".format(rc))
else:
return rc