From a672e06dd23a19001f5fdf906f47befa71b9ac9f Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sat, 6 Jun 2020 13:19:11 -0400 Subject: [PATCH] Move fencing to end of keepalive function --- node-daemon/pvcnoded/Daemon.py | 62 +++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index ddb8f86e..fdbd1e6c 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -1337,37 +1337,6 @@ def node_keepalive(): logger.out('Failed to set keepalive data', state='e') return - # Look for dead nodes and fence them - if not maintenance: - if debug: - print("Look for dead nodes and fence them") - if config['daemon_mode'] == 'coordinator': - for node_name in d_node: - try: - node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) - node_domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name)) - node_keepalive = int(zkhandler.readdata(zk_conn, '/nodes/{}/keepalive'.format(node_name))) - except: - node_daemon_state = 'unknown' - node_domain_state = 'unknown' - node_keepalive = 0 - - # Handle deadtime and fencng if needed - # (A node is considered dead when its keepalive timer is >6*keepalive_interval seconds - # out-of-date while in 'start' state) - node_deadtime = int(time.time()) - ( int(config['keepalive_interval']) * int(config['fence_intervals']) ) - if node_keepalive < node_deadtime and node_daemon_state == 'run': - logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w') - zk_lock = zkhandler.writelock(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) - with zk_lock: - # Ensures that, if we lost the lock race and come out of waiting, - # we won't try to trigger our own fence thread. - if zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) != 'dead': - fence_thread = threading.Thread(target=fencing.fenceNode, args=(node_name, zk_conn, config, logger), kwargs={}) - fence_thread.start() - # Write the updated data after we start the fence thread - zkhandler.writedata(zk_conn, { '/nodes/{}/daemonstate'.format(node_name): 'dead' }) - # Display node information to the terminal if config['log_keepalives']: if this_node.router_state == 'primary': @@ -1425,6 +1394,37 @@ def node_keepalive(): state='t' ) + # Look for dead nodes and fence them + if not maintenance: + if debug: + print("Look for dead nodes and fence them") + if config['daemon_mode'] == 'coordinator': + for node_name in d_node: + try: + node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) + node_domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name)) + node_keepalive = int(zkhandler.readdata(zk_conn, '/nodes/{}/keepalive'.format(node_name))) + except: + node_daemon_state = 'unknown' + node_domain_state = 'unknown' + node_keepalive = 0 + + # Handle deadtime and fencng if needed + # (A node is considered dead when its keepalive timer is >6*keepalive_interval seconds + # out-of-date while in 'start' state) + node_deadtime = int(time.time()) - ( int(config['keepalive_interval']) * int(config['fence_intervals']) ) + if node_keepalive < node_deadtime and node_daemon_state == 'run': + logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w') + zk_lock = zkhandler.writelock(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) + with zk_lock: + # Ensures that, if we lost the lock race and come out of waiting, + # we won't try to trigger our own fence thread. + if zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) != 'dead': + fence_thread = threading.Thread(target=fencing.fenceNode, args=(node_name, zk_conn, config, logger), kwargs={}) + fence_thread.start() + # Write the updated data after we start the fence thread + zkhandler.writedata(zk_conn, { '/nodes/{}/daemonstate'.format(node_name): 'dead' }) + # Start keepalive thread update_timer = startKeepaliveTimer()