Tie fence timers to keepalive_interval
Also wait 2 full keepalive intervals after fencing before doing anything else, to give the Ceph cluster a chance to recover.
This commit is contained in:
parent
4afb288429
commit
0a01d84290
|
@ -155,9 +155,9 @@ def readConfig(pvcnoded_config_file, myhostname):
|
||||||
'log_keepalive_cluster_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_cluster_details'],
|
'log_keepalive_cluster_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_cluster_details'],
|
||||||
'log_keepalive_storage_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_storage_details'],
|
'log_keepalive_storage_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_storage_details'],
|
||||||
'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'],
|
'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'],
|
||||||
'keepalive_interval': o_config['pvc']['system']['intervals']['keepalive_interval'],
|
'keepalive_interval': int(o_config['pvc']['system']['intervals']['keepalive_interval']),
|
||||||
'fence_intervals': o_config['pvc']['system']['intervals']['fence_intervals'],
|
'fence_intervals': int(o_config['pvc']['system']['intervals']['fence_intervals']),
|
||||||
'suicide_intervals': o_config['pvc']['system']['intervals']['suicide_intervals'],
|
'suicide_intervals': int(o_config['pvc']['system']['intervals']['suicide_intervals']),
|
||||||
'successful_fence': o_config['pvc']['system']['fencing']['actions']['successful_fence'],
|
'successful_fence': o_config['pvc']['system']['fencing']['actions']['successful_fence'],
|
||||||
'failed_fence': o_config['pvc']['system']['fencing']['actions']['failed_fence'],
|
'failed_fence': o_config['pvc']['system']['fencing']['actions']['failed_fence'],
|
||||||
'migration_target_selector': o_config['pvc']['system']['migration']['target_selector'],
|
'migration_target_selector': o_config['pvc']['system']['migration']['target_selector'],
|
||||||
|
|
|
@ -35,7 +35,7 @@ def fenceNode(node_name, zk_conn, config, logger):
|
||||||
failcount = 0
|
failcount = 0
|
||||||
while failcount < failcount_limit:
|
while failcount < failcount_limit:
|
||||||
# Wait 5 seconds
|
# Wait one keepalive interval
|
||||||
time.sleep(5)
|
time.sleep(config.keepalive_interval)
|
||||||
# Get the state
|
# Get the state
|
||||||
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
|
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
|
||||||
# Is it still 'dead'
|
# Is it still 'dead'
|
||||||
|
@ -56,8 +56,8 @@ def fenceNode(node_name, zk_conn, config, logger):
|
||||||
|
|
||||||
# Shoot it in the head
|
# Shoot it in the head
|
||||||
fence_status = rebootViaIPMI(ipmi_hostname, ipmi_username, ipmi_password, logger)
|
fence_status = rebootViaIPMI(ipmi_hostname, ipmi_username, ipmi_password, logger)
|
||||||
# Hold to ensure the fence takes effect
|
# Hold to ensure the fence takes effect and system stabilizes
|
||||||
time.sleep(3)
|
time.sleep(config.keepalive_interval * 2)
|
||||||
|
|
||||||
# Force into secondary network state if needed
|
# Force into secondary network state if needed
|
||||||
if node_name in config['coordinators']:
|
if node_name in config['coordinators']:
|
||||||
|
|
Loading…
Reference in New Issue