From 8f906c1f8129832c2a945448eecdfa26c97d62ac Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 12 Oct 2021 10:59:09 -0400 Subject: [PATCH] Use power off in fence instead of reset Use a power off (and then make the power on a requirement) during a node fence. Removes some potential ambiguity in the power state, since we will know for certain if it is off. --- docs/cluster-architecture.md | 2 +- node-daemon/pvcnoded/util/fencing.py | 30 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/cluster-architecture.md b/docs/cluster-architecture.md index f6381be8..e3d00bad 100644 --- a/docs/cluster-architecture.md +++ b/docs/cluster-architecture.md @@ -322,7 +322,7 @@ Once the cluster, and specifically one node in the cluster, has determined that During the `dead` process, the failed node has 6 chances, called "saving throws", at `keepalive_interval` second windows, to send another keepalive before it is fenced. This additional, fixed, delay helps ensure that the cluster will gracefully recover from intermittent network failures or loss of Zookeeper contact, by providing nodes up to another 6 keepalive intervals to save themselves once the fence timer actually begins. This bring the total time, with default options, of a node stopping contact to a node being fenced, to between 60 and 65 seconds. This duration is considered by the author an acceptable compromise between speedy recovery and avoiding false positives (and hence larger outages). -Once a node has been marked `dead` and has failed its 6 "saving throws", the fence process triggers an IPMI chassis reset sequence. First, the node is issued the standard IPMI `chassis power reset` command to trigger a cold system reset. Next, it waits a fixed 1 second and then issues a `chassis power on` signal to ensure the node is powered on (just in case it had already shut itself off). 
The node then waits a fixed 2 seconds, and then checks the current `chassis power status`. Using the results of these 3 commands, PVC is then able to determine with near certainty whether the node has truly been forced offline or not, and it can proceed to the next step. +Once a node has been marked `dead` and has failed its 6 "saving throws", the fence process triggers an IPMI chassis reset sequence. First, the node is issued an IPMI `chassis power off` command to trigger a cold system shutdown. Next, it waits a fixed 1 second and then checks and logs the current `chassis power status`, and then issues a `chassis power on` signal to start up the node. It then finally waits a fixed 2 seconds, and then checks the current `chassis power status`. Using the results of these commands, PVC is then able to determine with near certainty whether the node has truly been forced offline or not, and it can proceed to the next step. #### Recovery from Node Fences diff --git a/node-daemon/pvcnoded/util/fencing.py b/node-daemon/pvcnoded/util/fencing.py index 1435d697..4b703034 100644 --- a/node-daemon/pvcnoded/util/fencing.py +++ b/node-daemon/pvcnoded/util/fencing.py @@ -133,23 +133,39 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger): # Perform an IPMI fence # def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger): - # Forcibly reboot the node - ipmi_command_reset = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power reset'.format( + # Power off the node + logger.out('Sending power off to dead node', state='i') + ipmi_command_stop = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power off'.format( ipmi_hostname, ipmi_user, ipmi_password ) - ipmi_reset_retcode, ipmi_reset_stdout, ipmi_reset_stderr = common.run_os_command(ipmi_command_reset) + ipmi_stop_retcode, ipmi_stop_stdout, ipmi_stop_stderr = common.run_os_command(ipmi_command_stop) - if ipmi_reset_retcode != 0: - logger.out(f'Failed to reboot dead node: 
{ipmi_reset_stderr}', state='e') + if ipmi_stop_retcode != 0: + logger.out(f'Failed to power off dead node: {ipmi_stop_stderr}', state='e') time.sleep(1) - # Power on the node (just in case it is offline) + # Check the chassis power state + logger.out('Checking power state of dead node', state='i') + ipmi_command_status = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power status'.format( + ipmi_hostname, ipmi_user, ipmi_password + ) + ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(ipmi_command_status) + if ipmi_status_retcode == 0: + logger.out(f'Current chassis power state is: {ipmi_status_stdout.strip()}', state='i') + else: + logger.out(f'Current chassis power state is: Unknown', state='w') + + # Power on the node + logger.out('Sending power on to dead node', state='i') ipmi_command_start = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power on'.format( ipmi_hostname, ipmi_user, ipmi_password ) ipmi_start_retcode, ipmi_start_stdout, ipmi_start_stderr = common.run_os_command(ipmi_command_start) + if ipmi_start_retcode != 0: + logger.out(f'Failed to power on dead node: {ipmi_start_stderr}', state='w') + time.sleep(2) # Check the chassis power state @@ -159,7 +175,7 @@ def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger): ) ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(ipmi_command_status) - if ipmi_reset_retcode == 0: + if ipmi_stop_retcode == 0: if ipmi_status_stdout.strip() == "Chassis Power is on": # We successfully rebooted the node and it is powered on; this is a succeessful fence logger.out('Successfully rebooted dead node', state='o')