#!/usr/bin/env python3
# fencing.py - PVC daemon function library, node fencing functions
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import time

import daemon_lib.common as common

import pvcnoded.VMInstance as VMInstance


#
# Fence thread entry function
#
def fenceNode(node_name, zkhandler, config, logger):
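    # A node is fenced once its daemon state has gone 'dead': we give it several chances
    # ("saving throws") to recover, then reboot it via IPMI, and finally migrate its VMs
    # away if the configured fence policy allows it.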
    # We allow exactly 6 saving throws (30 seconds at the default keepalive interval) for the host to come back online before we kill it
    failcount_limit = 6
    failcount = 0
    while failcount < failcount_limit:
        # Wait one keepalive interval (5 seconds by default)
        time.sleep(config['keepalive_interval'])
        # Get the state
        node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
        # Is it still 'dead'?
        if node_daemon_state == 'dead':
            failcount += 1
            logger.out('Node "{}" failed {}/{} saving throws'.format(node_name, failcount, failcount_limit), state='w')
        # It changed back to something else, so it must be alive
        else:
            logger.out('Node "{}" passed a saving throw; canceling fence'.format(node_name), state='o')
            return

    logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state='w')

    # Get IPMI information
    ipmi_hostname = zkhandler.read(('node.ipmi.hostname', node_name))
    ipmi_username = zkhandler.read(('node.ipmi.username', node_name))
    ipmi_password = zkhandler.read(('node.ipmi.password', node_name))

    # Shoot it in the head
    fence_status = rebootViaIPMI(ipmi_hostname, ipmi_username, ipmi_password, logger)

    # Hold to ensure the fence takes effect and system stabilizes
    time.sleep(config['keepalive_interval'] * 2)

    # Force into secondary network state if needed
    if node_name in config['coordinators']:
        logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
        zkhandler.write([
            (('node.state.router', node_name), 'secondary')
        ])
        if zkhandler.read('base.config.primary_node') == node_name:
            zkhandler.write([
                ('base.config.primary_node', 'none')
            ])

    # If the fence succeeded and successful_fence is migrate
    if fence_status and config['successful_fence'] == 'migrate':
        migrateFromFencedNode(zkhandler, node_name, config, logger)

    # If the fence failed and failed_fence is migrate
    if not fence_status and config['failed_fence'] == 'migrate' and config['suicide_intervals'] != '0':
        migrateFromFencedNode(zkhandler, node_name, config, logger)


# Migrate hosts away from a fenced node
def migrateFromFencedNode(zkhandler, node_name, config, logger):
    logger.out('Migrating VMs from dead node "{}" to new hosts'.format(node_name), state='i')

    # Get the list of VMs
    dead_node_running_domains = zkhandler.read(('node.running_domains', node_name)).split()

    # Set the node to a custom domainstate so we know what's happening
    zkhandler.write([
        (('node.state.domain', node_name), 'fence-flush')
    ])

    # Migrate a VM after a flush
    def fence_migrate_vm(dom_uuid):
        # Flush any stale RBD locks held by the dead node so the VM can be started elsewhere
        VMInstance.flush_locks(zkhandler, logger, dom_uuid)

        # Find a new target node for the VM
        target_node = common.findTargetNode(zkhandler, dom_uuid)

        if target_node is not None:
            logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
            zkhandler.write([
                (('domain.state', dom_uuid), 'start'),
                (('domain.node', dom_uuid), target_node),
                (('domain.last_node', dom_uuid), node_name),
            ])
        else:
            logger.out('No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(dom_uuid), state='i')
            zkhandler.write([
                (('domain.state', dom_uuid), 'stopped'),
                (('domain.meta.autostart', dom_uuid), 'True'),
            ])

    # Loop through the VMs
    for dom_uuid in dead_node_running_domains:
        fence_migrate_vm(dom_uuid)

    # Set node in flushed state for easy remigrating when it comes back
    zkhandler.write([
        (('node.state.domain', node_name), 'flushed')
    ])


#
# Perform an IPMI fence
#
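# A fence is considered successful only when the chassis can be confirmed to be either
# powered on after a successful reset, or powered off (a known-offline state, e.g. after
# a serious hardware failure). Any other or unknown power state may indicate a silent
# failure and is treated as a failed fence.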
def rebootViaIPMI(ipmi_hostname, ipmi_user, ipmi_password, logger):
    # Forcibly reboot the node
    ipmi_command_reset = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power reset'.format(
        ipmi_hostname, ipmi_user, ipmi_password
    )
    ipmi_reset_retcode, ipmi_reset_stdout, ipmi_reset_stderr = common.run_os_command(ipmi_command_reset)

    if ipmi_reset_retcode != 0:
        logger.out('Failed to reboot dead node', state='e')
        print(ipmi_reset_stderr)
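
    # Even if the reset command failed, we continue: powering the chassis on and re-checking
    # its state below lets a confirmed-off node still be treated as a successful fence.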
    time.sleep(1)

    # Power on the node (just in case it is offline)
    ipmi_command_start = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power on'.format(
        ipmi_hostname, ipmi_user, ipmi_password
    )
    ipmi_start_retcode, ipmi_start_stdout, ipmi_start_stderr = common.run_os_command(ipmi_command_start)

    time.sleep(2)

    # Check the chassis power state
    logger.out('Checking power state of dead node', state='i')
    ipmi_command_status = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power status'.format(
        ipmi_hostname, ipmi_user, ipmi_password
    )
    ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(ipmi_command_status)

    if ipmi_reset_retcode == 0:
        if ipmi_status_stdout == "Chassis Power is on":
            # We successfully rebooted the node and it is powered on; this is a successful fence
            logger.out('Successfully rebooted dead node', state='o')
            return True
        elif ipmi_status_stdout == "Chassis Power is off":
            # We successfully rebooted the node but it is powered off; this might or might not be expected, but the node is confirmed off so we can call it a successful fence
            logger.out('Chassis power is in confirmed off state after successful IPMI reboot; proceeding with fence-flush', state='o')
            return True
        else:
            # We successfully rebooted the node but it is in some unknown power state; since this might indicate a silent failure, we must call it a failed fence
            logger.out('Chassis power is in an unknown state after successful IPMI reboot; not performing fence-flush', state='e')
            return False
    else:
        if ipmi_status_stdout == "Chassis Power is off":
            # We failed to reboot the node but it is powered off; it has probably suffered a serious hardware failure, but the node is confirmed off so we can call it a successful fence
            logger.out('Chassis power is in confirmed off state after failed IPMI reboot; proceeding with fence-flush', state='o')
            return True
        else:
            # We failed to reboot the node and it is in some other power state (including "on"); since this might indicate a silent failure, we must call it a failed fence
            logger.out('Chassis power is not in confirmed off state after failed IPMI reboot; not performing fence-flush', state='e')
            return False


#
# Verify that IPMI connectivity to this host exists (used during node init)
#
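# The short command timeout ensures an unreachable or dead BMC cannot hang node startup.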
def verifyIPMI(ipmi_hostname, ipmi_user, ipmi_password):
    ipmi_command_status = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power status'.format(
        ipmi_hostname, ipmi_user, ipmi_password
    )
    ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(ipmi_command_status, timeout=2)
    # A zero return code and an "on" power state confirm a reachable, working IPMI interface
    if ipmi_status_retcode == 0 and ipmi_status_stdout == "Chassis Power is on":
        return True
    else:
        return False