pvc/health-daemon/plugins/psur

139 lines
5.6 KiB
Python

#!/usr/bin/env python3
# psur.py - PVC Monitoring example plugin for PSU Redundancy
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2024 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
# This script provides an example of a PVC monitoring plugin script. It will create
# a simple plugin to check IPMI for power supply reundancy status.
# This script can thus be used as an example or reference implementation of a
# PVC monitoring pluginscript and expanded upon as required.
# A monitoring plugin script must implement the class "MonitoringPluginScript" which
# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
# of the role of each function is provided in context of the example; see the other
# examples for more potential uses.
# WARNING:
#
# This script will run in the context of the node daemon keepalives as root.
# DO NOT install untrusted, unvetted plugins under any circumstances.
# This import is always required here, as MonitoringPlugin is used by the
# MonitoringPluginScript class
from pvchealthd.objects.MonitoringInstance import MonitoringPlugin
# A monitoring plugin script must always expose its nice name, which must be identical to
# the file name
PLUGIN_NAME = "psur"
# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
class MonitoringPluginScript(MonitoringPlugin):
def setup(self):
"""
setup(): Perform special setup steps during node daemon startup
This step is optional and should be used sparingly.
If you wish for the plugin to not load in certain conditions, do any checks here
and return a non-None failure message to indicate the error.
"""
# Run any imports first
from daemon_lib.common import run_os_command
from re import match
_ipmitool_ret, _ipmitool_list, _ = run_os_command("ipmitool sdr type 'Power Supply'")
if _ipmitool_ret != 0:
return "Error running ipmitool command"
else:
search_values = [
"PS Redundancy", # Dell PowerEdge
"Power Supplies", # HP ProLiant
"PS_RDNDNT_MODE", # Cisco UCS
]
reading_lines = [l for l in _ipmitool_list.split('\n') if len(l.split('|')) > 0 and l.split('|')[0].strip() in search_values]
if len(reading_lines) < 1:
return "No valid input power sensors found"
def run(self, coordinator_state=None):
"""
run(): Perform the check actions and return a PluginResult object
The {coordinator_state} can be used to check if this is a "primary" coordinator, "secondary" coordinator, or "client" (non-coordinator)
"""
# Run any imports first
from daemon_lib.common import run_os_command
from re import match
health_delta = 0
messages = list()
_ipmitool_ret, _ipmitool_list, _ = run_os_command("ipmitool sdr type 'Power Supply'")
if _ipmitool_ret != 0 or len(_ipmitool_list.split('\n')) < 1:
health_delta = 0
messages.append("Error running ipmitool command")
else:
search_values = [
"PS Redundancy", # Dell PowerEdge
"Power Supplies", # HP ProLiant
"PS_RDNDNT_MODE", # Cisco UCS
]
reading_lines = [l for l in _ipmitool_list.split('\n') if len(l.split('|')) > 0 and l.split('|')[0].strip() in search_values]
if len(reading_lines) > 0:
for reading_line in reading_lines:
reading_sensor = reading_line.split('|')[1].strip()
reading_text = reading_line.split('|')[-1].strip()
if reading_text == "Fully Redundant":
health_delta += 0
messages.append(f"Input power sensor {reading_sensor} reports {reading_text}")
elif reading_text == "No Reading":
health_delta += 5
messages.append(f"Input power sensor {reading_sensor} reports {reading_text} (PSU redundancy not configured?)")
else:
health_delta += 10
messages.append(f"Input power sensor {reading_sensor} reports {reading_text}")
else:
health_delta = 5
messages.append("No valid input power sensors found, but configured")
# Set the health delta in our local PluginResult object
self.plugin_result.set_health_delta(health_delta)
# Set the message in our local PluginResult object
self.plugin_result.set_message(', '.join(messages))
# Return our local PluginResult object
return self.plugin_result
def cleanup(self):
"""
cleanup(): Perform special cleanup steps during node daemon termination
This step is optional and should be used sparingly.
"""
pass