From caadafa80d2d0fb8d298c593eb47b0beb6c32428 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Fri, 15 Sep 2023 19:07:29 -0400 Subject: [PATCH] Add PSU redundancy sensor check --- node-daemon/plugins/psur | 138 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 node-daemon/plugins/psur diff --git a/node-daemon/plugins/psur b/node-daemon/plugins/psur new file mode 100644 index 00000000..1bf03223 --- /dev/null +++ b/node-daemon/plugins/psur @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 + +# psur.py - PVC Monitoring example plugin for PSU Redundancy +# Part of the Parallel Virtual Cluster (PVC) system +# +# Copyright (C) 2018-2022 Joshua M. Boniface +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +############################################################################### + +# This script provides an example of a PVC monitoring plugin script. It will create +# a simple plugin to check IPMI for power supply reundancy status. + +# This script can thus be used as an example or reference implementation of a +# PVC monitoring pluginscript and expanded upon as required. + +# A monitoring plugin script must implement the class "MonitoringPluginScript" which +# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation +# of the role of each function is provided in context of the example; see the other +# examples for more potential uses. + +# WARNING: +# +# This script will run in the context of the node daemon keepalives as root. +# DO NOT install untrusted, unvetted plugins under any circumstances. + + +# This import is always required here, as MonitoringPlugin is used by the +# MonitoringPluginScript class +from pvcnoded.objects.MonitoringInstance import MonitoringPlugin + + +# A monitoring plugin script must always expose its nice name, which must be identical to +# the file name +PLUGIN_NAME = "psur" + + +# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin. +class MonitoringPluginScript(MonitoringPlugin): + def setup(self): + """ + setup(): Perform special setup steps during node daemon startup + + This step is optional and should be used sparingly. + + If you wish for the plugin to not load in certain conditions, do any checks here + and return a non-None failure message to indicate the error. + """ + + # Run any imports first + from daemon_lib.common import run_os_command + from re import match + + _ipmitool_ret, _ipmitool_list, _ = run_os_command("ipmitool sdr type 'Power Supply'") + if _ipmitool_ret != 0: + return "Error running ipmitool command" + else: + search_values = [ + "PS Redundancy", # Dell PowerEdge + "Power Supplies", # HP ProLiant + "PS_RDNDNT_MODE", # Cisco UCS + ] + reading_lines = [l for l in _ipmitool_list.split('\n') if len(l.split('|')) > 0 and l.split('|')[0].strip() in search_values] + if len(reading_lines) < 1: + return "No valid input power sensors found" + + def run(self, coordinator_state=None): + """ + run(): Perform the check actions and return a PluginResult object + + The {coordinator_state} can be used to check if this is a "primary" coordinator, "secondary" coordinator, or "client" (non-coordinator) + """ + + # Run any imports first + from daemon_lib.common import run_os_command + from re import match + + health_delta = 0 + messages = list() + + _ipmitool_ret, _ipmitool_list, _ = run_os_command("ipmitool sdr type 'Power Supply'") + if _ipmitool_ret != 0 or len(_ipmitool_list.split('\n')) < 1: + health_delta = 0 + messages.append("Error running ipmitool command") + else: + search_values = [ + "PS Redundancy", # Dell PowerEdge + "Power Supplies", # HP ProLiant + "PS_RDNDNT_MODE", # Cisco UCS + ] + + reading_lines = [l for l in _ipmitool_list.split('\n') if len(l.split('|')) > 0 and l.split('|')[0].strip() in search_values] + if len(reading_lines) > 0: + for reading_line in reading_lines: + reading_sensor = reading_line.split('|')[1].strip() + reading_text = reading_line.split('|')[-1].strip() + + if reading_text == "Fully Redundant": + health_delta += 0 + messages.append(f"Input power sensor {reading_sensor} reports {reading_text}") + elif reading_text == "No Reading": + health_delta += 5 + messages.append("Input power sensor {reading_sensor} reports {reading_text}, redundant power not configured") + else: + health_delta += 10 + messages.append(f"Input power sensor {reading_sensor} reports {reading_text}") + else: + health_delta = 5 + messages.append("No valid input power sensors found, but configured") + + # Set the health delta in our local PluginResult object + self.plugin_result.set_health_delta(health_delta) + + # Set the message in our local PluginResult object + self.plugin_result.set_message(', '.join(messages)) + + # Return our local PluginResult object + return self.plugin_result + + def cleanup(self): + """ + cleanup(): Perform special cleanup steps during node daemon termination + + This step is optional and should be used sparingly. + """ + + pass