#!/usr/bin/env python3

# ceph-cluster.py - PVC Monitoring example plugin for Ceph status
# Part of the Parallel Virtual Cluster (PVC) system
#
#    Copyright (C) 2018-2022 Joshua M. Boniface
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, version 3.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

# This script provides an example of a PVC monitoring plugin script. It will create
# a simple plugin to check the Ceph cluster health for anomalies, and return a health
# delta reflective of the overall Ceph status (HEALTH_WARN = 10, HEALTH_ERR = 50).

# This script can thus be used as an example or reference implementation of a
# PVC monitoring plugin script and expanded upon as required.

# A monitoring plugin script must implement the class "MonitoringPluginScript" which
# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
# of the role of each function is provided in context of the example; see the other
# examples for more potential uses.

# WARNING:
#
# This script will run in the context of the node daemon keepalives as root.
# DO NOT install untrusted, unvetted plugins under any circumstances.

# This import is always required here, as MonitoringPlugin is used by the
# MonitoringPluginScript class
from pvcnoded.objects.MonitoringInstance import MonitoringPlugin


# A monitoring plugin script must always expose its nice name, which must be identical to
# the file name
PLUGIN_NAME = "ceph-cluster"


# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
class MonitoringPluginScript(MonitoringPlugin):
    """
    Example monitoring plugin reporting the overall Ceph cluster health.

    The health delta is 50 for HEALTH_ERR, 10 for HEALTH_WARN and 0 for any
    other reported status.
    """

    def setup(self):
        """
        setup(): Perform special setup steps during node daemon startup

        This step is optional and should be used sparingly.
        """

        pass

    def run(self):
        """
        run(): Perform the check actions and return a PluginResult object

        Connects to the Ceph cluster using the "ceph_config_file" and
        "ceph_admin_keyring" entries of self.config, queries the monitor
        "health" command in JSON format, and records the resulting health
        delta, message and raw health data in self.plugin_result.

        If connecting to the cluster or fetching/parsing the health data
        fails, the error is logged and self.plugin_result is returned
        without being modified by this method.
        """

        # Run any imports first
        from rados import Rados
        from json import loads, dumps

        # Connect to the Ceph cluster
        try:
            ceph_conn = Rados(
                conffile=self.config["ceph_config_file"],
                conf=dict(keyring=self.config["ceph_admin_keyring"]),
            )
            ceph_conn.connect(timeout=1)
        except Exception as e:
            self.log(f"Failed to connect to Ceph cluster: {e}", state="e")
            return self.plugin_result

        # Get the Ceph cluster health
        try:
            # mon_command returns a tuple; element [1] is the output buffer
            # that carries the JSON document
            health_status = loads(
                ceph_conn.mon_command(
                    dumps({"prefix": "health", "format": "json"}), b"", timeout=1
                )[1]
            )
            ceph_health = health_status["status"]
        except Exception as e:
            self.log(f"Failed to get health data from Ceph cluster: {e}", state="e")
            return self.plugin_result
        finally:
            # Always release the cluster handle, on success or failure
            ceph_conn.shutdown()

        # Get a list of error entries in the health status output. A missing
        # (or null) "checks" section is treated as "no entries" so that an
        # unexpected payload cannot raise out of the plugin at this point.
        error_entries = (health_status.get("checks") or {}).keys()

        # Set the health delta based on the errors presented
        if ceph_health == "HEALTH_ERR":
            health_delta = 50
            message = f"Ceph cluster in ERROR state: {', '.join(error_entries)}"
        elif ceph_health == "HEALTH_WARN":
            health_delta = 10
            message = f"Ceph cluster in WARNING state: {', '.join(error_entries)}"
        else:
            health_delta = 0
            message = "Ceph cluster in OK state"

        # Set the health delta in our local PluginResult object
        self.plugin_result.set_health_delta(health_delta)

        # Set the message in our local PluginResult object
        self.plugin_result.set_message(message)

        # Set the detailed data in our local PluginResult object
        self.plugin_result.set_data(dumps(health_status))

        # Return our local PluginResult object
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Perform special cleanup steps during node daemon termination

        This step is optional and should be used sparingly.
        """

        pass