#!/usr/bin/env python3 # psql.py - PVC Monitoring example plugin for Postgres/Patroni # Part of the Parallel Virtual Cluster (PVC) system # # Copyright (C) 2018-2024 Joshua M. Boniface # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # ############################################################################### # This script provides an example of a PVC monitoring plugin script. It will create # a simple plugin to check the Patroni PostgreSQL instance on the node for operation. # This script can thus be used as an example or reference implementation of a # PVC monitoring pluginscript and expanded upon as required. # A monitoring plugin script must implement the class "MonitoringPluginScript" which # extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation # of the role of each function is provided in context of the example; see the other # examples for more potential uses. # WARNING: # # This script will run in the context of the node daemon keepalives as root. # DO NOT install untrusted, unvetted plugins under any circumstances. # This import is always required here, as MonitoringPlugin is used by the # MonitoringPluginScript class from pvchealthd.objects.MonitoringInstance import MonitoringPlugin # A monitoring plugin script must always expose its nice name, which must be identical to # the file name PLUGIN_NAME = "psql" # The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin. 
class MonitoringPluginScript(MonitoringPlugin):
    """
    Monitoring plugin that checks the PostgreSQL/Patroni instance on this node.

    The check has two parts: a test query against the API database, and a
    lookup of this node's member entry in the output of `patronictl list`.
    """

    def setup(self):
        """
        setup(): Perform special setup steps during node daemon startup

        This step is optional and should be used sparingly.
        """

        # Remember the coordinator state seen by the previous run() so that run()
        # can tell whether the state has been stable across two consecutive runs
        self.last_coordinator_state = None

    def run(self, coordinator_state=None):
        """
        run(): Perform the check actions and return a PluginResult object

        The {coordinator_state} can be used to check if this is a "primary" coordinator,
        "secondary" coordinator, or "client" (non-coordinator)

        Health deltas set by this check:
          * 50 if the test query against the API database fails
          * 10 if this node's Patroni member entry cannot be determined
          * 10 if the Patroni member state is not "running" and the coordinator
            state is unchanged since the previous run
          *  0 otherwise, or if a fault was found while {coordinator_state} is
            "takeover" or "relinquish"

        Returns self.plugin_result after setting its health delta and message.
        """

        # Run any imports first
        from psycopg2 import connect
        from json import loads as jloads
        from daemon_lib.common import run_os_command

        conn_api = None
        cur_api = None

        # Set the health delta to 0 (no change)
        health_delta = 0
        # Craft a message that can be used by the clients
        message = "Successfully connected to PostgreSQL databases on localhost"

        # Check the API database
        try:
            conn_api = connect(
                host=self.this_node.name,
                port=self.config["api_postgresql_port"],
                dbname=self.config["api_postgresql_dbname"],
                user=self.config["api_postgresql_user"],
                password=self.config["api_postgresql_password"],
            )
            cur_api = conn_api.cursor()
            cur_api.execute("""SELECT * FROM alembic_version""")
            # The row itself is not needed; fetching it only proves the query works
            cur_api.fetchone()
        except Exception as e:
            health_delta = 50
            # Only the first line of the error is reported to keep the message short
            err = str(e).split('\n')[0]
            message = f"Failed to connect to PostgreSQL database {self.config['api_postgresql_dbname']}: {err}"
        finally:
            if cur_api is not None:
                cur_api.close()
            if conn_api is not None:
                conn_api.close()

        # Check for Patroni status
        # NOTE(review): only the second element of the returned tuple is used here;
        # it is presumably the command's stdout - confirm against run_os_command
        _, stdout, _ = run_os_command("patronictl --config-file /etc/patroni/config.yml list --format json")

        # Empty or non-JSON output (e.g. patronictl failed) must not abort the plugin
        # run; treat it as "no members" so it is reported as a fault below
        try:
            patronictl_status = jloads(stdout)
        except (TypeError, ValueError):
            patronictl_status = []
        if not isinstance(patronictl_status, list):
            patronictl_status = []

        # Pick this node's member entry, or None when it is not listed. Indexing the
        # filtered list with [0] would raise IndexError for a missing member and make
        # the "unable to determine" branch below unreachable.
        this_node_patronictl_status = next(
            (
                p
                for p in patronictl_status
                if isinstance(p, dict) and p.get("Member") == self.this_node.name
            ),
            None,
        )
        self.logger.out(f"{this_node_patronictl_status}, last node state: {self.last_coordinator_state}, current node state: {coordinator_state}", state="d")

        # Invalid state, nothing returned; this is a fault
        if health_delta == 0 and not this_node_patronictl_status:
            health_delta = 10
            message = "Unable to determine Patroni PostgreSQL node state"

        # We want to check for a non-running Patroni, but not during or immediately after a coordinator
        # transition. So we wait until 2 runs with the same coordinator state have been completed.
        elif (
            health_delta == 0
            and self.last_coordinator_state == coordinator_state
            and this_node_patronictl_status.get("State") != "running"
        ):
            health_delta = 10
            message = "Patroni PostgreSQL state is not running"

        # Handle some exceptional cases
        if health_delta > 0 and coordinator_state in ["takeover", "relinquish"]:
            # This scenario occurs if this plugin run catches a node transitioning from primary to
            # secondary coordinator. We can ignore it.
            health_delta = 0
            message = "Patroni PostgreSQL error reported but currently transitioning coordinator state; ignoring."

        # Set the health delta in our local PluginResult object
        self.plugin_result.set_health_delta(health_delta)

        # Set the message in our local PluginResult object
        self.plugin_result.set_message(message)

        # Update the last coordinator state
        self.last_coordinator_state = coordinator_state

        # Return our local PluginResult object
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Perform special cleanup steps during node daemon termination

        This step is optional and should be used sparingly.
        """

        pass