Enhance and fix bugs in psql plugin

1. Check Patronictl statuses
2. Don't error during node primary transitions
This commit is contained in:
Joshua Boniface 2023-12-07 11:14:16 -05:00
parent 9dbadfdd6e
commit e7f21b7058
1 changed file with 24 additions and 24 deletions

View File

@ -66,6 +66,8 @@ class MonitoringPluginScript(MonitoringPlugin):
# Run any imports first # Run any imports first
from psycopg2 import connect from psycopg2 import connect
from json import loads as jloads
from daemon_lib.common import run_os_command
conn_api = None conn_api = None
cur_api = None cur_api = None
@ -77,7 +79,7 @@ class MonitoringPluginScript(MonitoringPlugin):
# Craft a message that can be used by the clients # Craft a message that can be used by the clients
message = "Successfully connected to PostgreSQL databases on localhost" message = "Successfully connected to PostgreSQL databases on localhost"
# Check the Metadata database (primary) # Check the API database
try: try:
conn_api = connect( conn_api = connect(
host=self.this_node.name, host=self.this_node.name,
@ -99,35 +101,33 @@ class MonitoringPluginScript(MonitoringPlugin):
if conn_api is not None: if conn_api is not None:
conn_api.close() conn_api.close()
if health_delta == 0: # Check for Patroni status
# Check the PowerDNS database (secondary) _, stdout, _ = run_os_command("patronictl --config-file /etc/patroni/config.yml list --format json")
try: patronictl_status = jloads(stdout)
conn_pdns = connect( this_node_patronictl_status = [p for p in patronictl_status if p["Member"] == self.this_node.name][0]
host=self.this_node.name,
port=self.config["pdns_postgresql_port"], if health_delta == 0 and not this_node_patronictl_status:
dbname=self.config["pdns_postgresql_dbname"], health_delta = 10
user=self.config["pdns_postgresql_user"], message = "Unable to determine Patroni PostgreSQL node state"
password=self.config["pdns_postgresql_password"],
) elif health_delta == 0 and this_node_patronictl_status["State"] != "running":
cur_pdns = conn_pdns.cursor() health_delta = 10
cur_pdns.execute("""SELECT * FROM supermasters""") message = "Patroni PostgreSQL state us not running"
data = cur_pdns.fetchone()
except Exception as e: # Handle some exceptional cases
health_delta = 50 if health_delta > 0:
err = str(e).split('\n')[0] if self.this_node.coordinator_state in ["takeover", "relinquish"]:
message = f"Failed to connect to PostgreSQL database {self.config['pdns_postgresql_dbname']}: {err}" # This scenario occurs if this plugin run catches a node transitioning from primary to secondary coordinator
finally: # We can ignore it.
if cur_pdns is not None: health_delta = 0
cur_pdns.close() message = "Patroni PostgreSQL error reported but currently transitioning coordinator state; ignoring."
if conn_pdns is not None:
conn_pdns.close()
# Set the health delta in our local PluginResult object # Set the health delta in our local PluginResult object
self.plugin_result.set_health_delta(health_delta) self.plugin_result.set_health_delta(health_delta)
# Set the message in our local PluginResult object # Set the message in our local PluginResult object
self.plugin_result.set_message(message) self.plugin_result.set_message(message)
# Return our local PluginResult object # Return our local PluginResult object
return self.plugin_result return self.plugin_result