Enhance and fix bugs in psql plugin
1. Check Patronictl statuses 2. Don't error during node primary transitions
This commit is contained in:
parent
9dbadfdd6e
commit
e7f21b7058
|
@ -66,6 +66,8 @@ class MonitoringPluginScript(MonitoringPlugin):
|
||||||
|
|
||||||
# Run any imports first
|
# Run any imports first
|
||||||
from psycopg2 import connect
|
from psycopg2 import connect
|
||||||
|
from json import loads as jloads
|
||||||
|
from daemon_lib.common import run_os_command
|
||||||
|
|
||||||
conn_api = None
|
conn_api = None
|
||||||
cur_api = None
|
cur_api = None
|
||||||
|
@ -77,7 +79,7 @@ class MonitoringPluginScript(MonitoringPlugin):
|
||||||
# Craft a message that can be used by the clients
|
# Craft a message that can be used by the clients
|
||||||
message = "Successfully connected to PostgreSQL databases on localhost"
|
message = "Successfully connected to PostgreSQL databases on localhost"
|
||||||
|
|
||||||
# Check the Metadata database (primary)
|
# Check the API database
|
||||||
try:
|
try:
|
||||||
conn_api = connect(
|
conn_api = connect(
|
||||||
host=self.this_node.name,
|
host=self.this_node.name,
|
||||||
|
@ -99,35 +101,33 @@ class MonitoringPluginScript(MonitoringPlugin):
|
||||||
if conn_api is not None:
|
if conn_api is not None:
|
||||||
conn_api.close()
|
conn_api.close()
|
||||||
|
|
||||||
if health_delta == 0:
|
# Check for Patroni status
|
||||||
# Check the PowerDNS database (secondary)
|
_, stdout, _ = run_os_command("patronictl --config-file /etc/patroni/config.yml list --format json")
|
||||||
try:
|
patronictl_status = jloads(stdout)
|
||||||
conn_pdns = connect(
|
this_node_patronictl_status = [p for p in patronictl_status if p["Member"] == self.this_node.name][0]
|
||||||
host=self.this_node.name,
|
|
||||||
port=self.config["pdns_postgresql_port"],
|
if health_delta == 0 and not this_node_patronictl_status:
|
||||||
dbname=self.config["pdns_postgresql_dbname"],
|
health_delta = 10
|
||||||
user=self.config["pdns_postgresql_user"],
|
message = "Unable to determine Patroni PostgreSQL node state"
|
||||||
password=self.config["pdns_postgresql_password"],
|
|
||||||
)
|
elif health_delta == 0 and this_node_patronictl_status["State"] != "running":
|
||||||
cur_pdns = conn_pdns.cursor()
|
health_delta = 10
|
||||||
cur_pdns.execute("""SELECT * FROM supermasters""")
|
message = "Patroni PostgreSQL state us not running"
|
||||||
data = cur_pdns.fetchone()
|
|
||||||
except Exception as e:
|
# Handle some exceptional cases
|
||||||
health_delta = 50
|
if health_delta > 0:
|
||||||
err = str(e).split('\n')[0]
|
if self.this_node.coordinator_state in ["takeover", "relinquish"]:
|
||||||
message = f"Failed to connect to PostgreSQL database {self.config['pdns_postgresql_dbname']}: {err}"
|
# This scenario occurs if this plugin run catches a node transitioning from primary to secondary coordinator
|
||||||
finally:
|
# We can ignore it.
|
||||||
if cur_pdns is not None:
|
health_delta = 0
|
||||||
cur_pdns.close()
|
message = "Patroni PostgreSQL error reported but currently transitioning coordinator state; ignoring."
|
||||||
if conn_pdns is not None:
|
|
||||||
conn_pdns.close()
|
|
||||||
|
|
||||||
# Set the health delta in our local PluginResult object
|
# Set the health delta in our local PluginResult object
|
||||||
self.plugin_result.set_health_delta(health_delta)
|
self.plugin_result.set_health_delta(health_delta)
|
||||||
|
|
||||||
# Set the message in our local PluginResult object
|
# Set the message in our local PluginResult object
|
||||||
self.plugin_result.set_message(message)
|
self.plugin_result.set_message(message)
|
||||||
|
|
||||||
# Return our local PluginResult object
|
# Return our local PluginResult object
|
||||||
return self.plugin_result
|
return self.plugin_result
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue