Adjust keepalive health printing and ordering

This commit is contained in:
Joshua Boniface 2023-02-24 10:38:24 -05:00
parent 202dc3ed59
commit 7c07fbefff
3 changed files with 79 additions and 43 deletions

View File

@ -365,7 +365,10 @@ class MonitoringInstance(object):
plugin_results.append(future.result())
for result in sorted(plugin_results, key=lambda x: x.plugin_name):
if self.config["log_keepalive_plugin_details"]:
if (
self.config["log_keepalives"]
and self.config["log_keepalive_plugin_details"]
):
self.logger.out(
result.message + f" [-{result.health_delta}]",
state="t",
@ -376,13 +379,6 @@ class MonitoringInstance(object):
if total_health < 0:
total_health = 0
if total_health > 90:
health_colour = self.logger.fmt_green
elif total_health > 50:
health_colour = self.logger.fmt_yellow
else:
health_colour = self.logger.fmt_red
self.zkhandler.write(
[
(
@ -391,10 +387,6 @@ class MonitoringInstance(object):
),
]
)
self.logger.out(
f"Node health: {health_colour}{total_health}%{self.logger.fmt_end}",
state="t",
)
def run_cleanup(self, plugin):
    """Run the cleanup hook of a single plugin.

    Calls ``plugin.cleanup()`` and hands back whatever that call returns.
    """
    cleanup_result = plugin.cleanup()
    return cleanup_result

View File

@ -67,6 +67,7 @@ class NodeInstance(object):
self.network_list = []
self.domain_list = []
# Node resources
self.health = 100
self.domains_count = 0
self.memused = 0
self.memfree = 0
@ -224,6 +225,28 @@ class NodeInstance(object):
)
self.flush_thread.start()
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.monitoring.health", self.name)
)
def watch_node_health(data, stat, event=""):
    """Mirror this node's monitoring health key into ``self.health``.

    Registered as a DataWatch callback on the "node.monitoring.health"
    path for ``self.name``. ``data`` is the raw key value, ``stat`` is
    unused here, and ``event`` is the triggering watch event (if any).

    Returns False when the key was deleted; otherwise returns None.
    """
    key_deleted = bool(event) and event.type == "DELETED"
    if key_deleted:
        # The key existed and has now been removed: stop this watcher, since
        # this class instance is about to be reaped in Daemon.py
        return False

    # A value without a decode() method (presumably None when the key holds
    # no data - TODO confirm) falls back to a health of 100
    try:
        health_value = data.decode("ascii")
    except AttributeError:
        health_value = 100

    # Prefer an integer; a value that does not parse as one is stored as-is
    try:
        health_value = int(health_value)
    except ValueError:
        pass

    # Only touch the attribute when the value actually changed
    if health_value != self.health:
        self.health = health_value
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path("node.memory.free", self.name)
)

View File

@ -644,8 +644,27 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
# Keepalive update function
def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
debug = config["debug"]
if debug:
logger.out("Keepalive starting", state="d", prefix="main-thread")
# Display node information to the terminal
if config["log_keepalives"]:
if this_node.router_state == "primary":
cst_colour = logger.fmt_green
elif this_node.router_state == "secondary":
cst_colour = logger.fmt_blue
else:
cst_colour = logger.fmt_cyan
logger.out(
"{}{} keepalive @ {}{} [{}{}{}]".format(
logger.fmt_purple,
config["node_hostname"],
datetime.now(),
logger.fmt_end,
logger.fmt_bold + cst_colour,
this_node.router_state,
logger.fmt_end,
),
state="t",
)
# Set the migration selector in Zookeeper for clients to read
if config["enable_hypervisor"]:
@ -808,44 +827,51 @@ def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
except Exception:
logger.out("Failed to set keepalive data", state="e")
# Display node information to the terminal
# Run this here since monitoring plugins output directly
monitoring_instance.run_plugins()
# Allow the health value to update in the Node instance
time.sleep(0.1)
if config["log_keepalives"]:
if this_node.router_state == "primary":
cst_colour = logger.fmt_green
elif this_node.router_state == "secondary":
cst_colour = logger.fmt_blue
if this_node.maintenance is True:
maintenance_colour = logger.fmt_blue
else:
cst_colour = logger.fmt_cyan
logger.out(
"{}{} keepalive @ {}{} [{}{}{}]".format(
logger.fmt_purple,
config["node_hostname"],
datetime.now(),
logger.fmt_end,
logger.fmt_bold + cst_colour,
this_node.router_state,
logger.fmt_end,
),
state="t",
)
maintenance_colour = logger.fmt_green
if isinstance(this_node.health, int):
if this_node.health > 90:
health_colour = logger.fmt_green
elif this_node.health > 50:
health_colour = logger.fmt_yellow
else:
health_colour = logger.fmt_red
health_text = str(this_node.health) + "%"
else:
health_colour = logger.fmt_blue
health_text = "N/A"
if config["log_keepalive_cluster_details"]:
logger.out(
"{bold}Maintenance:{nofmt} {maint} "
"{bold}Node VMs:{nofmt} {domcount} "
"{bold}Node OSDs:{nofmt} {osdcount} "
"{bold}Maintenance:{nofmt} {maintenance_colour}{maintenance}{nofmt} "
"{bold}Health:{nofmt} {health_colour}{health}{nofmt} "
"{bold}VMs:{nofmt} {domcount} "
"{bold}OSDs:{nofmt} {osdcount} "
"{bold}Load:{nofmt} {load} "
"{bold}Memory [MiB]: VMs:{nofmt} {allocmem} "
"{bold}Memory [MiB]: "
"{bold}Used:{nofmt} {usedmem} "
"{bold}Free:{nofmt} {freemem}".format(
bold=logger.fmt_bold,
maintenance_colour=maintenance_colour,
health_colour=health_colour,
nofmt=logger.fmt_end,
maint=this_node.maintenance,
maintenance=this_node.maintenance,
health=health_text,
domcount=this_node.domains_count,
osdcount=osds_this_node,
load=this_node.cpuload,
freemem=this_node.memfree,
usedmem=this_node.memused,
allocmem=this_node.memalloc,
),
state="t",
)
@ -893,8 +919,3 @@ def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
zkhandler.write(
[(("node.state.daemon", node_name), "dead")]
)
monitoring_instance.run_plugins()
if debug:
logger.out("Keepalive finished", state="d", prefix="main-thread")