Fix bugs and formatting of health messages

This commit is contained in:
Joshua Boniface 2023-02-15 16:28:56 -05:00
parent 388f6556c0
commit f4e8449356
2 changed files with 40 additions and 17 deletions

View File

@@ -155,8 +155,23 @@ def format_info(cluster_information, oformat):
) )
) )
if cluster_information["health_messages"]: if cluster_information["health_messages"]:
for line in cluster_information["health_messages"]: health_messages = "\n > ".join(
ainformation.append(" > {}".format(line)) sorted(cluster_information["health_messages"])
)
ainformation.append(
"{}Health messages:{} > {}".format(
ansiprint.purple(),
ansiprint.end(),
health_messages,
)
)
else:
ainformation.append(
"{}Health messages:{} N/A".format(
ansiprint.purple(),
ansiprint.end(),
)
)
if oformat == "short": if oformat == "short":
return "\n".join(ainformation) return "\n".join(ainformation)

View File

@@ -62,30 +62,35 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
for index, node in enumerate(node_list): for index, node in enumerate(node_list):
# Apply node health values to total health number # Apply node health values to total health number
cluster_health -= 100 - node["health"] try:
node_health_int = int(node["health"])
except Exception:
node_health_int = 100
cluster_health -= 100 - node_health_int
for entry in node["health_details"]: for entry in node["health_details"]:
if entry["health_delta"] > 0: if entry["health_delta"] > 0:
messages.append( messages.append(
f"{node['name']}: plugin {entry['plugin_name']}: {entry['message']}" f"{node['name']}: plugin '{entry['name']}': {entry['message']}"
) )
# Handle unhealthy node states # Handle unhealthy node states
if node["daemon_state"] not in ["run"]: if node["daemon_state"] not in ["run"]:
cluster_health -= health_delta_map["node_stopped"] cluster_health -= health_delta_map["node_stopped"]
messages.append( messages.append(
f"cluster: {node['name']} in {node['daemon_state']} daemon state" f"cluster: Node {node['name']} in {node['daemon_state'].upper()} daemon state"
) )
elif node["domain_state"] not in ["ready"]: elif node["domain_state"] not in ["ready"]:
cluster_health -= health_delta_map["node_flushed"] cluster_health -= health_delta_map["node_flushed"]
messages.append( messages.append(
f"cluster: {node['name']} in {node['domain_state']} domain state" f"cluster: Node {node['name']} in {node['domain_state'].upper()} domain state"
) )
for index, vm in enumerate(vm_list): for index, vm in enumerate(vm_list):
# Handle unhealthy VM states # Handle unhealthy VM states
if vm["state"] not in ["start", "disable", "migrate", "unmigrate", "provision"]: if vm["state"] not in ["start", "disable", "migrate", "unmigrate", "provision"]:
cluster_health -= health_delta_map["vm_stopped"] cluster_health -= health_delta_map["vm_stopped"]
messages.append(f"cluster: {vm['name']} in {vm['state']} state") messages.append(f"cluster: VM {vm['name']} in {vm['state'].upper()} state")
for index, ceph_osd in enumerate(ceph_osd_list): for index, ceph_osd in enumerate(ceph_osd_list):
in_texts = {1: "in", 0: "out"} in_texts = {1: "in", 0: "out"}
@@ -95,12 +100,12 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
if in_texts[ceph_osd["stats"]["in"]] not in ["in"]: if in_texts[ceph_osd["stats"]["in"]] not in ["in"]:
cluster_health -= health_delta_map["osd_out"] cluster_health -= health_delta_map["osd_out"]
messages.append( messages.append(
f"cluster: OSD {ceph_osd['id']} in {in_texts[ceph_osd['stats']['in']]} state" f"cluster: Ceph OSD {ceph_osd['id']} in {in_texts[ceph_osd['stats']['in']].upper()} state"
) )
elif up_texts[ceph_osd["stats"]["up"]] not in ["up"]: elif up_texts[ceph_osd["stats"]["up"]] not in ["up"]:
cluster_health -= health_delta_map["osd_down"] cluster_health -= health_delta_map["osd_down"]
messages.append( messages.append(
f"cluster: OSD {ceph_osd['id']} in {up_texts[ceph_osd['stats']['up']]} state" f"cluster: Ceph OSD {ceph_osd['id']} in {up_texts[ceph_osd['stats']['up']].upper()} state"
) )
# Check for (n-1) overprovisioning # Check for (n-1) overprovisioning
@@ -128,7 +133,7 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
if alloc_total > n_minus_1_total: if alloc_total > n_minus_1_total:
cluster_health -= health_delta_map["memory_overprovisioned"] cluster_health -= health_delta_map["memory_overprovisioned"]
messages.append( messages.append(
f"cluster: Total VM memory is overprovisioned ({alloc_total} > {n_minus_1_total} n-1)" f"cluster: Total memory is OVERPROVISIONED ({alloc_total} > {n_minus_1_total} @ N-1)"
) )
# Check Ceph cluster health # Check Ceph cluster health
@@ -136,16 +141,19 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
ceph_health_status = ceph_health["status"] ceph_health_status = ceph_health["status"]
ceph_health_entries = ceph_health["checks"].keys() ceph_health_entries = ceph_health["checks"].keys()
ceph_health_status_map = {
"HEALTH_ERR": "ERROR",
"HEALTH_WARN": "WARNING",
}
for entry in ceph_health_entries:
messages.append(
f"cluster: Ceph {ceph_health_status_map[ceph_health['checks'][entry]['severity']]} {entry}: {ceph_health['checks'][entry]['summary']['message']}"
)
if ceph_health_status == "HEALTH_ERR": if ceph_health_status == "HEALTH_ERR":
cluster_health -= health_delta_map["ceph_err"] cluster_health -= health_delta_map["ceph_err"]
messages.append(
f"cluster: Ceph cluster in ERROR state: {', '.join(ceph_health_entries)}"
)
elif ceph_health_status == "HEALTH_WARN": elif ceph_health_status == "HEALTH_WARN":
cluster_health -= health_delta_map["ceph_warn"] cluster_health -= health_delta_map["ceph_warn"]
messages.append(
f"cluster: Ceph cluster in WARNING state: {', '.join(ceph_health_entries)}"
)
return cluster_health, messages return cluster_health, messages