From fa900f6212f23e4a87362fb52b0a00ef7a6ad110 Mon Sep 17 00:00:00 2001
From: "Joshua M. Boniface" <joshua@boniface.me>
Date: Wed, 15 Feb 2023 16:28:56 -0500
Subject: [PATCH] Fix bugs and formatting of health messages

---
 client-cli/pvc/cli_lib/cluster.py | 21 +++++++++++++++---
 daemon-common/cluster.py          | 36 +++++++++++++++++++------------
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/client-cli/pvc/cli_lib/cluster.py b/client-cli/pvc/cli_lib/cluster.py
index 33180c68..0e2cbf22 100644
--- a/client-cli/pvc/cli_lib/cluster.py
+++ b/client-cli/pvc/cli_lib/cluster.py
@@ -146,7 +146,7 @@ def format_info(cluster_information, oformat):
         health_text += " (maintenance on)"
 
     ainformation.append(
-        "{}Cluster health:{}      {}{}{}".format(
+        "{}Cluster health:{}  {}{}{}".format(
             ansiprint.purple(),
             ansiprint.end(),
             health_colour,
@@ -155,8 +155,23 @@ def format_info(cluster_information, oformat):
         )
     )
     if cluster_information["health_messages"]:
-        for line in cluster_information["health_messages"]:
-            ainformation.append("                     > {}".format(line))
+        health_messages = "\n                 > ".join(
+            sorted(cluster_information["health_messages"])
+        )
+        ainformation.append(
+            "{}Health messages:{} > {}".format(
+                ansiprint.purple(),
+                ansiprint.end(),
+                health_messages,
+            )
+        )
+    else:
+        ainformation.append(
+            "{}Health messages:{} N/A".format(
+                ansiprint.purple(),
+                ansiprint.end(),
+            )
+        )
 
     if oformat == "short":
         return "\n".join(ainformation)
diff --git a/daemon-common/cluster.py b/daemon-common/cluster.py
index ed285f92..57565c93 100644
--- a/daemon-common/cluster.py
+++ b/daemon-common/cluster.py
@@ -62,30 +62,35 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
 
     for index, node in enumerate(node_list):
         # Apply node health values to total health number
-        cluster_health -= 100 - node["health"]
+        try:
+            node_health_int = int(node["health"])
+        except Exception:
+            node_health_int = 100
+        cluster_health -= 100 - node_health_int
+
         for entry in node["health_details"]:
             if entry["health_delta"] > 0:
                 messages.append(
-                    f"{node['name']}: plugin {entry['plugin_name']}: {entry['message']}"
+                    f"{node['name']}: plugin '{entry['name']}': {entry['message']}"
                 )
 
         # Handle unhealthy node states
         if node["daemon_state"] not in ["run"]:
             cluster_health -= health_delta_map["node_stopped"]
             messages.append(
-                f"cluster: {node['name']} in {node['daemon_state']} daemon state"
+                f"cluster: Node {node['name']} in {node['daemon_state'].upper()} daemon state"
             )
         elif node["domain_state"] not in ["ready"]:
             cluster_health -= health_delta_map["node_flushed"]
             messages.append(
-                f"cluster: {node['name']} in {node['domain_state']} domain state"
+                f"cluster: Node {node['name']} in {node['domain_state'].upper()} domain state"
             )
 
     for index, vm in enumerate(vm_list):
         # Handle unhealthy VM states
         if vm["state"] not in ["start", "disable", "migrate", "unmigrate", "provision"]:
             cluster_health -= health_delta_map["vm_stopped"]
-            messages.append(f"cluster: {vm['name']} in {vm['state']} state")
+            messages.append(f"cluster: VM {vm['name']} in {vm['state'].upper()} state")
 
     for index, ceph_osd in enumerate(ceph_osd_list):
         in_texts = {1: "in", 0: "out"}
@@ -95,12 +100,12 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
         if in_texts[ceph_osd["stats"]["in"]] not in ["in"]:
             cluster_health -= health_delta_map["osd_out"]
             messages.append(
-                f"cluster: OSD {ceph_osd['id']} in {in_texts[ceph_osd['stats']['in']]} state"
+                f"cluster: Ceph OSD {ceph_osd['id']} in {in_texts[ceph_osd['stats']['in']].upper()} state"
             )
         elif up_texts[ceph_osd["stats"]["up"]] not in ["up"]:
             cluster_health -= health_delta_map["osd_down"]
             messages.append(
-                f"cluster: OSD {ceph_osd['id']} in {up_texts[ceph_osd['stats']['up']]} state"
+                f"cluster: Ceph OSD {ceph_osd['id']} in {up_texts[ceph_osd['stats']['up']].upper()} state"
             )
 
     # Check for (n-1) overprovisioning
@@ -128,7 +133,7 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
     if alloc_total > n_minus_1_total:
         cluster_health -= health_delta_map["memory_overprovisioned"]
         messages.append(
-            f"cluster: Total VM memory is overprovisioned ({alloc_total} > {n_minus_1_total} n-1)"
+            f"cluster: Total memory is OVERPROVISIONED ({alloc_total} > {n_minus_1_total} @ N-1)"
         )
 
     # Check Ceph cluster health
@@ -136,16 +141,19 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
     ceph_health_status = ceph_health["status"]
     ceph_health_entries = ceph_health["checks"].keys()
 
+    ceph_health_status_map = {
+        "HEALTH_ERR": "ERROR",
+        "HEALTH_WARN": "WARNING",
+    }
+    for entry in ceph_health_entries:
+        messages.append(
+            f"cluster: Ceph {ceph_health_status_map[ceph_health['checks'][entry]['severity']]} {entry}: {ceph_health['checks'][entry]['summary']['message']}"
+        )
+
     if ceph_health_status == "HEALTH_ERR":
         cluster_health -= health_delta_map["ceph_err"]
-        messages.append(
-            f"cluster: Ceph cluster in ERROR state: {', '.join(ceph_health_entries)}"
-        )
     elif ceph_health_status == "HEALTH_WARN":
         cluster_health -= health_delta_map["ceph_warn"]
-        messages.append(
-            f"cluster: Ceph cluster in WARNING state: {', '.join(ceph_health_entries)}"
-        )
 
     return cluster_health, messages