Add health status messages to the cluster status output
Provide textual explanations for a degraded status, including specific node/VM/OSD issues as well as detailed Ceph health output, in keeping with a "single pane of glass" mentality.
This commit is contained in:
parent
65add58c9a
commit
15e78aa9f0
|
@ -103,7 +103,17 @@ def format_info(cluster_information, oformat):
|
||||||
ainformation.append('{}PVC cluster status:{}'.format(ansiprint.bold(), ansiprint.end()))
|
ainformation.append('{}PVC cluster status:{}'.format(ansiprint.bold(), ansiprint.end()))
|
||||||
ainformation.append('')
|
ainformation.append('')
|
||||||
ainformation.append('{}Cluster health:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), health_colour, cluster_information['health'], ansiprint.end()))
|
ainformation.append('{}Cluster health:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), health_colour, cluster_information['health'], ansiprint.end()))
|
||||||
|
if cluster_information['health_msg']:
|
||||||
|
for line in cluster_information['health_msg']:
|
||||||
|
ainformation.append(
|
||||||
|
' > {}'.format(line)
|
||||||
|
)
|
||||||
ainformation.append('{}Storage health:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), storage_health_colour, cluster_information['storage_health'], ansiprint.end()))
|
ainformation.append('{}Storage health:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), storage_health_colour, cluster_information['storage_health'], ansiprint.end()))
|
||||||
|
if cluster_information['storage_health_msg']:
|
||||||
|
for line in cluster_information['storage_health_msg']:
|
||||||
|
ainformation.append(
|
||||||
|
' > {}'.format(line)
|
||||||
|
)
|
||||||
ainformation.append('')
|
ainformation.append('')
|
||||||
ainformation.append('{}Primary node:{} {}'.format(ansiprint.purple(), ansiprint.end(), cluster_information['primary_node']))
|
ainformation.append('{}Primary node:{} {}'.format(ansiprint.purple(), ansiprint.end(), cluster_information['primary_node']))
|
||||||
ainformation.append('{}Cluster upstream IP:{} {}'.format(ansiprint.purple(), ansiprint.end(), cluster_information['upstream_ip']))
|
ainformation.append('{}Cluster upstream IP:{} {}'.format(ansiprint.purple(), ansiprint.end(), cluster_information['upstream_ip']))
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
|
|
||||||
|
@ -50,6 +51,10 @@ def getClusterInformation(zk_conn):
|
||||||
except:
|
except:
|
||||||
maint_state = 'false'
|
maint_state = 'false'
|
||||||
|
|
||||||
|
# List of messages to display to the clients
|
||||||
|
cluster_health_msg = []
|
||||||
|
storage_health_msg = []
|
||||||
|
|
||||||
# Get node information object list
|
# Get node information object list
|
||||||
retcode, node_list = pvc_node.get_list(zk_conn, None)
|
retcode, node_list = pvc_node.get_list(zk_conn, None)
|
||||||
|
|
||||||
|
@ -82,6 +87,7 @@ def getClusterInformation(zk_conn):
|
||||||
domain_state = node['domain_state']
|
domain_state = node['domain_state']
|
||||||
if daemon_state != 'run' and domain_state != 'ready':
|
if daemon_state != 'run' and domain_state != 'ready':
|
||||||
node_healthy_status[index] = False
|
node_healthy_status[index] = False
|
||||||
|
cluster_health_msg.append("Node '{}' in {},{} state".format(node['name'], daemon_state, domain_state))
|
||||||
else:
|
else:
|
||||||
node_healthy_status[index] = True
|
node_healthy_status[index] = True
|
||||||
node_report_status[index] = daemon_state + ',' + domain_state
|
node_report_status[index] = daemon_state + ',' + domain_state
|
||||||
|
@ -93,6 +99,7 @@ def getClusterInformation(zk_conn):
|
||||||
vm_state = vm['state']
|
vm_state = vm['state']
|
||||||
if vm_state not in ['start', 'disable', 'migrate', 'unmigrate', 'provision']:
|
if vm_state not in ['start', 'disable', 'migrate', 'unmigrate', 'provision']:
|
||||||
vm_healthy_status[index] = False
|
vm_healthy_status[index] = False
|
||||||
|
cluster_health_msg.append("VM '{}' in {} state".format(vm['name'], vm_state))
|
||||||
else:
|
else:
|
||||||
vm_healthy_status[index] = True
|
vm_healthy_status[index] = True
|
||||||
vm_report_status[index] = vm_state
|
vm_report_status[index] = vm_state
|
||||||
|
@ -111,12 +118,14 @@ def getClusterInformation(zk_conn):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
ceph_osd_in = 0
|
ceph_osd_in = 0
|
||||||
|
|
||||||
if not ceph_osd_up or not ceph_osd_in:
|
|
||||||
ceph_osd_healthy_status[index] = False
|
|
||||||
else:
|
|
||||||
ceph_osd_healthy_status[index] = True
|
|
||||||
up_texts = { 1: 'up', 0: 'down' }
|
up_texts = { 1: 'up', 0: 'down' }
|
||||||
in_texts = { 1: 'in', 0: 'out' }
|
in_texts = { 1: 'in', 0: 'out' }
|
||||||
|
|
||||||
|
if not ceph_osd_up or not ceph_osd_in:
|
||||||
|
ceph_osd_healthy_status[index] = False
|
||||||
|
cluster_health_msg.append('OSD {} in {},{} state'.format(ceph_osd['id'], up_texts[ceph_osd_up], in_texts[ceph_osd_in]))
|
||||||
|
else:
|
||||||
|
ceph_osd_healthy_status[index] = True
|
||||||
ceph_osd_report_status[index] = up_texts[ceph_osd_up] + ',' + in_texts[ceph_osd_in]
|
ceph_osd_report_status[index] = up_texts[ceph_osd_up] + ',' + in_texts[ceph_osd_in]
|
||||||
|
|
||||||
# Find out the overall cluster health; if any element of a healthy_status is false, it's unhealthy
|
# Find out the overall cluster health; if any element of a healthy_status is false, it's unhealthy
|
||||||
|
@ -128,7 +137,19 @@ def getClusterInformation(zk_conn):
|
||||||
cluster_health = 'Optimal'
|
cluster_health = 'Optimal'
|
||||||
|
|
||||||
# Find out our storage health from Ceph
|
# Find out our storage health from Ceph
|
||||||
ceph_health = zkhandler.readdata(zk_conn, '/ceph').split('\n')[2].split()[-1]
|
ceph_status = zkhandler.readdata(zk_conn, '/ceph').split('\n')
|
||||||
|
ceph_health = ceph_status[2].split()[-1]
|
||||||
|
|
||||||
|
# Parse the status output to get the health indicators
|
||||||
|
line_record = False
|
||||||
|
for index, line in enumerate(ceph_status):
|
||||||
|
if re.search('services:', line):
|
||||||
|
line_record = False
|
||||||
|
if line_record and len(line.strip()) > 0:
|
||||||
|
storage_health_msg.append(line.strip())
|
||||||
|
if re.search('health:', line):
|
||||||
|
line_record = True
|
||||||
|
|
||||||
if maint_state == 'true':
|
if maint_state == 'true':
|
||||||
storage_health = 'Maintenance'
|
storage_health = 'Maintenance'
|
||||||
elif ceph_health != 'HEALTH_OK':
|
elif ceph_health != 'HEALTH_OK':
|
||||||
|
@ -183,7 +204,9 @@ def getClusterInformation(zk_conn):
|
||||||
# Format the status data
|
# Format the status data
|
||||||
cluster_information = {
|
cluster_information = {
|
||||||
'health': cluster_health,
|
'health': cluster_health,
|
||||||
|
'health_msg': cluster_health_msg,
|
||||||
'storage_health': storage_health,
|
'storage_health': storage_health,
|
||||||
|
'storage_health_msg': storage_health_msg,
|
||||||
'primary_node': common.getPrimaryNode(zk_conn),
|
'primary_node': common.getPrimaryNode(zk_conn),
|
||||||
'upstream_ip': zkhandler.readdata(zk_conn, '/upstream_ip'),
|
'upstream_ip': zkhandler.readdata(zk_conn, '/upstream_ip'),
|
||||||
'nodes': formatted_node_states,
|
'nodes': formatted_node_states,
|
||||||
|
|
Loading…
Reference in New Issue