Handle failures of Ceph commands gracefully
If one of these commands fails, catch the error, log a warning, and fall back to an empty list. Later parsing of the data also tolerates the resulting missing entries in this case.
This commit is contained in:
parent
1a8e7509f7
commit
47f86475f8
|
@ -956,7 +956,12 @@ def update_zookeeper():
|
||||||
# Get pool info
|
# Get pool info
|
||||||
pool_df = dict()
|
pool_df = dict()
|
||||||
retcode, stdout, stderr = common.run_os_command('rados df --format json', timeout=1)
|
retcode, stdout, stderr = common.run_os_command('rados df --format json', timeout=1)
|
||||||
|
try:
|
||||||
pool_df_raw = json.loads(stdout)['pools']
|
pool_df_raw = json.loads(stdout)['pools']
|
||||||
|
except json.decoder.JSONDecodeError:
|
||||||
|
logger.out('Failed to obtain Pool data', state='w')
|
||||||
|
pool_df_raw = []
|
||||||
|
|
||||||
for pool in pool_df_raw:
|
for pool in pool_df_raw:
|
||||||
pool_df.update({
|
pool_df.update({
|
||||||
str(pool['name']): {
|
str(pool['name']): {
|
||||||
|
@ -977,9 +982,14 @@ def update_zookeeper():
|
||||||
|
|
||||||
# Trigger updates for each pool on this node
|
# Trigger updates for each pool on this node
|
||||||
for pool in pool_list:
|
for pool in pool_list:
|
||||||
|
try:
|
||||||
|
stats = json.dumps(pool_df[pool])
|
||||||
zkhandler.writedata(zk_conn, {
|
zkhandler.writedata(zk_conn, {
|
||||||
'/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool]))
|
'/ceph/pools/{}/stats'.format(pool): str(stats)
|
||||||
})
|
})
|
||||||
|
except KeyError:
|
||||||
|
# One or more of the status commands timed out, just continue
|
||||||
|
pass
|
||||||
|
|
||||||
# Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
|
# Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
|
||||||
osds_this_node = 0
|
osds_this_node = 0
|
||||||
|
@ -990,7 +1000,12 @@ def update_zookeeper():
|
||||||
# Parse the dump data
|
# Parse the dump data
|
||||||
osd_dump = dict()
|
osd_dump = dict()
|
||||||
retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json', timeout=1)
|
retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json', timeout=1)
|
||||||
|
try:
|
||||||
osd_dump_raw = json.loads(stdout)['osds']
|
osd_dump_raw = json.loads(stdout)['osds']
|
||||||
|
except json.decoder.JSONDecodeError:
|
||||||
|
logger.out('Failed to obtain OSD data', state='w')
|
||||||
|
osd_dump_raw = []
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
print("Loop through OSD dump")
|
print("Loop through OSD dump")
|
||||||
for osd in osd_dump_raw:
|
for osd in osd_dump_raw:
|
||||||
|
@ -1012,6 +1027,7 @@ def update_zookeeper():
|
||||||
osd_df_raw = json.loads(stdout)['nodes']
|
osd_df_raw = json.loads(stdout)['nodes']
|
||||||
except:
|
except:
|
||||||
logger.out('Failed to parse OSD list', state='w')
|
logger.out('Failed to parse OSD list', state='w')
|
||||||
|
osd_df_raw = []
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
print("Loop through OSD df")
|
print("Loop through OSD df")
|
||||||
|
@ -1066,19 +1082,28 @@ def update_zookeeper():
|
||||||
print("Merge OSD data together")
|
print("Merge OSD data together")
|
||||||
osd_stats = dict()
|
osd_stats = dict()
|
||||||
for osd in osd_list:
|
for osd in osd_list:
|
||||||
|
try:
|
||||||
this_dump = osd_dump[osd]
|
this_dump = osd_dump[osd]
|
||||||
this_dump.update(osd_df[osd])
|
this_dump.update(osd_df[osd])
|
||||||
this_dump.update(osd_status[osd])
|
this_dump.update(osd_status[osd])
|
||||||
osd_stats[osd] = this_dump
|
osd_stats[osd] = this_dump
|
||||||
|
except KeyError:
|
||||||
|
# One or more of the status commands timed out, just continue
|
||||||
|
pass
|
||||||
|
|
||||||
# Trigger updates for each OSD on this node
|
# Trigger updates for each OSD on this node
|
||||||
if debug:
|
if debug:
|
||||||
print("Trigger updates for each OSD on this node")
|
print("Trigger updates for each OSD on this node")
|
||||||
for osd in osd_list:
|
for osd in osd_list:
|
||||||
if d_osd[osd].node == myhostname:
|
if d_osd[osd].node == myhostname:
|
||||||
|
try:
|
||||||
|
stats = json.dumps(osd_stats[osd])
|
||||||
zkhandler.writedata(zk_conn, {
|
zkhandler.writedata(zk_conn, {
|
||||||
'/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd]))
|
'/ceph/osds/{}/stats'.format(osd): str(stats)
|
||||||
})
|
})
|
||||||
|
except KeyError:
|
||||||
|
# One or more of the status commands timed out, just continue
|
||||||
|
pass
|
||||||
osds_this_node += 1
|
osds_this_node += 1
|
||||||
|
|
||||||
memalloc = 0
|
memalloc = 0
|
||||||
|
|
Loading…
Reference in New Issue