Skip OSD stat collection if the OSD count is zero

Otherwise, `ceph osd df` will hang indefinitely trying to get data
when there are zero OSDs.
This commit is contained in:
Joshua Boniface 2019-06-18 12:36:53 -04:00
parent 5a327dc41a
commit 8d9007f697
1 changed file with 108 additions and 101 deletions

View File

@ -912,104 +912,111 @@ def update_zookeeper():
} }
}) })
# Trigger updates for each OSD on this node # Trigger updates for each pool on this node
for pool in pool_list: for pool in pool_list:
zkhandler.writedata(zk_conn, { zkhandler.writedata(zk_conn, {
'/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool])) '/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool]))
}) })
# Get data from Ceph OSDs # Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
if debug: osds_this_node = 0
print("Get data from Ceph OSDs") if len(osd_list) > 0:
# Parse the dump data # Get data from Ceph OSDs
osd_dump = dict() if debug:
retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json') print("Get data from Ceph OSDs")
osd_dump_raw = json.loads(stdout)['osds'] # Parse the dump data
if debug: osd_dump = dict()
print("Loop through OSD dump") retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json')
for osd in osd_dump_raw: osd_dump_raw = json.loads(stdout)['osds']
osd_dump.update({ if debug:
str(osd['osd']): { print("Loop through OSD dump")
'uuid': osd['uuid'], for osd in osd_dump_raw:
'up': osd['up'], osd_dump.update({
'in': osd['in'], str(osd['osd']): {
'primary_affinity': osd['primary_affinity'] 'uuid': osd['uuid'],
} 'up': osd['up'],
}) 'in': osd['in'],
# Parse the df data 'primary_affinity': osd['primary_affinity']
if debug:
print("Parse the OSD df data")
osd_df = dict()
retcode, stdout, stderr = common.run_os_command('ceph osd df --format json')
osd_df_raw = json.loads(stdout)['nodes']
if debug:
print("Loop through OSD df")
for osd in osd_df_raw:
osd_df.update({
str(osd['id']): {
'utilization': osd['utilization'],
'var': osd['var'],
'pgs': osd['pgs'],
'kb': osd['kb'],
'weight': osd['crush_weight'],
'reweight': osd['reweight'],
}
})
# Parse the status data
if debug:
print("Parse the OSD status data")
osd_status = dict()
retcode, stdout, stderr = common.run_os_command('ceph osd status')
if debug:
print("Loop through OSD status data")
for line in stderr.split('\n'):
# Strip off colour
line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
# Split it for parsing
line = line.split()
if len(line) > 1 and line[1].isdigit():
# This is an OSD line so parse it
osd_id = line[1]
node = line[3].split('.')[0]
used = line[5]
avail = line[7]
wr_ops = line[9]
wr_data = line[11]
rd_ops = line[13]
rd_data = line[15]
state = line[17]
osd_status.update({
str(osd_id): {
'node': node,
'used': used,
'avail': avail,
'wr_ops': wr_ops,
'wr_data': wr_data,
'rd_ops': rd_ops,
'rd_data': rd_data,
'state': state
} }
}) })
# Merge them together into a single meaningful dict
if debug:
print("Merge OSD data together")
osd_stats = dict()
for osd in osd_list:
this_dump = osd_dump[osd]
this_dump.update(osd_df[osd])
this_dump.update(osd_status[osd])
osd_stats[osd] = this_dump
# Trigger updates for each OSD on this node # Parse the df data
if debug: if debug:
print("Trigger updates for each OSD on this node") print("Parse the OSD df data")
osds_this_node = 0 osd_df = dict()
for osd in osd_list: retcode, stdout, stderr = common.run_os_command('ceph osd df --format json')
if d_osd[osd].node == myhostname: try:
zkhandler.writedata(zk_conn, { osd_df_raw = json.loads(stdout)['nodes']
'/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd])) except:
logger.out('Failed to parse OSD list', state='w')
if debug:
print("Loop through OSD df")
for osd in osd_df_raw:
osd_df.update({
str(osd['id']): {
'utilization': osd['utilization'],
'var': osd['var'],
'pgs': osd['pgs'],
'kb': osd['kb'],
'weight': osd['crush_weight'],
'reweight': osd['reweight'],
}
}) })
osds_this_node += 1 # Parse the status data
if debug:
print("Parse the OSD status data")
osd_status = dict()
retcode, stdout, stderr = common.run_os_command('ceph osd status')
if debug:
print("Loop through OSD status data")
for line in stderr.split('\n'):
# Strip off colour
line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
# Split it for parsing
line = line.split()
if len(line) > 1 and line[1].isdigit():
# This is an OSD line so parse it
osd_id = line[1]
node = line[3].split('.')[0]
used = line[5]
avail = line[7]
wr_ops = line[9]
wr_data = line[11]
rd_ops = line[13]
rd_data = line[15]
state = line[17]
osd_status.update({
str(osd_id): {
'node': node,
'used': used,
'avail': avail,
'wr_ops': wr_ops,
'wr_data': wr_data,
'rd_ops': rd_ops,
'rd_data': rd_data,
'state': state
}
})
# Merge them together into a single meaningful dict
if debug:
print("Merge OSD data together")
osd_stats = dict()
for osd in osd_list:
this_dump = osd_dump[osd]
this_dump.update(osd_df[osd])
this_dump.update(osd_status[osd])
osd_stats[osd] = this_dump
# Trigger updates for each OSD on this node
if debug:
print("Trigger updates for each OSD on this node")
for osd in osd_list:
if d_osd[osd].node == myhostname:
zkhandler.writedata(zk_conn, {
'/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd]))
})
osds_this_node += 1
memalloc = 0 memalloc = 0
vcpualloc = 0 vcpualloc = 0