Remove OSD stat collection if count is zero
Otherwise, ceph osd df will hang indefinitely trying to get data for the zero OSDs.
This commit is contained in:
parent
5a327dc41a
commit
8d9007f697
|
@ -58,7 +58,7 @@ import pvcd.CephInstance as CephInstance
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# PVCD - node daemon startup program
|
# PVCD - node daemon startup program
|
||||||
###############################################################################
|
###############################################################################
|
||||||
#
|
#
|
||||||
# The PVC daemon starts a node and configures all the required components for
|
# The PVC daemon starts a node and configures all the required components for
|
||||||
# the node to run. It determines which of the 3 daemon modes it should be in
|
# the node to run. It determines which of the 3 daemon modes it should be in
|
||||||
# during initial setup based on hostname and the config file, and then starts
|
# during initial setup based on hostname and the config file, and then starts
|
||||||
|
@ -201,14 +201,14 @@ def readConfig(pvcd_config_file, myhostname):
|
||||||
address_key = '{}_dev_ip'.format(net)
|
address_key = '{}_dev_ip'.format(net)
|
||||||
floating_key = '{}_floating_ip'.format(net)
|
floating_key = '{}_floating_ip'.format(net)
|
||||||
network_key = '{}_network'.format(net)
|
network_key = '{}_network'.format(net)
|
||||||
|
|
||||||
# Verify the network provided is valid
|
# Verify the network provided is valid
|
||||||
try:
|
try:
|
||||||
network = ipaddress.ip_network(config[network_key])
|
network = ipaddress.ip_network(config[network_key])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key))
|
print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key))
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# If we should be autoselected
|
# If we should be autoselected
|
||||||
if config[address_key] == 'by-id':
|
if config[address_key] == 'by-id':
|
||||||
# Construct an IP from the relevant network
|
# Construct an IP from the relevant network
|
||||||
|
@ -216,9 +216,9 @@ def readConfig(pvcd_config_file, myhostname):
|
||||||
address_id = int(mynodeid) - 1
|
address_id = int(mynodeid) - 1
|
||||||
# Grab the nth address from the network
|
# Grab the nth address from the network
|
||||||
config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
|
config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
|
||||||
|
|
||||||
# Verify that the floating IP is valid
|
# Verify that the floating IP is valid
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Set the ipaddr
|
# Set the ipaddr
|
||||||
floating_addr = ipaddress.ip_address(config[floating_key].split('/')[0])
|
floating_addr = ipaddress.ip_address(config[floating_key].split('/')[0])
|
||||||
|
@ -228,7 +228,7 @@ def readConfig(pvcd_config_file, myhostname):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key))
|
print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key))
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# Handle the storage config
|
# Handle the storage config
|
||||||
if config['enable_storage']:
|
if config['enable_storage']:
|
||||||
try:
|
try:
|
||||||
|
@ -246,7 +246,7 @@ def readConfig(pvcd_config_file, myhostname):
|
||||||
|
|
||||||
# Get the config object from readConfig()
|
# Get the config object from readConfig()
|
||||||
config = readConfig(pvcd_config_file, myhostname)
|
config = readConfig(pvcd_config_file, myhostname)
|
||||||
|
|
||||||
# Handle the enable values
|
# Handle the enable values
|
||||||
enable_hypervisor = config['enable_hypervisor']
|
enable_hypervisor = config['enable_hypervisor']
|
||||||
enable_networking = config['enable_networking']
|
enable_networking = config['enable_networking']
|
||||||
|
@ -357,19 +357,19 @@ if enable_networking:
|
||||||
# Enable routing functions
|
# Enable routing functions
|
||||||
common.run_os_command('sysctl net.ipv4.ip_forward=1')
|
common.run_os_command('sysctl net.ipv4.ip_forward=1')
|
||||||
common.run_os_command('sysctl net.ipv6.ip_forward=1')
|
common.run_os_command('sysctl net.ipv6.ip_forward=1')
|
||||||
|
|
||||||
# Send redirects
|
# Send redirects
|
||||||
common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
|
common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
|
||||||
common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
|
common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
|
||||||
common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
|
common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
|
||||||
common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')
|
common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')
|
||||||
|
|
||||||
# Accept source routes
|
# Accept source routes
|
||||||
common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
|
common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
|
||||||
common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
|
common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
|
||||||
common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
|
common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
|
||||||
common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')
|
common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')
|
||||||
|
|
||||||
# Disable RP filtering on the VNI dev and bridge interfaces (to allow traffic pivoting)
|
# Disable RP filtering on the VNI dev and bridge interfaces (to allow traffic pivoting)
|
||||||
common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev']))
|
common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev']))
|
||||||
common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['upstream_dev']))
|
common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['upstream_dev']))
|
||||||
|
@ -912,104 +912,111 @@ def update_zookeeper():
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
# Trigger updates for each OSD on this node
|
# Trigger updates for each pool on this node
|
||||||
for pool in pool_list:
|
for pool in pool_list:
|
||||||
zkhandler.writedata(zk_conn, {
|
zkhandler.writedata(zk_conn, {
|
||||||
'/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool]))
|
'/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool]))
|
||||||
})
|
})
|
||||||
|
|
||||||
# Get data from Ceph OSDs
|
# Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
|
||||||
if debug:
|
osds_this_node = 0
|
||||||
print("Get data from Ceph OSDs")
|
if len(osd_list) > 0:
|
||||||
# Parse the dump data
|
# Get data from Ceph OSDs
|
||||||
osd_dump = dict()
|
if debug:
|
||||||
retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json')
|
print("Get data from Ceph OSDs")
|
||||||
osd_dump_raw = json.loads(stdout)['osds']
|
# Parse the dump data
|
||||||
if debug:
|
osd_dump = dict()
|
||||||
print("Loop through OSD dump")
|
retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json')
|
||||||
for osd in osd_dump_raw:
|
osd_dump_raw = json.loads(stdout)['osds']
|
||||||
osd_dump.update({
|
if debug:
|
||||||
str(osd['osd']): {
|
print("Loop through OSD dump")
|
||||||
'uuid': osd['uuid'],
|
for osd in osd_dump_raw:
|
||||||
'up': osd['up'],
|
osd_dump.update({
|
||||||
'in': osd['in'],
|
str(osd['osd']): {
|
||||||
'primary_affinity': osd['primary_affinity']
|
'uuid': osd['uuid'],
|
||||||
}
|
'up': osd['up'],
|
||||||
})
|
'in': osd['in'],
|
||||||
# Parse the df data
|
'primary_affinity': osd['primary_affinity']
|
||||||
if debug:
|
|
||||||
print("Parse the OSD df data")
|
|
||||||
osd_df = dict()
|
|
||||||
retcode, stdout, stderr = common.run_os_command('ceph osd df --format json')
|
|
||||||
osd_df_raw = json.loads(stdout)['nodes']
|
|
||||||
if debug:
|
|
||||||
print("Loop through OSD df")
|
|
||||||
for osd in osd_df_raw:
|
|
||||||
osd_df.update({
|
|
||||||
str(osd['id']): {
|
|
||||||
'utilization': osd['utilization'],
|
|
||||||
'var': osd['var'],
|
|
||||||
'pgs': osd['pgs'],
|
|
||||||
'kb': osd['kb'],
|
|
||||||
'weight': osd['crush_weight'],
|
|
||||||
'reweight': osd['reweight'],
|
|
||||||
}
|
|
||||||
})
|
|
||||||
# Parse the status data
|
|
||||||
if debug:
|
|
||||||
print("Parse the OSD status data")
|
|
||||||
osd_status = dict()
|
|
||||||
retcode, stdout, stderr = common.run_os_command('ceph osd status')
|
|
||||||
if debug:
|
|
||||||
print("Loop through OSD status data")
|
|
||||||
for line in stderr.split('\n'):
|
|
||||||
# Strip off colour
|
|
||||||
line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
|
|
||||||
# Split it for parsing
|
|
||||||
line = line.split()
|
|
||||||
if len(line) > 1 and line[1].isdigit():
|
|
||||||
# This is an OSD line so parse it
|
|
||||||
osd_id = line[1]
|
|
||||||
node = line[3].split('.')[0]
|
|
||||||
used = line[5]
|
|
||||||
avail = line[7]
|
|
||||||
wr_ops = line[9]
|
|
||||||
wr_data = line[11]
|
|
||||||
rd_ops = line[13]
|
|
||||||
rd_data = line[15]
|
|
||||||
state = line[17]
|
|
||||||
osd_status.update({
|
|
||||||
str(osd_id): {
|
|
||||||
'node': node,
|
|
||||||
'used': used,
|
|
||||||
'avail': avail,
|
|
||||||
'wr_ops': wr_ops,
|
|
||||||
'wr_data': wr_data,
|
|
||||||
'rd_ops': rd_ops,
|
|
||||||
'rd_data': rd_data,
|
|
||||||
'state': state
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
# Merge them together into a single meaningful dict
|
|
||||||
if debug:
|
|
||||||
print("Merge OSD data together")
|
|
||||||
osd_stats = dict()
|
|
||||||
for osd in osd_list:
|
|
||||||
this_dump = osd_dump[osd]
|
|
||||||
this_dump.update(osd_df[osd])
|
|
||||||
this_dump.update(osd_status[osd])
|
|
||||||
osd_stats[osd] = this_dump
|
|
||||||
|
|
||||||
# Trigger updates for each OSD on this node
|
# Parse the df data
|
||||||
if debug:
|
if debug:
|
||||||
print("Trigger updates for each OSD on this node")
|
print("Parse the OSD df data")
|
||||||
osds_this_node = 0
|
osd_df = dict()
|
||||||
for osd in osd_list:
|
retcode, stdout, stderr = common.run_os_command('ceph osd df --format json')
|
||||||
if d_osd[osd].node == myhostname:
|
try:
|
||||||
zkhandler.writedata(zk_conn, {
|
osd_df_raw = json.loads(stdout)['nodes']
|
||||||
'/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd]))
|
except:
|
||||||
|
logger.out('Failed to parse OSD list', state='w')
|
||||||
|
|
||||||
|
if debug:
|
||||||
|
print("Loop through OSD df")
|
||||||
|
for osd in osd_df_raw:
|
||||||
|
osd_df.update({
|
||||||
|
str(osd['id']): {
|
||||||
|
'utilization': osd['utilization'],
|
||||||
|
'var': osd['var'],
|
||||||
|
'pgs': osd['pgs'],
|
||||||
|
'kb': osd['kb'],
|
||||||
|
'weight': osd['crush_weight'],
|
||||||
|
'reweight': osd['reweight'],
|
||||||
|
}
|
||||||
})
|
})
|
||||||
osds_this_node += 1
|
# Parse the status data
|
||||||
|
if debug:
|
||||||
|
print("Parse the OSD status data")
|
||||||
|
osd_status = dict()
|
||||||
|
retcode, stdout, stderr = common.run_os_command('ceph osd status')
|
||||||
|
if debug:
|
||||||
|
print("Loop through OSD status data")
|
||||||
|
for line in stderr.split('\n'):
|
||||||
|
# Strip off colour
|
||||||
|
line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
|
||||||
|
# Split it for parsing
|
||||||
|
line = line.split()
|
||||||
|
if len(line) > 1 and line[1].isdigit():
|
||||||
|
# This is an OSD line so parse it
|
||||||
|
osd_id = line[1]
|
||||||
|
node = line[3].split('.')[0]
|
||||||
|
used = line[5]
|
||||||
|
avail = line[7]
|
||||||
|
wr_ops = line[9]
|
||||||
|
wr_data = line[11]
|
||||||
|
rd_ops = line[13]
|
||||||
|
rd_data = line[15]
|
||||||
|
state = line[17]
|
||||||
|
osd_status.update({
|
||||||
|
str(osd_id): {
|
||||||
|
'node': node,
|
||||||
|
'used': used,
|
||||||
|
'avail': avail,
|
||||||
|
'wr_ops': wr_ops,
|
||||||
|
'wr_data': wr_data,
|
||||||
|
'rd_ops': rd_ops,
|
||||||
|
'rd_data': rd_data,
|
||||||
|
'state': state
|
||||||
|
}
|
||||||
|
})
|
||||||
|
# Merge them together into a single meaningful dict
|
||||||
|
if debug:
|
||||||
|
print("Merge OSD data together")
|
||||||
|
osd_stats = dict()
|
||||||
|
for osd in osd_list:
|
||||||
|
this_dump = osd_dump[osd]
|
||||||
|
this_dump.update(osd_df[osd])
|
||||||
|
this_dump.update(osd_status[osd])
|
||||||
|
osd_stats[osd] = this_dump
|
||||||
|
|
||||||
|
# Trigger updates for each OSD on this node
|
||||||
|
if debug:
|
||||||
|
print("Trigger updates for each OSD on this node")
|
||||||
|
for osd in osd_list:
|
||||||
|
if d_osd[osd].node == myhostname:
|
||||||
|
zkhandler.writedata(zk_conn, {
|
||||||
|
'/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd]))
|
||||||
|
})
|
||||||
|
osds_this_node += 1
|
||||||
|
|
||||||
memalloc = 0
|
memalloc = 0
|
||||||
vcpualloc = 0
|
vcpualloc = 0
|
||||||
|
@ -1030,7 +1037,7 @@ def update_zookeeper():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Toggle a state "change"
|
# Toggle a state "change"
|
||||||
zkhandler.writedata(zk_conn, { '/domains/{}/state'.format(domain): instance.getstate() })
|
zkhandler.writedata(zk_conn, { '/domains/{}/state'.format(domain): instance.getstate() })
|
||||||
|
|
||||||
# Connect to libvirt
|
# Connect to libvirt
|
||||||
if debug:
|
if debug:
|
||||||
print("Connect to libvirt")
|
print("Connect to libvirt")
|
||||||
|
@ -1039,7 +1046,7 @@ def update_zookeeper():
|
||||||
if lv_conn == None:
|
if lv_conn == None:
|
||||||
logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
|
logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
|
||||||
return
|
return
|
||||||
|
|
||||||
# Ensure that any running VMs are readded to the domain_list
|
# Ensure that any running VMs are readded to the domain_list
|
||||||
if debug:
|
if debug:
|
||||||
print("Ensure that any running VMs are readded to the domain_list")
|
print("Ensure that any running VMs are readded to the domain_list")
|
||||||
|
|
Loading…
Reference in New Issue