Support OSD out/in and commands

This commit is contained in:
Joshua Boniface 2018-11-01 22:00:59 -04:00
parent 41bedbae3c
commit 2ea8a14ba4
4 changed files with 341 additions and 4 deletions

View File

@ -975,6 +975,78 @@ def ceph_osd_remove(osdid):
retcode, retmsg = pvc_ceph.remove_osd(zk_conn, osdid)
cleanup(retcode, retmsg, zk_conn)
###############################################################################
# pvc ceph osd in
###############################################################################
@click.command(name='in', short_help='Online OSD.')
@click.argument('osdid')
def ceph_osd_in(osdid):
    """
    Set a Ceph OSD with ID OSDID online in the cluster.
    """
    # Connect using the module-level zk_host, ask the client library to set
    # the OSD in, then pass the outcome and the connection to cleanup().
    connection = pvc_common.startZKConnection(zk_host)
    ok, text = pvc_ceph.in_osd(connection, osdid)
    cleanup(ok, text, connection)
###############################################################################
# pvc ceph osd out
###############################################################################
@click.command(name='out', short_help='Offline OSD.')
@click.argument('osdid')
def ceph_osd_out(osdid):
    """
    Set a Ceph OSD with ID OSDID offline in the cluster.
    """
    # Connect using the module-level zk_host, ask the client library to set
    # the OSD out, then pass the outcome and the connection to cleanup().
    connection = pvc_common.startZKConnection(zk_host)
    ok, text = pvc_ceph.out_osd(connection, osdid)
    cleanup(ok, text, connection)
###############################################################################
# pvc ceph osd set
###############################################################################
@click.command(name='set', short_help='Set property.')
@click.argument('osd_property')
def ceph_osd_set(osd_property):
    """
    Set a Ceph OSD property OSD_PROPERTY on the cluster.
    Valid properties are:
    full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds
    """
    # The property name is forwarded as given; no validation against the
    # list in the help text happens in this function.
    connection = pvc_common.startZKConnection(zk_host)
    ok, text = pvc_ceph.set_osd(connection, osd_property)
    cleanup(ok, text, connection)
###############################################################################
# pvc ceph osd unset
###############################################################################
@click.command(name='unset', short_help='Unset property.')
@click.argument('osd_property')
def ceph_osd_unset(osd_property):
    """
    Unset a Ceph OSD property OSD_PROPERTY on the cluster.
    Valid properties are:
    full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds
    """
    # The property name is forwarded as given; no validation against the
    # list in the help text happens in this function.
    connection = pvc_common.startZKConnection(zk_host)
    ok, text = pvc_ceph.unset_osd(connection, osd_property)
    cleanup(ok, text, connection)
###############################################################################
# pvc ceph osd list
###############################################################################
@ -1187,10 +1259,10 @@ net_acl.add_command(net_acl_list)
# Register the OSD subcommands on ceph_osd
ceph_osd.add_command(ceph_osd_add)
ceph_osd.add_command(ceph_osd_remove)
# Previously disabled registrations of in/out/set/unset; the live calls follow
#ceph_osd.add_command(ceph_osd_in)
#ceph_osd.add_command(ceph_osd_out)
#ceph_osd.add_command(ceph_osd_set)
#ceph_osd.add_command(ceph_osd_unset)
ceph_osd.add_command(ceph_osd_in)
ceph_osd.add_command(ceph_osd_out)
ceph_osd.add_command(ceph_osd_set)
ceph_osd.add_command(ceph_osd_unset)
ceph_osd.add_command(ceph_osd_list)
# Register the pool subcommands on ceph_pool
ceph_pool.add_command(ceph_pool_add)

View File

@ -603,6 +603,108 @@ def remove_osd(zk_conn, osd_id):
zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
return success, message
def in_osd(zk_conn, osd_id):
    """
    Ask the cluster to set an OSD online ("in") and report the outcome.

    The request is written to the '/ceph/cmd' key as 'osd_in <osd_id>'. After
    a short pause the key is read back under a read lock and its first word
    is compared against 'success-osd_in'. The key is cleared before returning.

    Returns a (success, message) tuple. success is False when osd_id fails
    verifyOSD(), when the key does not report success, or when the reply
    cannot be read or parsed.
    """
    if not verifyOSD(zk_conn, osd_id):
        return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)

    # Tell the cluster to online an OSD
    in_osd_string = 'osd_in {}'.format(osd_id)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': in_osd_string})
    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # Catch ordinary errors only (e.g. an empty key has no first
            # word); KeyboardInterrupt and SystemExit must still propagate.
            success = False
            message = 'ERROR Command ignored by node.'
        else:
            if result == 'success-osd_in':
                message = 'Set OSD {} online in the cluster.'.format(osd_id)
                success = True
            else:
                message = 'ERROR: Failed to set OSD online; check node logs for details.'
                success = False

    # Clear the command key now that the reply has been consumed
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def out_osd(zk_conn, osd_id):
    """
    Ask the cluster to set an OSD offline ("out") and report the outcome.

    The request is written to the '/ceph/cmd' key as 'osd_out <osd_id>'. After
    a short pause the key is read back under a read lock and its first word
    is compared against 'success-osd_out'. The key is cleared before
    returning.

    Returns a (success, message) tuple. success is False when osd_id fails
    verifyOSD(), when the key does not report success, or when the reply
    cannot be read or parsed.
    """
    if not verifyOSD(zk_conn, osd_id):
        return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)

    # Tell the cluster to offline an OSD
    out_osd_string = 'osd_out {}'.format(osd_id)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': out_osd_string})
    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # Catch ordinary errors only (e.g. an empty key has no first
            # word); KeyboardInterrupt and SystemExit must still propagate.
            success = False
            message = 'ERROR Command ignored by node.'
        else:
            if result == 'success-osd_out':
                message = 'Set OSD {} offline in the cluster.'.format(osd_id)
                success = True
            else:
                message = 'ERROR: Failed to set OSD offline; check node logs for details.'
                success = False

    # Clear the command key now that the reply has been consumed
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def set_osd(zk_conn, option):
    """
    Ask the cluster to set an OSD property and report the outcome.

    The request is written to the '/ceph/cmd' key as 'osd_set <option>'. After
    a short pause the key is read back under a read lock and its first word
    is compared against 'success-osd_set'. The key is cleared before
    returning. The option is forwarded as given; it is not validated here.

    Returns a (success, message) tuple. success is False when the key does
    not report success or when the reply cannot be read or parsed.
    """
    # Tell the cluster to set an OSD property
    set_osd_string = 'osd_set {}'.format(option)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': set_osd_string})
    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # Catch ordinary errors only (e.g. an empty key has no first
            # word); KeyboardInterrupt and SystemExit must still propagate.
            success = False
            message = 'ERROR Command ignored by node.'
        else:
            if result == 'success-osd_set':
                message = 'Set OSD property {} on the cluster.'.format(option)
                success = True
            else:
                message = 'ERROR: Failed to set OSD property; check node logs for details.'
                success = False

    # Clear the command key now that the reply has been consumed
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def unset_osd(zk_conn, option):
    """
    Ask the cluster to unset an OSD property and report the outcome.

    The request is written to the '/ceph/cmd' key as 'osd_unset <option>'.
    After a short pause the key is read back under a read lock and its first
    word is compared against 'success-osd_unset'. The key is cleared before
    returning. The option is forwarded as given; it is not validated here.

    Returns a (success, message) tuple. success is False when the key does
    not report success or when the reply cannot be read or parsed.
    """
    # Tell the cluster to unset an OSD property
    unset_osd_string = 'osd_unset {}'.format(option)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': unset_osd_string})
    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # Catch ordinary errors only (e.g. an empty key has no first
            # word); KeyboardInterrupt and SystemExit must still propagate.
            success = False
            message = 'ERROR Command ignored by node.'
        else:
            if result == 'success-osd_unset':
                message = 'Unset OSD property {} on the cluster.'.format(option)
                success = True
            else:
                message = 'ERROR: Failed to unset OSD property; check node logs for details.'
                success = False

    # Clear the command key now that the reply has been consumed
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def get_list_osd(zk_conn, limit):
osd_list = []
full_osd_list = getCephOSDs(zk_conn)

View File

@ -241,6 +241,86 @@ def remove_osd(zk_conn, logger, osd_id, osd_obj):
logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
return False
def in_osd(zk_conn, logger, osd_id):
    """
    Set a Ceph OSD "in" by running 'ceph osd in <osd_id>'.

    zk_conn is accepted but not used by this function. Progress and failures
    are reported through logger.out(). Returns True when
    common.run_os_command() reports a falsy retcode, and False otherwise.
    """
    logger.out('Setting OSD {} in'.format(osd_id), state='i')
    try:
        # 1. Set it in
        retcode, stdout, stderr = common.run_os_command('ceph osd in {}'.format(osd_id))
        if retcode:
            print('ceph osd in')
            print(stdout)
            print(stderr)
            # A bare "raise" has no active exception to re-raise here, so
            # raise an explicit error that names the failed command.
            raise RuntimeError('"ceph osd in" returned {}'.format(retcode))

        # Log it
        logger.out('Set OSD {} in'.format(osd_id), state='o')
        return True
    except Exception as e:
        # Log it
        logger.out('Failed to set OSD {} in: {}'.format(osd_id, e), state='e')
        return False
def out_osd(zk_conn, logger, osd_id):
    """
    Set a Ceph OSD "out" by running 'ceph osd out <osd_id>'.

    zk_conn is accepted but not used by this function. Progress and failures
    are reported through logger.out(). Returns True when
    common.run_os_command() reports a falsy retcode, and False otherwise.
    """
    logger.out('Setting OSD {} out'.format(osd_id), state='i')
    try:
        # 1. Set it out
        retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
        if retcode:
            # These were calls to an undefined name ("proutt"), which raised
            # NameError and hid the command output on failure.
            print('ceph osd out')
            print(stdout)
            print(stderr)
            # A bare "raise" has no active exception to re-raise here, so
            # raise an explicit error that names the failed command.
            raise RuntimeError('"ceph osd out" returned {}'.format(retcode))

        # Log it
        logger.out('Set OSD {} out'.format(osd_id), state='o')
        return True
    except Exception as e:
        # Log it
        logger.out('Failed to set OSD {} out: {}'.format(osd_id, e), state='e')
        return False
def set_property(zk_conn, logger, option):
    """
    Set a Ceph OSD property by running 'ceph osd set <option>'.

    zk_conn is accepted but not used by this function. Progress and failures
    are reported through logger.out(). Returns True when
    common.run_os_command() reports a falsy retcode, and False otherwise.
    """
    logger.out('Setting OSD property {}'.format(option), state='i')
    try:
        # 1. Set the property
        retcode, stdout, stderr = common.run_os_command('ceph osd set {}'.format(option))
        if retcode:
            # This was a call to an undefined name ("prsett"), which raised
            # NameError and hid the command output on failure.
            print('ceph osd set')
            print(stdout)
            print(stderr)
            # A bare "raise" has no active exception to re-raise here, so
            # raise an explicit error that names the failed command.
            raise RuntimeError('"ceph osd set" returned {}'.format(retcode))

        # Log it
        logger.out('Set OSD property {}'.format(option), state='o')
        return True
    except Exception as e:
        # Log it
        logger.out('Failed to set OSD property {}: {}'.format(option, e), state='e')
        return False
def unset_property(zk_conn, logger, option):
    """
    Unset a Ceph OSD property by running 'ceph osd unset <option>'.

    zk_conn is accepted but not used by this function. Progress and failures
    are reported through logger.out(). Returns True when
    common.run_os_command() reports a falsy retcode, and False otherwise.
    """
    logger.out('Unsetting OSD property {}'.format(option), state='i')
    try:
        # 1. Unset the property
        retcode, stdout, stderr = common.run_os_command('ceph osd unset {}'.format(option))
        if retcode:
            # This was a call to an undefined name ("prunsett"), which raised
            # NameError and hid the command output on failure.
            print('ceph osd unset')
            print(stdout)
            print(stderr)
            # A bare "raise" has no active exception to re-raise here, so
            # raise an explicit error that names the failed command.
            raise RuntimeError('"ceph osd unset" returned {}'.format(retcode))

        # Log it
        logger.out('Unset OSD property {}'.format(option), state='o')
        return True
    except Exception as e:
        # Log it
        logger.out('Failed to unset OSD property {}: {}'.format(option, e), state='e')
        return False
class CephPoolInstance(object):
def __init__(self, zk_conn, this_node, name):
self.zk_conn = zk_conn

View File

@ -707,6 +707,89 @@ def cmd(data, stat, event=''):
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
# Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
time.sleep(0.5)
# Online an OSD
elif command == 'osd_in':
osd_id = args
# Verify osd_id is in the list
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
# Lock the command queue
lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
with lock:
# Online the OSD
result = CephInstance.in_osd(zk_conn, logger, osd_id)
# Command succeeded
if result:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
# Command failed
else:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
# Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
time.sleep(0.5)
# Offline an OSD
elif command == 'osd_out':
osd_id = args
# Verify osd_id is in the list
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
# Lock the command queue
lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
with lock:
# Offline the OSD
result = CephInstance.out_osd(zk_conn, logger, osd_id)
# Command succeeded
if result:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
# Command failed
else:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
# Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
time.sleep(0.5)
# Set a property
elif command == 'osd_set':
option = args
if this_node.router_state == 'primary':
# Lock the command queue
lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
with lock:
# Set the property
result = CephInstance.set_property(zk_conn, logger, option)
# Command succeeded
if result:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
# Command failed
else:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
# Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
time.sleep(0.5)
# Unset a property
elif command == 'osd_unset':
option = args
if this_node.router_state == 'primary':
# Lock the command queue
lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
with lock:
# Unset the property
result = CephInstance.unset_property(zk_conn, logger, option)
# Command succeeded
if result:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
# Command failed
else:
# Update the command queue
zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
# Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
time.sleep(0.5)
# Adding a new pool
if command == 'pool_add':
name, pgs = args.split(',')