diff --git a/client-cli/pvc.py b/client-cli/pvc.py
index 41517403..31eaf7bc 100755
--- a/client-cli/pvc.py
+++ b/client-cli/pvc.py
@@ -975,6 +975,78 @@ def ceph_osd_remove(osdid):
     retcode, retmsg = pvc_ceph.remove_osd(zk_conn, osdid)
     cleanup(retcode, retmsg, zk_conn)
 
+###############################################################################
+# pvc ceph osd in
+###############################################################################
+@click.command(name='in', short_help='Online OSD.')
+@click.argument(
+    'osdid'
+)
+def ceph_osd_in(osdid):
+    """
+    Set a Ceph OSD with ID OSDID online in the cluster.
+    """
+
+    zk_conn = pvc_common.startZKConnection(zk_host)
+    retcode, retmsg = pvc_ceph.in_osd(zk_conn, osdid)
+    cleanup(retcode, retmsg, zk_conn)
+
+###############################################################################
+# pvc ceph osd out
+###############################################################################
+@click.command(name='out', short_help='Offline OSD.')
+@click.argument(
+    'osdid'
+)
+def ceph_osd_out(osdid):
+    """
+    Set a Ceph OSD with ID OSDID offline in the cluster.
+    """
+
+    zk_conn = pvc_common.startZKConnection(zk_host)
+    retcode, retmsg = pvc_ceph.out_osd(zk_conn, osdid)
+    cleanup(retcode, retmsg, zk_conn)
+
+###############################################################################
+# pvc ceph osd set
+###############################################################################
+@click.command(name='set', short_help='Set property.')
+@click.argument(
+    'osd_property'
+)
+def ceph_osd_set(osd_property):
+    """
+    Set a Ceph OSD property OSD_PROPERTY on the cluster.
+
+    Valid properties are:
+
+    full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds
+    """
+
+    zk_conn = pvc_common.startZKConnection(zk_host)
+    retcode, retmsg = pvc_ceph.set_osd(zk_conn, osd_property)
+    cleanup(retcode, retmsg, zk_conn)
+
+###############################################################################
+# pvc ceph osd unset
+###############################################################################
+@click.command(name='unset', short_help='Unset property.')
+@click.argument(
+    'osd_property'
+)
+def ceph_osd_unset(osd_property):
+    """
+    Unset a Ceph OSD property OSD_PROPERTY on the cluster.
+
+    Valid properties are:
+
+    full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds
+    """
+
+    zk_conn = pvc_common.startZKConnection(zk_host)
+    retcode, retmsg = pvc_ceph.unset_osd(zk_conn, osd_property)
+    cleanup(retcode, retmsg, zk_conn)
+
 ###############################################################################
 # pvc ceph osd list
 ###############################################################################
@@ -1187,10 +1259,10 @@ net_acl.add_command(net_acl_list)
 
 ceph_osd.add_command(ceph_osd_add)
 ceph_osd.add_command(ceph_osd_remove)
-#ceph_osd.add_command(ceph_osd_in)
-#ceph_osd.add_command(ceph_osd_out)
-#ceph_osd.add_command(ceph_osd_set)
-#ceph_osd.add_command(ceph_osd_unset)
+ceph_osd.add_command(ceph_osd_in)
+ceph_osd.add_command(ceph_osd_out)
+ceph_osd.add_command(ceph_osd_set)
+ceph_osd.add_command(ceph_osd_unset)
 ceph_osd.add_command(ceph_osd_list)
 
 ceph_pool.add_command(ceph_pool_add)
diff --git a/client-common/ceph.py b/client-common/ceph.py
index 16fae8f5..852a5f8c 100644
--- a/client-common/ceph.py
+++ b/client-common/ceph.py
@@ -603,6 +603,108 @@ def remove_osd(zk_conn, osd_id):
     zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
     return success, message
 
+def in_osd(zk_conn, osd_id):
+    if not verifyOSD(zk_conn, osd_id):
+        return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)
+
+    # Tell the cluster to online an OSD
+    in_osd_string = 'osd_in {}'.format(osd_id)
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': in_osd_string})
+    # Wait 1/2 second for the cluster to get the message and start working
+    time.sleep(0.5)
+    # Acquire a read lock, so we get the return exclusively
+    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
+    with lock:
+        try:
+            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
+            if result == 'success-osd_in':
+                message = 'Set OSD {} online in the cluster.'.format(osd_id)
+                success = True
+            else:
+                message = 'ERROR: Failed to set OSD online; check node logs for details.'
+                success = False
+        except:
+            success = False
+            message = 'ERROR: Command ignored by node.'
+
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
+    return success, message
+
+def out_osd(zk_conn, osd_id):
+    if not verifyOSD(zk_conn, osd_id):
+        return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)
+
+    # Tell the cluster to offline an OSD
+    out_osd_string = 'osd_out {}'.format(osd_id)
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': out_osd_string})
+    # Wait 1/2 second for the cluster to get the message and start working
+    time.sleep(0.5)
+    # Acquire a read lock, so we get the return exclusively
+    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
+    with lock:
+        try:
+            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
+            if result == 'success-osd_out':
+                message = 'Set OSD {} offline in the cluster.'.format(osd_id)
+                success = True
+            else:
+                message = 'ERROR: Failed to set OSD offline; check node logs for details.'
+                success = False
+        except:
+            success = False
+            message = 'ERROR: Command ignored by node.'
+
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
+    return success, message
+
+def set_osd(zk_conn, option):
+    # Tell the cluster to set an OSD property
+    set_osd_string = 'osd_set {}'.format(option)
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': set_osd_string})
+    # Wait 1/2 second for the cluster to get the message and start working
+    time.sleep(0.5)
+    # Acquire a read lock, so we get the return exclusively
+    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
+    with lock:
+        try:
+            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
+            if result == 'success-osd_set':
+                message = 'Set OSD property {} on the cluster.'.format(option)
+                success = True
+            else:
+                message = 'ERROR: Failed to set OSD property; check node logs for details.'
+                success = False
+        except:
+            success = False
+            message = 'ERROR: Command ignored by node.'
+
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
+    return success, message
+
+def unset_osd(zk_conn, option):
+    # Tell the cluster to unset an OSD property
+    unset_osd_string = 'osd_unset {}'.format(option)
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': unset_osd_string})
+    # Wait 1/2 second for the cluster to get the message and start working
+    time.sleep(0.5)
+    # Acquire a read lock, so we get the return exclusively
+    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
+    with lock:
+        try:
+            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
+            if result == 'success-osd_unset':
+                message = 'Unset OSD property {} on the cluster.'.format(option)
+                success = True
+            else:
+                message = 'ERROR: Failed to unset OSD property; check node logs for details.'
+                success = False
+        except:
+            success = False
+            message = 'ERROR: Command ignored by node.'
+
+    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
+    return success, message
+
 def get_list_osd(zk_conn, limit):
     osd_list = []
     full_osd_list = getCephOSDs(zk_conn)
diff --git a/node-daemon/pvcd/CephInstance.py b/node-daemon/pvcd/CephInstance.py
index 2525d27d..eb30e57a 100644
--- a/node-daemon/pvcd/CephInstance.py
+++ b/node-daemon/pvcd/CephInstance.py
@@ -241,6 +241,86 @@ def remove_osd(zk_conn, logger, osd_id, osd_obj):
         logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
         return False
 
+def in_osd(zk_conn, logger, osd_id):
+    # We are ready to set the OSD in (online)
+    logger.out('Setting OSD {} in'.format(osd_id), state='i')
+    try:
+        # 1. Set it in
+        retcode, stdout, stderr = common.run_os_command('ceph osd in {}'.format(osd_id))
+        if retcode:
+            print('ceph osd in')
+            print(stdout)
+            print(stderr)
+            raise
+
+        # Log it
+        logger.out('Set OSD {} in'.format(osd_id), state='o')
+        return True
+    except Exception as e:
+        # Log it
+        logger.out('Failed to set OSD {} in: {}'.format(osd_id, e), state='e')
+        return False
+
+def out_osd(zk_conn, logger, osd_id):
+    # We are ready to set the OSD out (offline)
+    logger.out('Setting OSD {} out'.format(osd_id), state='i')
+    try:
+        # 1. Set it out
+        retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
+        if retcode:
+            print('ceph osd out')
+            print(stdout)
+            print(stderr)
+            raise
+
+        # Log it
+        logger.out('Set OSD {} out'.format(osd_id), state='o')
+        return True
+    except Exception as e:
+        # Log it
+        logger.out('Failed to set OSD {} out: {}'.format(osd_id, e), state='e')
+        return False
+
+def set_property(zk_conn, logger, option):
+    # We are ready to set an OSD property on the cluster
+    logger.out('Setting OSD property {}'.format(option), state='i')
+    try:
+        # 1. Set the property
+        retcode, stdout, stderr = common.run_os_command('ceph osd set {}'.format(option))
+        if retcode:
+            print('ceph osd set')
+            print(stdout)
+            print(stderr)
+            raise
+
+        # Log it
+        logger.out('Set OSD property {}'.format(option), state='o')
+        return True
+    except Exception as e:
+        # Log it
+        logger.out('Failed to set OSD property {}: {}'.format(option, e), state='e')
+        return False
+
+def unset_property(zk_conn, logger, option):
+    # We are ready to unset an OSD property on the cluster
+    logger.out('Unsetting OSD property {}'.format(option), state='i')
+    try:
+        # 1. Unset the property
+        retcode, stdout, stderr = common.run_os_command('ceph osd unset {}'.format(option))
+        if retcode:
+            print('ceph osd unset')
+            print(stdout)
+            print(stderr)
+            raise
+
+        # Log it
+        logger.out('Unset OSD property {}'.format(option), state='o')
+        return True
+    except Exception as e:
+        # Log it
+        logger.out('Failed to unset OSD property {}: {}'.format(option, e), state='e')
+        return False
+
 class CephPoolInstance(object):
     def __init__(self, zk_conn, this_node, name):
         self.zk_conn = zk_conn
diff --git a/node-daemon/pvcd/Daemon.py b/node-daemon/pvcd/Daemon.py
index e33e7091..eca4c035 100644
--- a/node-daemon/pvcd/Daemon.py
+++ b/node-daemon/pvcd/Daemon.py
@@ -707,6 +707,89 @@ def cmd(data, stat, event=''):
                         zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
                 # Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
                 time.sleep(0.5)
+        # Online an OSD
+        elif command == 'osd_in':
+            osd_id = args
+
+            # Verify osd_id is in the list
+            if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
+                # Lock the command queue
+                lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
+                with lock:
+                    # Online the OSD
+                    result = CephInstance.in_osd(zk_conn, logger, osd_id)
+                    # Command succeeded
+                    if result:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
+                    # Command failed
+                    else:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
+                # Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
+                time.sleep(0.5)
+        # Offline an OSD
+        elif command == 'osd_out':
+            osd_id = args
+
+            # Verify osd_id is in the list
+            if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
+                # Lock the command queue
+                lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
+                with lock:
+                    # Offline the OSD
+                    result = CephInstance.out_osd(zk_conn, logger, osd_id)
+                    # Command succeeded
+                    if result:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
+                    # Command failed
+                    else:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
+                # Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
+                time.sleep(0.5)
+        # Set a property
+        elif command == 'osd_set':
+            option = args
+
+            if this_node.router_state == 'primary':
+                # Lock the command queue
+                lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
+                with lock:
+                    # Set the property
+                    result = CephInstance.set_property(zk_conn, logger, option)
+                    # Command succeeded
+                    if result:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
+                    # Command failed
+                    else:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
+                # Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
+                time.sleep(0.5)
+        # Unset a property
+        elif command == 'osd_unset':
+            option = args
+
+            if this_node.router_state == 'primary':
+                # Lock the command queue
+                lock = zkhandler.writelock(zk_conn, '/ceph/cmd')
+                with lock:
+                    # Unset the property
+                    result = CephInstance.unset_property(zk_conn, logger, option)
+                    # Command succeeded
+                    if result:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'success-{}'.format(data)})
+                    # Command failed
+                    else:
+                        # Update the command queue
+                        zkhandler.writedata(zk_conn, {'/ceph/cmd': 'failure-{}'.format(data)})
+                # Wait 0.5 seconds before we free the lock, to ensure the client hits the lock
+                time.sleep(0.5)
+
         # Adding a new pool
         if command == 'pool_add':
            name, pgs = args.split(',')
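
All four new client-side helpers in client-common/ceph.py follow the same Zookeeper round trip: write the command string to /ceph/cmd, wait half a second for a node daemon to pick it up, take a read lock to inspect the 'success-'/'failure-' result, then clear the key. The following is a minimal illustrative sketch of that shared pattern as one generic helper; it is not part of this patch, and both the helper name run_ceph_cmd and the zkhandler import path are assumptions:

import time

import zkhandler  # assumed import path; the same helper module ceph.py already uses


def run_ceph_cmd(zk_conn, command_string, success_message, failure_message):
    # Hand the command (e.g. 'osd_in 0' or 'osd_set noout') to the node daemons
    zkhandler.writedata(zk_conn, {'/ceph/cmd': command_string})
    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)
    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        try:
            # The daemon rewrites the key as 'success-<data>' or 'failure-<data>'
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
            if result == 'success-{}'.format(command_string.split()[0]):
                success, message = True, success_message
            else:
                success, message = False, failure_message
        except Exception:
            success, message = False, 'ERROR: Command ignored by node.'
    # Clear the command key for the next caller
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message

With a helper like this, in_osd() would reduce to the verifyOSD() check plus a single call such as run_ceph_cmd(zk_conn, 'osd_in {}'.format(osd_id), 'Set OSD {} online in the cluster.'.format(osd_id), 'ERROR: Failed to set OSD online; check node logs for details.').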