Add OSD removal force option
Ensures a removal can continue even in situations where some step(s) might fail, for instance when removing an obsolete OSD from a replaced node.
This commit is contained in:
parent
53aed0a735
commit
4d698be34b
|
@ -4099,11 +4099,16 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||||
|
|
||||||
@RequestParser(
|
@RequestParser(
|
||||||
[
|
[
|
||||||
|
{
|
||||||
|
"name": "force",
|
||||||
|
"required": False,
|
||||||
|
"helptext": "Force removal even if steps fail.",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "yes-i-really-mean-it",
|
"name": "yes-i-really-mean-it",
|
||||||
"required": True,
|
"required": True,
|
||||||
"helptext": "Please confirm that 'yes-i-really-mean-it'.",
|
"helptext": "Please confirm that 'yes-i-really-mean-it'.",
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
@Authenticator
|
@Authenticator
|
||||||
|
@ -4116,6 +4121,11 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||||
tags:
|
tags:
|
||||||
- storage / ceph
|
- storage / ceph
|
||||||
parameters:
|
parameters:
|
||||||
|
- in: query
|
||||||
|
name: force
|
||||||
|
type: boolean
|
||||||
|
required: false
|
||||||
|
description: Force removal even if some step(s) fail
|
||||||
- in: query
|
- in: query
|
||||||
name: yes-i-really-mean-it
|
name: yes-i-really-mean-it
|
||||||
type: string
|
type: string
|
||||||
|
@ -4138,7 +4148,7 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||||
type: object
|
type: object
|
||||||
id: Message
|
id: Message
|
||||||
"""
|
"""
|
||||||
return api_helper.ceph_osd_remove(osdid)
|
return api_helper.ceph_osd_remove(osdid, reqargs.get("force", False))
|
||||||
|
|
||||||
|
|
||||||
api.add_resource(API_Storage_Ceph_OSD_Element, "/storage/ceph/osd/<osdid>")
|
api.add_resource(API_Storage_Ceph_OSD_Element, "/storage/ceph/osd/<osdid>")
|
||||||
|
|
|
@ -1302,11 +1302,11 @@ def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_rati
|
||||||
|
|
||||||
|
|
||||||
@ZKConnection(config)
|
@ZKConnection(config)
|
||||||
def ceph_osd_remove(zkhandler, osd_id):
|
def ceph_osd_remove(zkhandler, osd_id, force_flag):
|
||||||
"""
|
"""
|
||||||
Remove a Ceph OSD from the PVC Ceph storage cluster.
|
Remove a Ceph OSD from the PVC Ceph storage cluster.
|
||||||
"""
|
"""
|
||||||
retflag, retdata = pvc_ceph.remove_osd(zkhandler, osd_id)
|
retflag, retdata = pvc_ceph.remove_osd(zkhandler, osd_id, force_flag)
|
||||||
|
|
||||||
if retflag:
|
if retflag:
|
||||||
retcode = 200
|
retcode = 200
|
||||||
|
|
|
@ -255,7 +255,7 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
|
||||||
return retstatus, response.json().get("message", "")
|
return retstatus, response.json().get("message", "")
|
||||||
|
|
||||||
|
|
||||||
def ceph_osd_remove(config, osdid):
|
def ceph_osd_remove(config, osdid, force_flag):
|
||||||
"""
|
"""
|
||||||
Remove Ceph OSD
|
Remove Ceph OSD
|
||||||
|
|
||||||
|
@ -263,7 +263,7 @@ def ceph_osd_remove(config, osdid):
|
||||||
API arguments:
|
API arguments:
|
||||||
API schema: {"message":"{data}"}
|
API schema: {"message":"{data}"}
|
||||||
"""
|
"""
|
||||||
params = {"yes-i-really-mean-it": "yes"}
|
params = {"force": force_flag, "yes-i-really-mean-it": "yes"}
|
||||||
response = call_api(
|
response = call_api(
|
||||||
config, "delete", "/storage/ceph/osd/{osdid}".format(osdid=osdid), params=params
|
config, "delete", "/storage/ceph/osd/{osdid}".format(osdid=osdid), params=params
|
||||||
)
|
)
|
||||||
|
|
|
@ -3376,6 +3376,14 @@ def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag):
|
||||||
###############################################################################
|
###############################################################################
|
||||||
@click.command(name="remove", short_help="Remove OSD.")
|
@click.command(name="remove", short_help="Remove OSD.")
|
||||||
@click.argument("osdid")
|
@click.argument("osdid")
|
||||||
|
@click.option(
|
||||||
|
"-f",
|
||||||
|
"--force",
|
||||||
|
"force_flag",
|
||||||
|
is_flag=True,
|
||||||
|
default=False,
|
||||||
|
help="Force removal even if steps fail",
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-y",
|
"-y",
|
||||||
"--yes",
|
"--yes",
|
||||||
|
@ -3385,11 +3393,13 @@ def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag):
|
||||||
help="Confirm the removal",
|
help="Confirm the removal",
|
||||||
)
|
)
|
||||||
@cluster_req
|
@cluster_req
|
||||||
def ceph_osd_remove(osdid, confirm_flag):
|
def ceph_osd_remove(osdid, force_flag, confirm_flag):
|
||||||
"""
|
"""
|
||||||
Remove a Ceph OSD with ID OSDID.
|
Remove a Ceph OSD with ID OSDID.
|
||||||
|
|
||||||
DANGER: This will completely remove the OSD from the cluster. OSDs will rebalance which will negatively affect performance and available space. It is STRONGLY RECOMMENDED to set an OSD out (using 'pvc storage osd out') and allow the cluster to fully rebalance (verified with 'pvc storage status') before removing an OSD.
|
DANGER: This will completely remove the OSD from the cluster. OSDs will rebalance which will negatively affect performance and available space. It is STRONGLY RECOMMENDED to set an OSD out (using 'pvc storage osd out') and allow the cluster to fully rebalance (verified with 'pvc storage status') before removing an OSD.
|
||||||
|
|
||||||
|
NOTE: The "-f"/"--force" option is useful after replacing a failed node, to ensure the OSD is removed even if the OSD in question does not properly exist on the node after a rebuild.
|
||||||
"""
|
"""
|
||||||
if not confirm_flag and not config["unsafe"]:
|
if not confirm_flag and not config["unsafe"]:
|
||||||
try:
|
try:
|
||||||
|
@ -3397,7 +3407,7 @@ def ceph_osd_remove(osdid, confirm_flag):
|
||||||
except Exception:
|
except Exception:
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
retcode, retmsg = pvc_ceph.ceph_osd_remove(config, osdid)
|
retcode, retmsg = pvc_ceph.ceph_osd_remove(config, osdid, force_flag)
|
||||||
cleanup(retcode, retmsg)
|
cleanup(retcode, retmsg)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -286,14 +286,14 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
|
||||||
return success, message
|
return success, message
|
||||||
|
|
||||||
|
|
||||||
def remove_osd(zkhandler, osd_id):
|
def remove_osd(zkhandler, osd_id, force_flag):
|
||||||
if not verifyOSD(zkhandler, osd_id):
|
if not verifyOSD(zkhandler, osd_id):
|
||||||
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(
|
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(
|
||||||
osd_id
|
osd_id
|
||||||
)
|
)
|
||||||
|
|
||||||
# Tell the cluster to remove an OSD
|
# Tell the cluster to remove an OSD
|
||||||
remove_osd_string = "osd_remove {}".format(osd_id)
|
remove_osd_string = "osd_remove {} {}".format(osd_id, str(force_flag))
|
||||||
zkhandler.write([("base.cmd.ceph", remove_osd_string)])
|
zkhandler.write([("base.cmd.ceph", remove_osd_string)])
|
||||||
# Wait 1/2 second for the cluster to get the message and start working
|
# Wait 1/2 second for the cluster to get the message and start working
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
|
@ -310,7 +310,7 @@ class CephOSDInstance(object):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def remove_osd(zkhandler, logger, osd_id, osd_obj):
|
def remove_osd(zkhandler, logger, osd_id, osd_obj, force_flag):
|
||||||
logger.out("Removing OSD disk {}".format(osd_id), state="i")
|
logger.out("Removing OSD disk {}".format(osd_id), state="i")
|
||||||
try:
|
try:
|
||||||
# Verify the OSD is present
|
# Verify the OSD is present
|
||||||
|
@ -320,7 +320,10 @@ class CephOSDInstance(object):
|
||||||
logger.out(
|
logger.out(
|
||||||
"Could not find OSD {} in the cluster".format(osd_id), state="e"
|
"Could not find OSD {} in the cluster".format(osd_id), state="e"
|
||||||
)
|
)
|
||||||
return True
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
return True
|
||||||
|
|
||||||
# 1. Set the OSD down and out so it will flush
|
# 1. Set the OSD down and out so it will flush
|
||||||
logger.out("Setting down OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Setting down OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
@ -331,7 +334,10 @@ class CephOSDInstance(object):
|
||||||
print("ceph osd down")
|
print("ceph osd down")
|
||||||
print(stdout)
|
print(stdout)
|
||||||
print(stderr)
|
print(stderr)
|
||||||
raise Exception
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
@ -341,7 +347,10 @@ class CephOSDInstance(object):
|
||||||
print("ceph osd out")
|
print("ceph osd out")
|
||||||
print(stdout)
|
print(stdout)
|
||||||
print(stderr)
|
print(stderr)
|
||||||
raise Exception
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
# 2. Wait for the OSD to flush
|
# 2. Wait for the OSD to flush
|
||||||
logger.out("Flushing OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Flushing OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
@ -359,7 +368,11 @@ class CephOSDInstance(object):
|
||||||
if num_pgs > 0:
|
if num_pgs > 0:
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
else:
|
else:
|
||||||
raise Exception
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -372,7 +385,10 @@ class CephOSDInstance(object):
|
||||||
print("systemctl stop")
|
print("systemctl stop")
|
||||||
print(stdout)
|
print(stdout)
|
||||||
print(stderr)
|
print(stderr)
|
||||||
raise Exception
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
# FIXME: There has to be a better way to do this /shrug
|
# FIXME: There has to be a better way to do this /shrug
|
||||||
while True:
|
while True:
|
||||||
|
@ -408,7 +424,10 @@ class CephOSDInstance(object):
|
||||||
print("ceph-volume lvm zap")
|
print("ceph-volume lvm zap")
|
||||||
print(stdout)
|
print(stdout)
|
||||||
print(stderr)
|
print(stderr)
|
||||||
raise Exception
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
# 6. Purge the OSD from Ceph
|
# 6. Purge the OSD from Ceph
|
||||||
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
@ -419,7 +438,10 @@ class CephOSDInstance(object):
|
||||||
print("ceph osd purge")
|
print("ceph osd purge")
|
||||||
print(stdout)
|
print(stdout)
|
||||||
print(stderr)
|
print(stderr)
|
||||||
raise Exception
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
# 7. Remove the DB device
|
# 7. Remove the DB device
|
||||||
if zkhandler.exists(("osd.db_device", osd_id)):
|
if zkhandler.exists(("osd.db_device", osd_id)):
|
||||||
|
@ -734,7 +756,8 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
|
||||||
|
|
||||||
# Removing an OSD
|
# Removing an OSD
|
||||||
elif command == "osd_remove":
|
elif command == "osd_remove":
|
||||||
osd_id = args
|
osd_id = args[0]
|
||||||
|
force_flag = bool(strtobool(args[1]))
|
||||||
|
|
||||||
# Verify osd_id is in the list
|
# Verify osd_id is in the list
|
||||||
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
|
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
|
||||||
|
@ -743,7 +766,7 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
|
||||||
with zk_lock:
|
with zk_lock:
|
||||||
# Remove the OSD
|
# Remove the OSD
|
||||||
result = CephOSDInstance.remove_osd(
|
result = CephOSDInstance.remove_osd(
|
||||||
zkhandler, logger, osd_id, d_osd[osd_id]
|
zkhandler, logger, osd_id, d_osd[osd_id], force_flag
|
||||||
)
|
)
|
||||||
# Command succeeded
|
# Command succeeded
|
||||||
if result:
|
if result:
|
||||||
|
|
Loading…
Reference in New Issue