Fix bugs with forced OSD removal
This commit is contained in:
parent
413100a147
commit
d6ca74376a
|
@ -406,7 +406,7 @@ def format_list_osd(osd_list):
|
||||||
osd_id_length = _osd_id_length
|
osd_id_length = _osd_id_length
|
||||||
|
|
||||||
# Set the OSD node length
|
# Set the OSD node length
|
||||||
_osd_node_length = len(osd_information["stats"]["node"]) + 1
|
_osd_node_length = len(osd_information["node"]) + 1
|
||||||
if _osd_node_length > osd_node_length:
|
if _osd_node_length > osd_node_length:
|
||||||
osd_node_length = _osd_node_length
|
osd_node_length = _osd_node_length
|
||||||
|
|
||||||
|
@ -602,13 +602,6 @@ def format_list_osd(osd_list):
|
||||||
)
|
)
|
||||||
|
|
||||||
for osd_information in sorted(osd_list, key=lambda x: int(x["id"])):
|
for osd_information in sorted(osd_list, key=lambda x: int(x["id"])):
|
||||||
try:
|
|
||||||
# If this happens, the node hasn't checked in fully yet, so just ignore it
|
|
||||||
if osd_information["stats"]["node"] == "|":
|
|
||||||
continue
|
|
||||||
except KeyError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
osd_up_flag, osd_up_colour, osd_in_flag, osd_in_colour = getOutputColoursOSD(
|
osd_up_flag, osd_up_colour, osd_in_flag, osd_in_colour = getOutputColoursOSD(
|
||||||
osd_information
|
osd_information
|
||||||
)
|
)
|
||||||
|
@ -663,7 +656,7 @@ def format_list_osd(osd_list):
|
||||||
osd_rdops_length=osd_rdops_length,
|
osd_rdops_length=osd_rdops_length,
|
||||||
osd_rddata_length=osd_rddata_length,
|
osd_rddata_length=osd_rddata_length,
|
||||||
osd_id=osd_information["id"],
|
osd_id=osd_information["id"],
|
||||||
osd_node=osd_information["stats"]["node"],
|
osd_node=osd_information["node"],
|
||||||
osd_device=osd_information["device"],
|
osd_device=osd_information["device"],
|
||||||
osd_db_device=osd_db_device,
|
osd_db_device=osd_db_device,
|
||||||
osd_up_colour=osd_up_colour,
|
osd_up_colour=osd_up_colour,
|
||||||
|
|
|
@ -181,6 +181,7 @@ def getClusterOSDList(zkhandler):
|
||||||
|
|
||||||
def getOSDInformation(zkhandler, osd_id):
|
def getOSDInformation(zkhandler, osd_id):
|
||||||
# Get the devices
|
# Get the devices
|
||||||
|
osd_node = zkhandler.read(("osd.node", osd_id))
|
||||||
osd_device = zkhandler.read(("osd.device", osd_id))
|
osd_device = zkhandler.read(("osd.device", osd_id))
|
||||||
osd_db_device = zkhandler.read(("osd.db_device", osd_id))
|
osd_db_device = zkhandler.read(("osd.db_device", osd_id))
|
||||||
# Parse the stats data
|
# Parse the stats data
|
||||||
|
@ -189,6 +190,7 @@ def getOSDInformation(zkhandler, osd_id):
|
||||||
|
|
||||||
osd_information = {
|
osd_information = {
|
||||||
"id": osd_id,
|
"id": osd_id,
|
||||||
|
"node": osd_node,
|
||||||
"device": osd_device,
|
"device": osd_device,
|
||||||
"db_device": osd_db_device,
|
"db_device": osd_db_device,
|
||||||
"stats": osd_stats,
|
"stats": osd_stats,
|
||||||
|
@ -293,7 +295,7 @@ def remove_osd(zkhandler, osd_id, force_flag):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Tell the cluster to remove an OSD
|
# Tell the cluster to remove an OSD
|
||||||
remove_osd_string = "osd_remove {} {}".format(osd_id, str(force_flag))
|
remove_osd_string = "osd_remove {},{}".format(osd_id, str(force_flag))
|
||||||
zkhandler.write([("base.cmd.ceph", remove_osd_string)])
|
zkhandler.write([("base.cmd.ceph", remove_osd_string)])
|
||||||
# Wait 1/2 second for the cluster to get the message and start working
|
# Wait 1/2 second for the cluster to get the message and start working
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
|
@ -404,30 +404,47 @@ class CephOSDInstance(object):
|
||||||
break
|
break
|
||||||
|
|
||||||
# 4. Determine the block devices
|
# 4. Determine the block devices
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
device_zk = zkhandler.read(("osd.device", osd_id))
|
||||||
"readlink /var/lib/ceph/osd/ceph-{}/block".format(osd_id)
|
try:
|
||||||
)
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
vg_name = stdout.split("/")[-2] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
|
"readlink /var/lib/ceph/osd/ceph-{}/block".format(osd_id)
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
)
|
||||||
"vgs --separator , --noheadings -o pv_name {}".format(vg_name)
|
vg_name = stdout.split("/")[
|
||||||
)
|
-2
|
||||||
pv_block = stdout.strip()
|
] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"vgs --separator , --noheadings -o pv_name {}".format(vg_name)
|
||||||
|
)
|
||||||
|
pv_block = stdout.strip()
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
pv_block = device_zk
|
||||||
|
|
||||||
# 5. Zap the volumes
|
# 5a. Verify that the blockdev actually has a ceph volume that matches the ID, otherwise don't zap it
|
||||||
logger.out(
|
logger.out(
|
||||||
"Zapping OSD disk with ID {} on {}".format(osd_id, pv_block), state="i"
|
f"Check OSD disk {pv_block} for OSD signature with ID osd.{osd_id}",
|
||||||
|
state="i",
|
||||||
)
|
)
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
"ceph-volume lvm zap --destroy {}".format(pv_block)
|
f"ceph-volume lvm list {pv_block}"
|
||||||
)
|
)
|
||||||
if retcode:
|
if f"====== osd.{osd_id} =======" in stdout:
|
||||||
print("ceph-volume lvm zap")
|
# 5b. Zap the volumes
|
||||||
print(stdout)
|
logger.out(
|
||||||
print(stderr)
|
"Zapping OSD disk with ID {} on {}".format(osd_id, pv_block),
|
||||||
if force_flag:
|
state="i",
|
||||||
logger.out("Ignoring error due to force flag", state="i")
|
)
|
||||||
else:
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
raise Exception
|
"ceph-volume lvm zap --destroy {}".format(pv_block)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph-volume lvm zap")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
if force_flag:
|
||||||
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
|
else:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
# 6. Purge the OSD from Ceph
|
# 6. Purge the OSD from Ceph
|
||||||
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
@ -756,8 +773,8 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
|
||||||
|
|
||||||
# Removing an OSD
|
# Removing an OSD
|
||||||
elif command == "osd_remove":
|
elif command == "osd_remove":
|
||||||
osd_id = args[0]
|
osd_id, force = args.split(",")
|
||||||
force_flag = bool(strtobool(args[1]))
|
force_flag = bool(strtobool(force))
|
||||||
|
|
||||||
# Verify osd_id is in the list
|
# Verify osd_id is in the list
|
||||||
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
|
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
|
||||||
|
|
Loading…
Reference in New Issue