Compare commits

...

3 Commits

Author SHA1 Message Date
d18e009b00 Improve handling of rounded values 2022-05-02 15:29:30 -04:00
1f8f3252a6 Fix bug with initial JSON for stats 2022-05-02 13:28:19 -04:00
b47c9832b7 Refactor OSD removal to use new ZK data
With the OSD LVM information stored in Zookeeper, we can use this to
determine the actual block device to zap rather than relying on runtime
determination and guesstimation.
2022-05-02 12:52:22 -04:00
2 changed files with 53 additions and 47 deletions

View File

@@ -370,13 +370,22 @@ def format_list_osd(osd_list):
# If this happens, the node hasn't checked in fully yet, so use some dummy data
if osd_information["stats"]["node"] == "|":
for key in osd_information["stats"].keys():
if osd_information["stats"][key] == "|":
osd_information["stats"][key] = "N/A"
elif osd_information["stats"][key] is None:
if (
osd_information["stats"][key] == "|"
or osd_information["stats"][key] is None
):
osd_information["stats"][key] = "N/A"
for key in osd_information.keys():
if osd_information[key] is None:
osd_information[key] = "N/A"
else:
for key in osd_information["stats"].keys():
if key in ["utilization", "var"] and isinstance(
osd_information["stats"][key], float
):
osd_information["stats"][key] = round(
osd_information["stats"][key], 2
)
except KeyError:
print(
f"Details for OSD {osd_information['id']} missing required keys, skipping."
@@ -449,13 +458,11 @@ def format_list_osd(osd_list):
if _osd_free_length > osd_free_length:
osd_free_length = _osd_free_length
osd_util = round(osd_information["stats"]["utilization"], 2)
_osd_util_length = len(str(osd_util)) + 1
_osd_util_length = len(str(osd_information["stats"]["utilization"])) + 1
if _osd_util_length > osd_util_length:
osd_util_length = _osd_util_length
osd_var = round(osd_information["stats"]["var"], 2)
_osd_var_length = len(str(osd_var)) + 1
_osd_var_length = len(str(osd_information["stats"]["var"])) + 1
if _osd_var_length > osd_var_length:
osd_var_length = _osd_var_length
@@ -605,8 +612,6 @@ def format_list_osd(osd_list):
osd_up_flag, osd_up_colour, osd_in_flag, osd_in_colour = getOutputColoursOSD(
osd_information
)
osd_util = round(osd_information["stats"]["utilization"], 2)
osd_var = round(osd_information["stats"]["var"], 2)
osd_db_device = osd_information["db_device"]
if not osd_db_device:
@@ -669,8 +674,8 @@ def format_list_osd(osd_list):
osd_reweight=osd_information["stats"]["reweight"],
osd_used=osd_information["stats"]["used"],
osd_free=osd_information["stats"]["avail"],
osd_util=osd_util,
osd_var=osd_var,
osd_util=osd_information["stats"]["utilization"],
osd_var=osd_information["stats"]["var"],
osd_wrops=osd_information["stats"]["wr_ops"],
osd_wrdata=osd_information["stats"]["wr_data"],
osd_rdops=osd_information["stats"]["rd_ops"],

View File

@@ -434,7 +434,7 @@ class CephOSDInstance(object):
(("osd.lv", osd_id), osd_lv),
(
("osd.stats", osd_id),
f'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "{node}", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", state="|" }',
'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
),
]
)
@@ -542,47 +542,48 @@ class CephOSDInstance(object):
break
# 4. Determine the block devices
device_zk = zkhandler.read(("osd.device", osd_id))
try:
retcode, stdout, stderr = common.run_os_command(
"readlink /var/lib/ceph/osd/ceph-{}/block".format(osd_id)
)
vg_name = stdout.split("/")[
-2
] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
retcode, stdout, stderr = common.run_os_command(
"vgs --separator , --noheadings -o pv_name {}".format(vg_name)
)
pv_block = stdout.strip()
except Exception as e:
print(e)
pv_block = device_zk
osd_vg = zkhandler.read(("osd.vg", osd_id))
osd_lv = zkhandler.read(("osd.lv", osd_id))
osd_lvm = f"/dev/{osd_vg}/{osd_lv}"
osd_device = None
# 5a. Verify that the blockdev actually has a ceph volume that matches the ID, otherwise don't zap it
logger.out(
f"Check OSD disk {pv_block} for OSD signature with ID osd.{osd_id}",
f"Getting disk info for OSD {osd_id} LV {osd_lvm}",
state="i",
)
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm list {pv_block}"
f"ceph-volume lvm list {osd_lvm}"
)
if f"====== osd.{osd_id} =======" in stdout:
# 5b. Zap the volumes
logger.out(
"Zapping OSD disk with ID {} on {}".format(osd_id, pv_block),
state="i",
)
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm zap --destroy {}".format(pv_block)
)
if retcode:
print("ceph-volume lvm zap")
print(stdout)
print(stderr)
if force_flag:
logger.out("Ignoring error due to force flag", state="i")
else:
raise Exception
for line in stdout.split("\n"):
if "devices" in line:
osd_device = line.split()[-1]
if not osd_device:
print("ceph-volume lvm list")
print("Could not find OSD information in data:")
print(stdout)
print(stderr)
if force_flag:
logger.out("Ignoring error due to force flag", state="i")
else:
raise Exception
# 5. Zap the volumes
logger.out(
"Zapping OSD {} disk on {}".format(osd_id, osd_device),
state="i",
)
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm zap --destroy {}".format(osd_device)
)
if retcode:
print("ceph-volume lvm zap")
print(stdout)
print(stderr)
if force_flag:
logger.out("Ignoring error due to force flag", state="i")
else:
raise Exception
# 6. Purge the OSD from Ceph
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")