Refactor refresh as well

This commit is contained in:
Joshua Boniface 2023-11-04 02:44:52 -04:00
parent 18d32fede3
commit e32054be81
1 changed file with 104 additions and 102 deletions

View File

@ -27,6 +27,7 @@ from daemon_lib.ceph import format_bytes_fromhuman, get_list_osd
from distutils.util import strtobool from distutils.util import strtobool
from re import search, match, sub from re import search, match, sub
from os import path
from uuid import uuid4 from uuid import uuid4
from json import loads as jloads from json import loads as jloads
@ -454,12 +455,12 @@ class CephOSDInstance(object):
logger.out(stderr, state="d") logger.out(stderr, state="d")
raise Exception raise Exception
# 5d. Wait half a second for it to activate # 5d. Wait 1 second for it to activate
time.sleep(0.5) time.sleep(1)
# 5e. Verify it started # 5e. Verify it started
retcode, stdout, stderr = common.run_os_command( retcode, stdout, stderr = common.run_os_command(
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id) f"systemctl status ceph-osd@{osd_id}"
) )
if retcode: if retcode:
logger.out(f"Failed: OSD {osd_id} unit is not active", state="e") logger.out(f"Failed: OSD {osd_id} unit is not active", state="e")
@ -789,6 +790,19 @@ class CephOSDInstance(object):
logger.out(stderr, state="d") logger.out(stderr, state="d")
raise Exception raise Exception
# Wait 1 second for it to activate
time.sleep(1)
# Verify it started
retcode, stdout, stderr = common.run_os_command(
f"systemctl status ceph-osd@{osd_id}"
)
if retcode:
logger.out(f"Failed: OSD {osd_id} unit is not active", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
logger.out(f"Updating OSD {osd_id} details in PVC", state="i") logger.out(f"Updating OSD {osd_id} details in PVC", state="i")
zkhandler.write( zkhandler.write(
[ [
@ -829,67 +843,66 @@ class CephOSDInstance(object):
) )
device = ddevice device = ddevice
# We are ready to create a new OSD on this node
logger.out(
"Refreshing OSD {} disk on block device {}".format(osd_id, device),
state="i",
)
try:
# 1. Verify the OSD is present
retcode, stdout, stderr = common.run_os_command("ceph osd ls") retcode, stdout, stderr = common.run_os_command("ceph osd ls")
osd_list = stdout.split("\n") osd_list = stdout.split("\n")
if osd_id not in osd_list: if osd_id not in osd_list:
logger.out(f"Could not find OSD {osd_id} in the cluster", state="e")
return False
found_osds = CephOSDInstance.find_osds_from_block(logger, device)
if osd_id not in found_osds.keys():
logger.out(f"Could not find OSD {osd_id} on device {device}", state="e")
return False
logger.out( logger.out(
"Could not find OSD {} in the cluster".format(osd_id), state="e" f"Refreshing OSD {osd_id} disk on block device {device}",
)
return True
dev_flags = "--data {}".format(device)
if ext_db_flag:
db_device = "osd-db/osd-{}".format(osd_id)
dev_flags += " --block.db {}".format(db_device)
else:
db_device = ""
# 2. Get OSD information
logger.out(
"Getting OSD information for ID {} on {}".format(osd_id, device),
state="i", state="i",
) )
retcode, stdout, stderr = common.run_os_command( try:
"ceph-volume lvm list {device}".format(device=device) for osd in found_osds:
) found_osd = found_osds[osd]
for line in stdout.split("\n"): lv_device = found_osd["lv_path"]
if "block device" in line:
osd_blockdev = line.split()[-1]
if "osd fsid" in line:
osd_fsid = line.split()[-1]
if "cluster fsid" in line:
osd_clusterfsid = line.split()[-1]
if "devices" in line:
osd_device = line.split()[-1]
if not osd_fsid: _, osd_pvc_information = get_list_osd(zkhandler, osd_id)
osd_information = osd_pvc_information[0]
logger.out(f"Querying OSD on device {lv_device}", state="i")
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm list --format json {lv_device}"
)
if retcode:
logger.out("Failed: ceph-volume lvm list", state="e") logger.out("Failed: ceph-volume lvm list", state="e")
logger.out(stdout, state="d") logger.out(stdout, state="d")
logger.out(stderr, state="d") logger.out(stderr, state="d")
raise Exception raise Exception
# Split OSD blockdev into VG and LV components osd_detail = jloads(stdout)[osd_id][0]
# osd_blockdev = /dev/ceph-<uuid>/osd-block-<uuid>
_, _, osd_vg, osd_lv = osd_blockdev.split("/")
# Reset whatever we were given to Ceph's /dev/xdX naming osd_fsid = osd_detail["tags"]["ceph.osd_fsid"]
if device != osd_device: if osd_fsid != osd_information["fsid"]:
device = osd_device logger.out(
f"OSD {osd_id} FSID {osd_information['fsid']} does not match volume FSID {osd_fsid}; OSD cannot be imported",
# 3. Activate the OSD state="e",
logger.out("Activating new OSD disk with ID {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
osdid=osd_id, osdfsid=osd_fsid
) )
dev_flags = f"--data {lv_device}"
if ext_db_flag:
db_device = "osd-db/osd-{osd_id}"
dev_flags += f" --block.db {db_device}"
if not path.exists(f"/dev/{db_device}"):
logger.out(
f"OSD Bluestore DB volume {db_device} does not exist; OSD cannot be imported",
state="e",
)
return
else:
db_device = ""
logger.out(f"Activating OSD {osd_id}", state="i")
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
) )
if retcode: if retcode:
logger.out("Failed: ceph-volume lvm activate", state="e") logger.out("Failed: ceph-volume lvm activate", state="e")
@ -897,47 +910,36 @@ class CephOSDInstance(object):
logger.out(stderr, state="d") logger.out(stderr, state="d")
raise Exception raise Exception
time.sleep(0.5) # Wait 1 second for it to activate
time.sleep(1)
# 4. Verify it started # Verify it started
retcode, stdout, stderr = common.run_os_command( retcode, stdout, stderr = common.run_os_command(
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id) f"systemctl status ceph-osd@{osd_id}"
) )
if retcode: if retcode:
logger.out("Failed: systemctl status", state="e") logger.out(f"Failed: OSD {osd_id} unit is not active", state="e")
logger.out(stdout, state="d") logger.out(stdout, state="d")
logger.out(stderr, state="d") logger.out(stderr, state="d")
raise Exception raise Exception
# 5. Update Zookeeper information logger.out(f"Updating OSD {osd_id} details in PVC", state="i")
logger.out(
"Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
)
zkhandler.write( zkhandler.write(
[ [
(("osd", osd_id), ""),
(("osd.node", osd_id), node),
(("osd.device", osd_id), device), (("osd.device", osd_id), device),
(("osd.db_device", osd_id), db_device), (("osd.vg", osd_id), osd_detail["vg_name"]),
(("osd.fsid", osd_id), ""), (("osd.lv", osd_id), osd_detail["lv_name"]),
(("osd.ofsid", osd_id), osd_fsid),
(("osd.cfsid", osd_id), osd_clusterfsid),
(("osd.lvm", osd_id), ""),
(("osd.vg", osd_id), osd_vg),
(("osd.lv", osd_id), osd_lv),
(
("osd.stats", osd_id),
'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
),
] ]
) )
# Log it logger.out(
logger.out("Refreshed OSD {} disk on {}".format(osd_id, device), state="o") f"Successfully reimported OSD {osd_id} on {device}", state="o"
)
return True return True
except Exception as e: except Exception as e:
# Log it # Log it
logger.out("Failed to refresh OSD {} disk: {}".format(osd_id, e), state="e") logger.out(f"Failed to refresh OSD {osd_id} disk: {e}", state="e")
return False return False
@staticmethod @staticmethod