Rework replacement procedure again

Avoid calling other functions; replicate the actual process from the Ceph
docs (https://docs.ceph.com/en/pacific/rados/operations/add-or-rm-osds/)
to ensure the replacement behaves as intended (e.g. preserving OSD IDs).
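
For reference, the Ceph-documented flow being replicated here boils down to the sequence sketched below. This is an illustrative Python sketch, not the PVC code itself: the OSD ID, FSID, and device are placeholder example values, and the run() helper stands in for PVC's common.run_os_command().

import subprocess
import time

# Placeholder example values; the real code derives these from the cluster state
osd_id = "7"             # OSD ID to preserve across the replacement
osd_fsid = "<osd-fsid>"  # the OSD's UUID, reused so ceph-volume keeps the same ID
new_device = "/dev/sdX"  # replacement block device


def run(cmd):
    # Stand-in for common.run_os_command(); returns the command's exit code
    return subprocess.run(cmd, shell=True).returncode


run(f"ceph osd down {osd_id}")
run(f"ceph osd out {osd_id}")
# Poll until Ceph reports the OSD safe to destroy
# (the diff below accepts return codes 0 and 11 for this check)
while run(f"ceph osd safe-to-destroy osd.{osd_id}") not in (0, 11):
    time.sleep(1)
run(f"systemctl stop ceph-osd@{osd_id}")
# 'destroy' (rather than 'purge') keeps the OSD ID and CRUSH entry for reuse
run(f"ceph osd destroy {osd_id} --yes-i-really-mean-it")
# Wipe the replacement disk, then re-prepare and reactivate with the same ID/FSID
run(f"ceph-volume lvm zap --destroy {new_device}")
run(
    f"ceph-volume lvm prepare --bluestore --osd-id {osd_id} "
    f"--osd-fsid {osd_fsid} --data {new_device}"
)
run(f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}")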
Joshua Boniface 2023-11-03 16:31:56 -04:00
parent ed5bc9fb43
commit dd0177ce10
1 changed file with 233 additions and 59 deletions


@@ -27,6 +27,7 @@ from daemon_lib.ceph import format_bytes_fromhuman, get_list_osd
 from distutils.util import strtobool
 from re import search, match, sub
+from uuid import uuid4
 from json import loads as jloads
@@ -431,18 +432,7 @@ class CephOSDInstance(object):
             osd_vg = osd_details["vg_name"]
             osd_lv = osd_details["lv_name"]

-            # 5b. Activate the OSD
-            logger.out(f"Activating OSD {osd_id}", state="i")
-            retcode, stdout, stderr = common.run_os_command(
-                f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
-            )
-            if retcode:
-                logger.out("Failed: ceph-volume lvm activate", state="e")
-                logger.out(stdout, state="d")
-                logger.out(stderr, state="d")
-                raise Exception
-
-            # 5c. Add it to the crush map
+            # 5b. Add it to the crush map
             logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
             retcode, stdout, stderr = common.run_os_command(
                 f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
@@ -453,6 +443,17 @@ class CephOSDInstance(object):
                 logger.out(stderr, state="d")
                 raise Exception

+            # 5c. Activate the OSD
+            logger.out(f"Activating OSD {osd_id}", state="i")
+            retcode, stdout, stderr = common.run_os_command(
+                f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
+            )
+            if retcode:
+                logger.out("Failed: ceph-volume lvm activate", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
             # 5d. Wait half a second for it to activate
             time.sleep(0.5)
@@ -563,59 +564,232 @@ class CephOSDInstance(object):
         if weight is None:
             weight = all_osds_on_block[0]["stats"]["weight"]

-        # Determine how many split OSD(s) to recreate
-        if len(all_osds_on_block) > 1 and all_osds_on_block[0]["is_split"]:
-            split_count = len(all_osds_on_block)
-        else:
-            split_count = None
-
-        # Determine if an ext_db should be readded
-        if ext_db_ratio is not None:
-            osd_db_ratio = ext_db_ratio
-            osd_db_size = None
-        elif ext_db_size is not None:
-            osd_db_ratio = None
-            osd_db_size = ext_db_size
-        elif all_osds_on_block[0]["db_device"]:
-            _, osd_db_size_bytes, _ = common.run_os_command(
-                f"blockdev --getsize64 {all_osds_on_block[0]['db_device']}"
-            )
-            osd_db_ratio = None
-            osd_db_size = f"{osd_db_size_bytes}"
-            if not osd_db_size:
-                logger.out(
-                    f"Could not get size of device {all_osds_on_block[0]['db_device']}; skipping external database creation",
-                    state="w",
-                )
-                osd_db_size = None
-        else:
-            osd_db_ratio = None
-            osd_db_size = None
-
-        # Remove each OSD on the block device
-        for osd in all_osds_on_block:
-            result = CephOSDInstance.remove_osd(
-                zkhandler,
-                logger,
-                node,
-                osd["id"],
-                force_flag=True,
-                skip_zap_flag=skip_zap,
-            )
-
-        # Create [a] new OSD[s], on the new block device
-        result = CephOSDInstance.add_osd(
-            zkhandler,
-            logger,
-            node,
-            new_device,
-            weight,
-            ext_db_ratio=osd_db_ratio,
-            ext_db_size=osd_db_size,
-            split_count=split_count,
-        )
-
-        return result
+        # Take down the OSD(s), but keep it's CRUSH map details and IDs
+        try:
+            for osd in all_osds_on_block:
+                osd_id = osd["id"]
+
+                # 1. Set the OSD down and out so it will flush
+                logger.out(f"Setting down OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph osd down {osd_id}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph osd down", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                logger.out(f"Setting out OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph osd out {osd_id}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph osd out", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                # 2. Wait for the OSD to be safe to remove (but don't wait for rebalancing to complete)
+                logger.out(f"Waiting for OSD {osd_id} to be safe to remove", state="i")
+                while True:
+                    retcode, stdout, stderr = common.run_os_command(
+                        f"ceph osd safe-to-destroy osd.{osd_id}"
+                    )
+                    if int(retcode) in [0, 11]:
+                        break
+                    else:
+                        time.sleep(1)
+
+                # 3. Stop the OSD process and wait for it to be terminated
+                logger.out(f"Stopping OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"systemctl stop ceph-osd@{osd_id}"
+                )
+                if retcode:
+                    logger.out("Failed: systemctl stop", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+                time.sleep(5)
+
+                # 4. Destroy the OSD
+                logger.out(f"Destroying OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph osd destroy {osd_id} --yes-i-really-mean-it"
+                )
+                if retcode:
+                    logger.out("Failed: ceph osd destroy", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+
+            if not skip_zap:
+                # 5. Zap the old disk
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph-volume lvm zap --destroy {real_old_device}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm zap", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+            # 6. Prepare the volume group on the new device
+            logger.out(f"Preparing LVM volume group on disk {new_device}", state="i")
+            retcode, stdout, stderr = common.run_os_command(
+                f"ceph-volume lvm zap --destroy {new_device}"
+            )
+            if retcode:
+                logger.out("Failed: ceph-volume lvm zap", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
+            retcode, stdout, stderr = common.run_os_command(f"pvcreate {new_device}")
+            if retcode:
+                logger.out("Failed: pvcreate", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
+            vg_uuid = str(uuid4())
+            retcode, stdout, stderr = common.run_os_command(
+                f"vgcreate ceph-{vg_uuid} {new_device}"
+            )
+            if retcode:
+                logger.out("Failed: vgcreate", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
+            # Determine how many OSDs we want on the new device
+            osds_count = len(all_osds_on_block)
+
+            # Determine the size of the new device
+            _, new_device_size_bytes, _ = common.run_os_command(
+                f"blockdev --getsize64 {new_device}"
+            )
+
+            # Calculate the size of each OSD (in MB) based on the default 4M extent size
+            new_osd_size_mb = (
+                int(int(int(new_device_size_bytes) / osds_count) / 1024 / 1024 / 4) * 4
+            )
+
+            # Calculate the size, if applicable, of the OSD block if we were passed a ratio
+            if ext_db_ratio is not None:
+                osd_new_db_size_mb = int(
+                    int(int(new_osd_size_mb * ext_db_ratio) / 4) * 4
+                )
+            elif ext_db_size is not None:
+                osd_new_db_size_mb = int(
+                    int(int(format_bytes_fromhuman(ext_db_size)) / 1024 / 1024 / 4) * 4
+                )
+            else:
+                _, new_device_size_bytes, _ = common.run_os_command(
+                    f"blockdev --getsize64 {all_osds_on_block[0]['db_device']}"
+                )
+                osd_new_db_size_mb = int(
+                    int(int(new_device_size_bytes) / 1024 / 1024 / 4) * 4
+                )
+
+            for osd in all_osds_on_block:
+                osd_id = osd["id"]
+                osd_fsid = osd["stats"]["uuid"]
+
+                logger.out(
+                    f"Preparing LVM logical volume on disk {new_device} for OSD {osd_id}",
+                    state="i",
+                )
+                retcode, stdout, stderr = common.run_os_command(
+                    f"lvcreate -L {new_osd_size_mb}M -n osd-block-{osd_fsid} ceph-{vg_uuid}"
+                )
+                if retcode:
+                    logger.out("Failed: lvcreate", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                logger.out(f"Preparing OSD {osd_id} on disk {new_device}", state="i")
+                retcode, stdout, stderr = common.run_os_comand(
+                    f"ceph-volume lvm prepare --bluestore --osd-id {osd_id} --osd-fsid {osd_fsid} --data /dev/ceph-{vg_uuid}/osd-block-{osd_fsid}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm prepare", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+            for osd in all_osds_on_block:
+                osd_id = osd["id"]
+                osd_fsid = osd["stats"]["uuid"]
+
+                if osd["db_device"]:
+                    db_device = f"osd-db/osd-{osd_id}"
+
+                    logger.out(
+                        f"Destroying old Bluestore DB volume for OSD {osd_id}",
+                        state="i",
+                    )
+                    retcode, stdout, stderr = common.run_os_command(
+                        f"lvremove {db_device}"
+                    )
+
+                    logger.out(
+                        f"Creating new Bluestore DB volume for OSD {osd_id}", state="i"
+                    )
+                    retcode, stdout, stderr = common.run_os_command(
+                        f"lvcreate -L {osd_new_db_size_mb}M -n osd-{osd_id} osd-db"
+                    )
+                    if retcode:
+                        logger.out("Failed: lvcreate", state="e")
+                        logger.out(stdout, state="d")
+                        logger.out(stderr, state="d")
+                        raise Exception
+
+                    logger.out(
+                        f"Attaching old Bluestore DB volume to OSD {osd_id}", state="i"
+                    )
+                    retcode, stdout, stderr = common.run_os_command(
+                        f"ceph-volume lvm new-db --osd-id {osd_id} --osd-fsid {osd_fsid} --target {db_device}"
+                    )
+                    if retcode:
+                        logger.out("Failed: ceph-volume lvm new-db", state="e")
+                        logger.out(stdout, state="d")
+                        logger.out(stderr, state="d")
+                        raise Exception
+
+                logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph osd crush add", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                logger.out(f"Activating OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm activate", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+            # Log it
+            logger.out(
+                f"Successfully replaced OSDs {','.join([o['id'] for o in all_osds_on_block])} on new disk {new_device}",
+                state="o",
+            )
+            return True
+        except Exception as e:
+            # Log it
+            logger.out(
+                f"Failed to replace OSD(s) on new disk {new_device}: {e}", state="e"
+            )
+            return False

     @staticmethod
     def refresh_osd(zkhandler, logger, node, osd_id, device, ext_db_flag):