Rework replacement procedure again
Avoid calling other functions; replicate the actual process from Ceph docs (https://docs.ceph.com/en/pacific/rados/operations/add-or-rm-osds/) to ensure things work out well (e.g. preserving OSD IDs).
This commit is contained in:
parent
ed5bc9fb43
commit
dd0177ce10
|
@ -27,6 +27,7 @@ from daemon_lib.ceph import format_bytes_fromhuman, get_list_osd
|
||||||
|
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
from re import search, match, sub
|
from re import search, match, sub
|
||||||
|
from uuid import uuid4
|
||||||
from json import loads as jloads
|
from json import loads as jloads
|
||||||
|
|
||||||
|
|
||||||
|
@ -431,18 +432,7 @@ class CephOSDInstance(object):
|
||||||
osd_vg = osd_details["vg_name"]
|
osd_vg = osd_details["vg_name"]
|
||||||
osd_lv = osd_details["lv_name"]
|
osd_lv = osd_details["lv_name"]
|
||||||
|
|
||||||
# 5b. Activate the OSD
|
# 5b. Add it to the crush map
|
||||||
logger.out(f"Activating OSD {osd_id}", state="i")
|
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
|
||||||
f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
|
|
||||||
)
|
|
||||||
if retcode:
|
|
||||||
logger.out("Failed: ceph-volume lvm activate", state="e")
|
|
||||||
logger.out(stdout, state="d")
|
|
||||||
logger.out(stderr, state="d")
|
|
||||||
raise Exception
|
|
||||||
|
|
||||||
# 5c. Add it to the crush map
|
|
||||||
logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
|
logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
|
f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
|
||||||
|
@ -453,6 +443,17 @@ class CephOSDInstance(object):
|
||||||
logger.out(stderr, state="d")
|
logger.out(stderr, state="d")
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
|
# 5c. Activate the OSD
|
||||||
|
logger.out(f"Activating OSD {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph-volume lvm activate", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
# 5d. Wait half a second for it to activate
|
# 5d. Wait half a second for it to activate
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
@ -563,59 +564,232 @@ class CephOSDInstance(object):
|
||||||
if weight is None:
|
if weight is None:
|
||||||
weight = all_osds_on_block[0]["stats"]["weight"]
|
weight = all_osds_on_block[0]["stats"]["weight"]
|
||||||
|
|
||||||
# Determine how many split OSD(s) to recreate
|
# Take down the OSD(s), but keep their CRUSH map details and IDs
|
||||||
if len(all_osds_on_block) > 1 and all_osds_on_block[0]["is_split"]:
|
try:
|
||||||
split_count = len(all_osds_on_block)
|
for osd in all_osds_on_block:
|
||||||
else:
|
osd_id = osd["id"]
|
||||||
split_count = None
|
|
||||||
|
|
||||||
# Determine if an ext_db should be readded
|
# 1. Set the OSD down and out so it will flush
|
||||||
|
logger.out(f"Setting down OSD {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd down {osd_id}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph osd down", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
logger.out(f"Setting out OSD {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd out {osd_id}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph osd out", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 2. Wait for the OSD to be safe to remove (but don't wait for rebalancing to complete)
|
||||||
|
logger.out(f"Waiting for OSD {osd_id} to be safe to remove", state="i")
|
||||||
|
while True:
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd safe-to-destroy osd.{osd_id}"
|
||||||
|
)
|
||||||
|
if int(retcode) in [0, 11]:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# 3. Stop the OSD process and wait for it to be terminated
|
||||||
|
logger.out(f"Stopping OSD {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"systemctl stop ceph-osd@{osd_id}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: systemctl stop", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
# 4. Destroy the OSD
|
||||||
|
logger.out(f"Destroying OSD {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd destroy {osd_id} --yes-i-really-mean-it"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph osd destroy", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
|
||||||
|
if not skip_zap:
|
||||||
|
# 5. Zap the old disk
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph-volume lvm zap --destroy {real_old_device}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph-volume lvm zap", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 6. Prepare the volume group on the new device
|
||||||
|
logger.out(f"Preparing LVM volume group on disk {new_device}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph-volume lvm zap --destroy {new_device}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph-volume lvm zap", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
retcode, stdout, stderr = common.run_os_command(f"pvcreate {new_device}")
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: pvcreate", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
vg_uuid = str(uuid4())
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"vgcreate ceph-{vg_uuid} {new_device}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: vgcreate", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# Determine how many OSDs we want on the new device
|
||||||
|
osds_count = len(all_osds_on_block)
|
||||||
|
|
||||||
|
# Determine the size of the new device
|
||||||
|
_, new_device_size_bytes, _ = common.run_os_command(
|
||||||
|
f"blockdev --getsize64 {new_device}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate the size of each OSD (in MB) based on the default 4M extent size
|
||||||
|
new_osd_size_mb = (
|
||||||
|
int(int(int(new_device_size_bytes) / osds_count) / 1024 / 1024 / 4) * 4
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate the size, if applicable, of the OSD DB volume (from the ratio, the explicit size, or the existing DB device)
|
||||||
if ext_db_ratio is not None:
|
if ext_db_ratio is not None:
|
||||||
osd_db_ratio = ext_db_ratio
|
osd_new_db_size_mb = int(
|
||||||
osd_db_size = None
|
int(int(new_osd_size_mb * ext_db_ratio) / 4) * 4
|
||||||
|
)
|
||||||
elif ext_db_size is not None:
|
elif ext_db_size is not None:
|
||||||
osd_db_ratio = None
|
osd_new_db_size_mb = int(
|
||||||
osd_db_size = ext_db_size
|
int(int(format_bytes_fromhuman(ext_db_size)) / 1024 / 1024 / 4) * 4
|
||||||
elif all_osds_on_block[0]["db_device"]:
|
)
|
||||||
_, osd_db_size_bytes, _ = common.run_os_command(
|
else:
|
||||||
|
_, new_device_size_bytes, _ = common.run_os_command(
|
||||||
f"blockdev --getsize64 {all_osds_on_block[0]['db_device']}"
|
f"blockdev --getsize64 {all_osds_on_block[0]['db_device']}"
|
||||||
)
|
)
|
||||||
osd_db_ratio = None
|
osd_new_db_size_mb = int(
|
||||||
osd_db_size = f"{osd_db_size_bytes}"
|
int(int(new_device_size_bytes) / 1024 / 1024 / 4) * 4
|
||||||
if not osd_db_size:
|
|
||||||
logger.out(
|
|
||||||
f"Could not get size of device {all_osds_on_block[0]['db_device']}; skipping external database creation",
|
|
||||||
state="w",
|
|
||||||
)
|
)
|
||||||
osd_db_size = None
|
|
||||||
else:
|
|
||||||
osd_db_ratio = None
|
|
||||||
osd_db_size = None
|
|
||||||
|
|
||||||
# Remove each OSD on the block device
|
|
||||||
for osd in all_osds_on_block:
|
for osd in all_osds_on_block:
|
||||||
result = CephOSDInstance.remove_osd(
|
osd_id = osd["id"]
|
||||||
zkhandler,
|
osd_fsid = osd["stats"]["uuid"]
|
||||||
logger,
|
|
||||||
node,
|
logger.out(
|
||||||
osd["id"],
|
f"Preparing LVM logical volume on disk {new_device} for OSD {osd_id}",
|
||||||
force_flag=True,
|
state="i",
|
||||||
skip_zap_flag=skip_zap,
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"lvcreate -L {new_osd_size_mb}M -n osd-block-{osd_fsid} ceph-{vg_uuid}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: lvcreate", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
logger.out(f"Preparing OSD {osd_id} on disk {new_device}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_comand(
|
||||||
|
f"ceph-volume lvm prepare --bluestore --osd-id {osd_id} --osd-fsid {osd_fsid} --data /dev/ceph-{vg_uuid}/osd-block-{osd_fsid}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph-volume lvm prepare", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
for osd in all_osds_on_block:
|
||||||
|
osd_id = osd["id"]
|
||||||
|
osd_fsid = osd["stats"]["uuid"]
|
||||||
|
|
||||||
|
if osd["db_device"]:
|
||||||
|
db_device = f"osd-db/osd-{osd_id}"
|
||||||
|
|
||||||
|
logger.out(
|
||||||
|
f"Destroying old Bluestore DB volume for OSD {osd_id}",
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"lvremove {db_device}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create [a] new OSD[s], on the new block device
|
logger.out(
|
||||||
result = CephOSDInstance.add_osd(
|
f"Creating new Bluestore DB volume for OSD {osd_id}", state="i"
|
||||||
zkhandler,
|
|
||||||
logger,
|
|
||||||
node,
|
|
||||||
new_device,
|
|
||||||
weight,
|
|
||||||
ext_db_ratio=osd_db_ratio,
|
|
||||||
ext_db_size=osd_db_size,
|
|
||||||
split_count=split_count,
|
|
||||||
)
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"lvcreate -L {osd_new_db_size_mb}M -n osd-{osd_id} osd-db"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: lvcreate", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
return result
|
logger.out(
|
||||||
|
f"Attaching old Bluestore DB volume to OSD {osd_id}", state="i"
|
||||||
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph-volume lvm new-db --osd-id {osd_id} --osd-fsid {osd_fsid} --target {db_device}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph-volume lvm new-db", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph osd crush add", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
logger.out(f"Activating OSD {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
logger.out("Failed: ceph-volume lvm activate", state="e")
|
||||||
|
logger.out(stdout, state="d")
|
||||||
|
logger.out(stderr, state="d")
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# Log it
|
||||||
|
logger.out(
|
||||||
|
f"Successfully replaced OSDs {','.join([o['id'] for o in all_osds_on_block])} on new disk {new_device}",
|
||||||
|
state="o",
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
# Log it
|
||||||
|
logger.out(
|
||||||
|
f"Failed to replace OSD(s) on new disk {new_device}: {e}", state="e"
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def refresh_osd(zkhandler, logger, node, osd_id, device, ext_db_flag):
|
def refresh_osd(zkhandler, logger, node, osd_id, device, ext_db_flag):
|
||||||
|
|
Loading…
Reference in New Issue