Fix OSD creation for partition paths and fix gdisk

The previous implementation did not work with /dev/nvme devices or any
/dev/disk/by-* devices due to logic errors in the partition naming
scheme. Fix these, and be explicit about which device paths are
supported in the PVC CLI command output.
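
For reference, the partition naming rule this implements is roughly the
following (a minimal sketch; the helper name first_partition and the
example paths are illustrative, not part of this commit):

def first_partition(device):
    """Return the path of partition 1 for a raw block device path."""
    if device.startswith('/dev/disk/by-path/') or device.startswith('/dev/disk/by-id/'):
        # udev symlinks append '-part<N>':
        # /dev/disk/by-path/pci-0000:03:00.0-scsi-0:1:0:0 -> pci-0000:03:00.0-scsi-0:1:0:0-part1
        return '{}-part1'.format(device)
    if device.startswith('/dev/nvme'):
        # NVMe namespaces append 'p<N>': /dev/nvme0n1 -> /dev/nvme0n1p1
        return '{}p1'.format(device)
    # SCSI/SATA-style names append the number directly: /dev/sda -> /dev/sda1
    return '{}1'.format(device)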

The 'echo | gdisk' implementation of partition creation also did not
work due to limitations of subprocess.run; instead, use sgdisk, which
allows these commands to be given explicitly as arguments and is
included in the same package as gdisk.
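
A sketch of the failure mode and its replacement, assuming the daemon's
run_os_command wrapper splits the command string and calls
subprocess.run without shell=True (the device path is illustrative):

import subprocess

# Without a shell, the pipe and gdisk's scripted answers are passed as
# literal arguments to 'echo', so gdisk itself never runs:
# subprocess.run(['echo', '-e', 'o\ny\n...', '|', 'sudo', 'gdisk', '/dev/sda'])

# sgdisk takes the same operations as explicit arguments and reads no stdin:
subprocess.run(['sgdisk', '--clear', '/dev/sda'], check=True)
subprocess.run(['sgdisk', '--new', '1::', '--typecode', '1:8e00', '/dev/sda'], check=True)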
Joshua Boniface 2021-09-26 00:08:54 -04:00
parent 44491dd988
commit 7a3a44d47c
2 changed files with 29 additions and 7 deletions


@@ -2601,7 +2601,7 @@ def ceph_osd():
 @cluster_req
 def ceph_osd_create_db_vg(node, device, confirm_flag):
     """
-    Create a new Ceph OSD database volume group on node NODE with block device DEVICE.
+    Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported.
     This volume group will be used for Ceph OSD database functionality if the '--ext-db' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance. Only one OSD database volume group on a single physical device is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node; the database volume for each OSD is fixed to 5% of the OSD's size. Attempting to add additional database volume groups after the first will fail.
     """
@@ -2648,7 +2648,7 @@ def ceph_osd_create_db_vg(node, device, confirm_flag):
 @cluster_req
 def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag):
     """
-    Add a new Ceph OSD on node NODE with block device DEVICE.
+    Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported.
     If '--ext-db' is specified, the existing OSD database volume group on NODE will be used; it must exist first or OSD creation will fail. See the 'pvc storage osd create-db-vg' command for more details.


@@ -26,6 +26,7 @@ import psutil
 import daemon_lib.common as common
 from distutils.util import strtobool
+from re import match
 class CephOSDInstance(object):
@@ -318,20 +319,41 @@ class CephOSDInstance(object):
             return False
         # 1. Create an empty partition table
-        logger.out('Creating empty partition table on block device {}'.format(device), state='i')
+        logger.out('Creating partitions on block device {}'.format(device), state='i')
         retcode, stdout, stderr = common.run_os_command(
-            'echo -e "o\ny\nn\n\n\n\n8e00\nw\ny\n" | sudo gdisk {}'.format(device)
+            'sgdisk --clear {}'.format(device)
         )
         if retcode:
-            print('gdisk partitioning')
+            print('sgdisk create partition table')
             print(stdout)
             print(stderr)
             raise
+
+        retcode, stdout, stderr = common.run_os_command(
+            'sgdisk --new 1:: --typecode 1:8e00 {}'.format(device)
+        )
+        if retcode:
+            print('sgdisk create pv partition')
+            print(stdout)
+            print(stderr)
+            raise
+
+        # Handle the partition ID portion
+        if match(r'/dev/disk/by-(path|id)', device):
+            # /dev/disk/by-path/pci-0000:03:00.0-scsi-0:1:0:0 -> pci-0000:03:00.0-scsi-0:1:0:0-part1
+            partition = '{}-part1'.format(device)
+        elif match(r'/dev/nvme', device):
+            # /dev/nvme0n1 -> nvme0n1p1
+            partition = '{}p1'.format(device)
+        else:
+            # /dev/sda -> sda1
+            # No other '/dev/disk/by-*' types are valid for raw block devices anyways
+            partition = '{}1'.format(device)
+
         # 2. Create the PV
-        logger.out('Creating PV on block device {}1'.format(device), state='i')
+        logger.out('Creating PV on block device {}'.format(partition), state='i')
         retcode, stdout, stderr = common.run_os_command(
-            'pvcreate --force {}1'.format(device)
+            'pvcreate --force {}'.format(partition)
         )
         if retcode:
             print('pv creation')
@@ -342,7 +364,7 @@ class CephOSDInstance(object):
         # 3. Create the VG (named 'osd-db')
-        logger.out('Creating VG "osd-db" on block device {}1'.format(device), state='i')
+        logger.out('Creating VG "osd-db" on block device {}'.format(partition), state='i')
         retcode, stdout, stderr = common.run_os_command(
-            'vgcreate --force osd-db {}1'.format(device)
+            'vgcreate --force osd-db {}'.format(partition)
         )
         if retcode:
             print('vg creation')